[Intel-gfx] [PATCH] Revert "Merge branch 'x86-apic-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip"
Rodrigo Vivi
rodrigo.vivi at intel.com
Wed Nov 29 23:37:11 UTC 2017
What's the issue here?
On Wed, Nov 29, 2017 at 08:05:43AM +0000, Maarten Lankhorst wrote:
> ---
> arch/x86/Kconfig | 2 -
> arch/x86/include/asm/apic.h | 270 +++++---
> arch/x86/include/asm/desc.h | 2 +-
> arch/x86/include/asm/hw_irq.h | 6 +-
> arch/x86/include/asm/io_apic.h | 2 +
> arch/x86/include/asm/irq.h | 4 +
> arch/x86/include/asm/irq_vectors.h | 8 +-
> arch/x86/include/asm/irqdomain.h | 1 -
> arch/x86/include/asm/kvm_host.h | 2 +-
> arch/x86/include/asm/trace/irq_vectors.h | 248 -------
> arch/x86/include/asm/x2apic.h | 50 ++
> arch/x86/include/asm/x86_init.h | 2 -
> arch/x86/kernel/apic/Makefile | 2 +-
> arch/x86/kernel/apic/apic.c | 239 ++++---
> arch/x86/kernel/apic/apic_common.c | 46 --
> arch/x86/kernel/apic/apic_flat_64.c | 10 +-
> arch/x86/kernel/apic/apic_noop.c | 25 +-
> arch/x86/kernel/apic/apic_numachip.c | 12 +-
> arch/x86/kernel/apic/bigsmp_32.c | 8 +-
> arch/x86/kernel/apic/io_apic.c | 130 ++--
> arch/x86/kernel/apic/probe_32.c | 29 +-
> arch/x86/kernel/apic/vector.c | 1099 ++++++++++++------------------
> arch/x86/kernel/apic/x2apic.h | 9 -
> arch/x86/kernel/apic/x2apic_cluster.c | 196 ++++--
> arch/x86/kernel/apic/x2apic_phys.c | 44 +-
> arch/x86/kernel/apic/x2apic_uv_x.c | 17 +-
> arch/x86/kernel/i8259.c | 1 -
> arch/x86/kernel/idt.c | 12 +-
> arch/x86/kernel/irq.c | 101 ++-
> arch/x86/kernel/irqinit.c | 4 +-
> arch/x86/kernel/setup.c | 12 +
> arch/x86/kernel/smpboot.c | 99 ++-
> arch/x86/kernel/time.c | 5 -
> arch/x86/kernel/traps.c | 2 +-
> arch/x86/kernel/vsmp_64.c | 19 +
> arch/x86/kernel/x86_init.c | 1 -
> arch/x86/xen/apic.c | 6 +-
> arch/x86/xen/enlighten_pv.c | 1 -
> drivers/iommu/amd_iommu.c | 39 +-
> drivers/iommu/intel_irq_remapping.c | 38 +-
> drivers/pci/msi.c | 2 -
> init/main.c | 2 +-
> kernel/irq/Kconfig | 3 -
> 43 files changed, 1317 insertions(+), 1493 deletions(-)
> create mode 100644 arch/x86/include/asm/x2apic.h
> delete mode 100644 arch/x86/kernel/apic/apic_common.c
> delete mode 100644 arch/x86/kernel/apic/x2apic.h
>
> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
> index 8eed3f94bfc7..1a060afd9913 100644
> --- a/arch/x86/Kconfig
> +++ b/arch/x86/Kconfig
> @@ -93,10 +93,8 @@ config X86
> select GENERIC_FIND_FIRST_BIT
> select GENERIC_IOMAP
> select GENERIC_IRQ_EFFECTIVE_AFF_MASK if SMP
> - select GENERIC_IRQ_MATRIX_ALLOCATOR if X86_LOCAL_APIC
> select GENERIC_IRQ_MIGRATION if SMP
> select GENERIC_IRQ_PROBE
> - select GENERIC_IRQ_RESERVATION_MODE
> select GENERIC_IRQ_SHOW
> select GENERIC_PENDING_IRQ if SMP
> select GENERIC_SMP_IDLE_THREAD
> diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
> index a9e57f08bfa6..5f01671c68f2 100644
> --- a/arch/x86/include/asm/apic.h
> +++ b/arch/x86/include/asm/apic.h
> @@ -53,15 +53,6 @@ extern int local_apic_timer_c2_ok;
> extern int disable_apic;
> extern unsigned int lapic_timer_frequency;
>
> -extern enum apic_intr_mode_id apic_intr_mode;
> -enum apic_intr_mode_id {
> - APIC_PIC,
> - APIC_VIRTUAL_WIRE,
> - APIC_VIRTUAL_WIRE_NO_CONFIG,
> - APIC_SYMMETRIC_IO,
> - APIC_SYMMETRIC_IO_NO_ROUTING
> -};
> -
> #ifdef CONFIG_SMP
> extern void __inquire_remote_apic(int apicid);
> #else /* CONFIG_SMP */
> @@ -136,13 +127,14 @@ extern void disconnect_bsp_APIC(int virt_wire_setup);
> extern void disable_local_APIC(void);
> extern void lapic_shutdown(void);
> extern void sync_Arb_IDs(void);
> -extern void apic_intr_mode_init(void);
> +extern void init_bsp_APIC(void);
> extern void setup_local_APIC(void);
> extern void init_apic_mappings(void);
> void register_lapic_address(unsigned long address);
> extern void setup_boot_APIC_clock(void);
> extern void setup_secondary_APIC_clock(void);
> extern void lapic_update_tsc_freq(void);
> +extern int APIC_init_uniprocessor(void);
>
> #ifdef CONFIG_X86_64
> static inline int apic_force_enable(unsigned long addr)
> @@ -153,7 +145,7 @@ static inline int apic_force_enable(unsigned long addr)
> extern int apic_force_enable(unsigned long addr);
> #endif
>
> -extern void apic_bsp_setup(bool upmode);
> +extern int apic_bsp_setup(bool upmode);
> extern void apic_ap_setup(void);
>
> /*
> @@ -169,10 +161,6 @@ static inline int apic_is_clustered_box(void)
> #endif
>
> extern int setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask);
> -extern void lapic_assign_system_vectors(void);
> -extern void lapic_assign_legacy_vector(unsigned int isairq, bool replace);
> -extern void lapic_online(void);
> -extern void lapic_offline(void);
>
> #else /* !CONFIG_X86_LOCAL_APIC */
> static inline void lapic_shutdown(void) { }
> @@ -182,9 +170,6 @@ static inline void disable_local_APIC(void) { }
> # define setup_boot_APIC_clock x86_init_noop
> # define setup_secondary_APIC_clock x86_init_noop
> static inline void lapic_update_tsc_freq(void) { }
> -static inline void apic_intr_mode_init(void) { }
> -static inline void lapic_assign_system_vectors(void) { }
> -static inline void lapic_assign_legacy_vector(unsigned int i, bool r) { }
> #endif /* !CONFIG_X86_LOCAL_APIC */
>
> #ifdef CONFIG_X86_X2APIC
> @@ -280,63 +265,73 @@ struct irq_data;
> * James Cleverdon.
> */
> struct apic {
> - /* Hotpath functions first */
> - void (*eoi_write)(u32 reg, u32 v);
> - void (*native_eoi_write)(u32 reg, u32 v);
> - void (*write)(u32 reg, u32 v);
> - u32 (*read)(u32 reg);
> -
> - /* IPI related functions */
> - void (*wait_icr_idle)(void);
> - u32 (*safe_wait_icr_idle)(void);
> -
> - void (*send_IPI)(int cpu, int vector);
> - void (*send_IPI_mask)(const struct cpumask *mask, int vector);
> - void (*send_IPI_mask_allbutself)(const struct cpumask *msk, int vec);
> - void (*send_IPI_allbutself)(int vector);
> - void (*send_IPI_all)(int vector);
> - void (*send_IPI_self)(int vector);
> -
> - /* dest_logical is used by the IPI functions */
> - u32 dest_logical;
> - u32 disable_esr;
> - u32 irq_delivery_mode;
> - u32 irq_dest_mode;
> -
> - /* Functions and data related to vector allocation */
> - void (*vector_allocation_domain)(int cpu, struct cpumask *retmask,
> - const struct cpumask *mask);
> - int (*cpu_mask_to_apicid)(const struct cpumask *cpumask,
> - struct irq_data *irqdata,
> - unsigned int *apicid);
> - u32 (*calc_dest_apicid)(unsigned int cpu);
> -
> - /* ICR related functions */
> - u64 (*icr_read)(void);
> - void (*icr_write)(u32 low, u32 high);
> -
> - /* Probe, setup and smpboot functions */
> - int (*probe)(void);
> - int (*acpi_madt_oem_check)(char *oem_id, char *oem_table_id);
> - int (*apic_id_valid)(int apicid);
> - int (*apic_id_registered)(void);
> -
> - bool (*check_apicid_used)(physid_mask_t *map, int apicid);
> - void (*init_apic_ldr)(void);
> - void (*ioapic_phys_id_map)(physid_mask_t *phys_map, physid_mask_t *retmap);
> - void (*setup_apic_routing)(void);
> - int (*cpu_present_to_apicid)(int mps_cpu);
> - void (*apicid_to_cpu_present)(int phys_apicid, physid_mask_t *retmap);
> - int (*check_phys_apicid_present)(int phys_apicid);
> - int (*phys_pkg_id)(int cpuid_apic, int index_msb);
> -
> - u32 (*get_apic_id)(unsigned long x);
> - u32 (*set_apic_id)(unsigned int id);
> + char *name;
> +
> + int (*probe)(void);
> + int (*acpi_madt_oem_check)(char *oem_id, char *oem_table_id);
> + int (*apic_id_valid)(int apicid);
> + int (*apic_id_registered)(void);
> +
> + u32 irq_delivery_mode;
> + u32 irq_dest_mode;
> +
> + const struct cpumask *(*target_cpus)(void);
> +
> + int disable_esr;
> +
> + int dest_logical;
> + unsigned long (*check_apicid_used)(physid_mask_t *map, int apicid);
> +
> + void (*vector_allocation_domain)(int cpu, struct cpumask *retmask,
> + const struct cpumask *mask);
> + void (*init_apic_ldr)(void);
> +
> + void (*ioapic_phys_id_map)(physid_mask_t *phys_map, physid_mask_t *retmap);
> +
> + void (*setup_apic_routing)(void);
> + int (*cpu_present_to_apicid)(int mps_cpu);
> + void (*apicid_to_cpu_present)(int phys_apicid, physid_mask_t *retmap);
> + int (*check_phys_apicid_present)(int phys_apicid);
> + int (*phys_pkg_id)(int cpuid_apic, int index_msb);
> +
> + unsigned int (*get_apic_id)(unsigned long x);
> + /* Can't be NULL on 64-bit */
> + unsigned long (*set_apic_id)(unsigned int id);
> +
> + int (*cpu_mask_to_apicid)(const struct cpumask *cpumask,
> + struct irq_data *irqdata,
> + unsigned int *apicid);
> +
> + /* ipi */
> + void (*send_IPI)(int cpu, int vector);
> + void (*send_IPI_mask)(const struct cpumask *mask, int vector);
> + void (*send_IPI_mask_allbutself)(const struct cpumask *mask,
> + int vector);
> + void (*send_IPI_allbutself)(int vector);
> + void (*send_IPI_all)(int vector);
> + void (*send_IPI_self)(int vector);
>
> /* wakeup_secondary_cpu */
> - int (*wakeup_secondary_cpu)(int apicid, unsigned long start_eip);
> + int (*wakeup_secondary_cpu)(int apicid, unsigned long start_eip);
>
> - void (*inquire_remote_apic)(int apicid);
> + void (*inquire_remote_apic)(int apicid);
> +
> + /* apic ops */
> + u32 (*read)(u32 reg);
> + void (*write)(u32 reg, u32 v);
> + /*
> + * ->eoi_write() has the same signature as ->write().
> + *
> + * Drivers can support both ->eoi_write() and ->write() by passing the same
> + * callback value. Kernel can override ->eoi_write() and fall back
> + * on write for EOI.
> + */
> + void (*eoi_write)(u32 reg, u32 v);
> + void (*native_eoi_write)(u32 reg, u32 v);
> + u64 (*icr_read)(void);
> + void (*icr_write)(u32 low, u32 high);
> + void (*wait_icr_idle)(void);
> + u32 (*safe_wait_icr_idle)(void);
>
> #ifdef CONFIG_X86_32
> /*
> @@ -351,7 +346,6 @@ struct apic {
> */
> int (*x86_32_early_logical_apicid)(int cpu);
> #endif
> - char *name;
> };
>
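
A side note on the ->eoi_write()/->write() contract restored above: since both
share the (u32 reg, u32 v) signature, a driver can simply pass the same
callback for both slots. A minimal sketch, with my_apic_base and my_apic as
hypothetical names rather than anything in this patch:

    /* One MMIO write routine serving both ->write() and ->eoi_write(). */
    static void my_apic_write(u32 reg, u32 v)
    {
            writel(v, my_apic_base + reg);  /* my_apic_base: assumed ioremap()ed */
    }

    static struct apic my_apic = {
            .name      = "my-apic",
            .write     = my_apic_write,
            .eoi_write = my_apic_write,     /* same callback value, as allowed */
    };

The core can still override ->eoi_write() later (e.g. via apic_set_eoi_write())
and fall back on ->write() semantics for EOI.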
> /*
> @@ -386,7 +380,6 @@ extern struct apic *__apicdrivers[], *__apicdrivers_end[];
> */
> #ifdef CONFIG_SMP
> extern int wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip);
> -extern int lapic_can_unplug_cpu(void);
> #endif
>
> #ifdef CONFIG_X86_LOCAL_APIC
> @@ -470,33 +463,84 @@ static inline unsigned default_get_apic_id(unsigned long x)
> extern void apic_send_IPI_self(int vector);
>
> DECLARE_PER_CPU(int, x2apic_extra_bits);
> +
> +extern int default_cpu_present_to_apicid(int mps_cpu);
> +extern int default_check_phys_apicid_present(int phys_apicid);
> #endif
>
> extern void generic_bigsmp_probe(void);
>
> +
> #ifdef CONFIG_X86_LOCAL_APIC
>
> #include <asm/smp.h>
>
> #define APIC_DFR_VALUE (APIC_DFR_FLAT)
>
> +static inline const struct cpumask *default_target_cpus(void)
> +{
> +#ifdef CONFIG_SMP
> + return cpu_online_mask;
> +#else
> + return cpumask_of(0);
> +#endif
> +}
> +
> +static inline const struct cpumask *online_target_cpus(void)
> +{
> + return cpu_online_mask;
> +}
> +
> DECLARE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_bios_cpu_apicid);
>
> -extern struct apic apic_noop;
>
> static inline unsigned int read_apic_id(void)
> {
> - unsigned int reg = apic_read(APIC_ID);
> + unsigned int reg;
> +
> + reg = apic_read(APIC_ID);
>
> return apic->get_apic_id(reg);
> }
>
> -extern int default_apic_id_valid(int apicid);
> +static inline int default_apic_id_valid(int apicid)
> +{
> + return (apicid < 255);
> +}
> +
> extern int default_acpi_madt_oem_check(char *, char *);
> +
> extern void default_setup_apic_routing(void);
>
> -extern u32 apic_default_calc_apicid(unsigned int cpu);
> -extern u32 apic_flat_calc_apicid(unsigned int cpu);
> +extern struct apic apic_noop;
> +
> +#ifdef CONFIG_X86_32
> +
> +static inline int noop_x86_32_early_logical_apicid(int cpu)
> +{
> + return BAD_APICID;
> +}
> +
> +/*
> + * Set up the logical destination ID.
> + *
> + * Intel recommends to set DFR, LDR and TPR before enabling
> + * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel
> + * document number 292116). So here it goes...
> + */
> +extern void default_init_apic_ldr(void);
> +
> +static inline int default_apic_id_registered(void)
> +{
> + return physid_isset(read_apic_id(), phys_cpu_present_map);
> +}
> +
> +static inline int default_phys_pkg_id(int cpuid_apic, int index_msb)
> +{
> + return cpuid_apic >> index_msb;
> +}
> +
> +#endif
>
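
The AP-388 note above is really about write ordering: program DFR, LDR (and
TPR) before software-enabling the APIC. A simplified sketch of that order,
assuming the usual flat logical IDs; the restored default_init_apic_ldr() in
apic.c below does the LDR step as a read-modify-write to preserve reserved
bits:

    apic_write(APIC_DFR, APIC_DFR_VALUE);            /* 1. destination format */
    apic_write(APIC_LDR,
               SET_APIC_LOGICAL_ID(1UL << smp_processor_id())); /* 2. logical ID */
    /* 3. only then set APIC_SPIV_APIC_ENABLED in APIC_SPIV */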
> extern int flat_cpu_mask_to_apicid(const struct cpumask *cpumask,
> struct irq_data *irqdata,
> @@ -504,17 +548,71 @@ extern int flat_cpu_mask_to_apicid(const struct cpumask *cpumask,
> extern int default_cpu_mask_to_apicid(const struct cpumask *cpumask,
> struct irq_data *irqdata,
> unsigned int *apicid);
> -extern bool default_check_apicid_used(physid_mask_t *map, int apicid);
> -extern void flat_vector_allocation_domain(int cpu, struct cpumask *retmask,
> - const struct cpumask *mask);
> -extern void default_vector_allocation_domain(int cpu, struct cpumask *retmask,
> - const struct cpumask *mask);
> -extern void default_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap);
> +
> +static inline void
> +flat_vector_allocation_domain(int cpu, struct cpumask *retmask,
> + const struct cpumask *mask)
> +{
> + /* Careful. Some cpus do not strictly honor the set of cpus
> + * specified in the interrupt destination when using lowest
> + * priority interrupt delivery mode.
> + *
> + * In particular there was a hyperthreading cpu observed to
> + * deliver interrupts to the wrong hyperthread when only one
> + * hyperthread was specified in the interrupt destination.
> + */
> + cpumask_clear(retmask);
> + cpumask_bits(retmask)[0] = APIC_ALL_CPUS;
> +}
> +
> +static inline void
> +default_vector_allocation_domain(int cpu, struct cpumask *retmask,
> + const struct cpumask *mask)
> +{
> + cpumask_copy(retmask, cpumask_of(cpu));
> +}
> +
> +static inline unsigned long default_check_apicid_used(physid_mask_t *map, int apicid)
> +{
> + return physid_isset(apicid, *map);
> +}
> +
> +static inline void default_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap)
> +{
> + *retmap = *phys_map;
> +}
> +
> +static inline int __default_cpu_present_to_apicid(int mps_cpu)
> +{
> + if (mps_cpu < nr_cpu_ids && cpu_present(mps_cpu))
> + return (int)per_cpu(x86_bios_cpu_apicid, mps_cpu);
> + else
> + return BAD_APICID;
> +}
> +
> +static inline int
> +__default_check_phys_apicid_present(int phys_apicid)
> +{
> + return physid_isset(phys_apicid, phys_cpu_present_map);
> +}
> +
> +#ifdef CONFIG_X86_32
> +static inline int default_cpu_present_to_apicid(int mps_cpu)
> +{
> + return __default_cpu_present_to_apicid(mps_cpu);
> +}
> +
> +static inline int
> +default_check_phys_apicid_present(int phys_apicid)
> +{
> + return __default_check_phys_apicid_present(phys_apicid);
> +}
> +#else
> extern int default_cpu_present_to_apicid(int mps_cpu);
> extern int default_check_phys_apicid_present(int phys_apicid);
> +#endif
>
> #endif /* CONFIG_X86_LOCAL_APIC */
> -
> extern void irq_enter(void);
> extern void irq_exit(void);
>
> diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
> index 4011cb03ef08..0a3e808b9123 100644
> --- a/arch/x86/include/asm/desc.h
> +++ b/arch/x86/include/asm/desc.h
> @@ -393,7 +393,7 @@ static inline void set_desc_limit(struct desc_struct *desc, unsigned long limit)
> void update_intr_gate(unsigned int n, const void *addr);
> void alloc_intr_gate(unsigned int n, const void *addr);
>
> -extern unsigned long system_vectors[];
> +extern unsigned long used_vectors[];
>
> #ifdef CONFIG_X86_64
> DECLARE_PER_CPU(u32, debug_idt_ctr);
> diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
> index 2851077b6051..16d48a18c069 100644
> --- a/arch/x86/include/asm/hw_irq.h
> +++ b/arch/x86/include/asm/hw_irq.h
> @@ -16,8 +16,6 @@
>
> #include <asm/irq_vectors.h>
>
> -#define IRQ_MATRIX_BITS NR_VECTORS
> -
> #ifndef __ASSEMBLY__
>
> #include <linux/percpu.h>
> @@ -117,13 +115,15 @@ struct irq_alloc_info {
>
> struct irq_cfg {
> unsigned int dest_apicid;
> - unsigned int vector;
> + u8 vector;
> + u8 old_vector;
> };
>
> extern struct irq_cfg *irq_cfg(unsigned int irq);
> extern struct irq_cfg *irqd_cfg(struct irq_data *irq_data);
> extern void lock_vector_lock(void);
> extern void unlock_vector_lock(void);
> +extern void setup_vector_irq(int cpu);
> #ifdef CONFIG_SMP
> extern void send_cleanup_vector(struct irq_cfg *);
> extern void irq_complete_move(struct irq_cfg *cfg);
> diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h
> index a8834dd546cd..5c27e146a166 100644
> --- a/arch/x86/include/asm/io_apic.h
> +++ b/arch/x86/include/asm/io_apic.h
> @@ -193,6 +193,7 @@ static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
> extern void setup_IO_APIC(void);
> extern void enable_IO_APIC(void);
> extern void disable_IO_APIC(void);
> +extern void setup_ioapic_dest(void);
> extern int IO_APIC_get_PCI_irq_vector(int bus, int devfn, int pin);
> extern void print_IO_APICs(void);
> #else /* !CONFIG_X86_IO_APIC */
> @@ -232,6 +233,7 @@ static inline void io_apic_init_mappings(void) { }
>
> static inline void setup_IO_APIC(void) { }
> static inline void enable_IO_APIC(void) { }
> +static inline void setup_ioapic_dest(void) { }
>
> #endif
>
> diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h
> index 2395bb794c7b..d8632f8fa17d 100644
> --- a/arch/x86/include/asm/irq.h
> +++ b/arch/x86/include/asm/irq.h
> @@ -26,7 +26,11 @@ extern void irq_ctx_init(int cpu);
>
> struct irq_desc;
>
> +#ifdef CONFIG_HOTPLUG_CPU
> +#include <linux/cpumask.h>
> +extern int check_irq_vectors_for_cpu_disable(void);
> extern void fixup_irqs(void);
> +#endif
>
> #ifdef CONFIG_HAVE_KVM
> extern void kvm_set_posted_intr_wakeup_handler(void (*handler)(void));
> diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
> index 67421f649cfa..c20ffca8fef1 100644
> --- a/arch/x86/include/asm/irq_vectors.h
> +++ b/arch/x86/include/asm/irq_vectors.h
> @@ -102,8 +102,12 @@
> #define POSTED_INTR_NESTED_VECTOR 0xf0
> #endif
>
> -#define MANAGED_IRQ_SHUTDOWN_VECTOR 0xef
> -#define LOCAL_TIMER_VECTOR 0xee
> +/*
> + * Local APIC timer IRQ vector is on a different priority level,
> + * to work around the 'lost local interrupt if more than 2 IRQ
> + * sources per level' errata.
> + */
> +#define LOCAL_TIMER_VECTOR 0xef
>
> #define NR_VECTORS 256
>
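
Context for the LOCAL_TIMER_VECTOR comment above: the local APIC derives an
interrupt's priority class from the upper nibble of its vector, so 0xef has
class 0xe to itself while the 0xfX system vectors share class 0xf. As a
standalone illustration, not part of the patch:

    /* Priority class = vector >> 4; the errata above concerns more than
     * two IRQ sources landing in one class. */
    static unsigned int apic_priority_class(unsigned int vector)
    {
            return vector >> 4;     /* 0xef -> 0xe, 0xf0..0xff -> 0xf */
    }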
> diff --git a/arch/x86/include/asm/irqdomain.h b/arch/x86/include/asm/irqdomain.h
> index 139feef467f7..4e5638539846 100644
> --- a/arch/x86/include/asm/irqdomain.h
> +++ b/arch/x86/include/asm/irqdomain.h
> @@ -9,7 +9,6 @@
> enum {
> /* Allocate contiguous CPU vectors */
> X86_IRQ_ALLOC_CONTIGUOUS_VECTORS = 0x1,
> - X86_IRQ_ALLOC_LEGACY = 0x2,
> };
>
> extern struct irq_domain *x86_vector_domain;
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 1bfb99770c34..7233445a20bd 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -1424,7 +1424,7 @@ static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
> static inline int kvm_cpu_get_apicid(int mps_cpu)
> {
> #ifdef CONFIG_X86_LOCAL_APIC
> - return default_cpu_present_to_apicid(mps_cpu);
> + return __default_cpu_present_to_apicid(mps_cpu);
> #else
> WARN_ON_ONCE(1);
> return BAD_APICID;
> diff --git a/arch/x86/include/asm/trace/irq_vectors.h b/arch/x86/include/asm/trace/irq_vectors.h
> index 84b9ec0c1bc0..8eb139ed1a03 100644
> --- a/arch/x86/include/asm/trace/irq_vectors.h
> +++ b/arch/x86/include/asm/trace/irq_vectors.h
> @@ -138,254 +138,6 @@ DEFINE_IRQ_VECTOR_EVENT(deferred_error_apic);
> DEFINE_IRQ_VECTOR_EVENT(thermal_apic);
> #endif
>
> -TRACE_EVENT(vector_config,
> -
> - TP_PROTO(unsigned int irq, unsigned int vector,
> - unsigned int cpu, unsigned int apicdest),
> -
> - TP_ARGS(irq, vector, cpu, apicdest),
> -
> - TP_STRUCT__entry(
> - __field( unsigned int, irq )
> - __field( unsigned int, vector )
> - __field( unsigned int, cpu )
> - __field( unsigned int, apicdest )
> - ),
> -
> - TP_fast_assign(
> - __entry->irq = irq;
> - __entry->vector = vector;
> - __entry->cpu = cpu;
> - __entry->apicdest = apicdest;
> - ),
> -
> - TP_printk("irq=%u vector=%u cpu=%u apicdest=0x%08x",
> - __entry->irq, __entry->vector, __entry->cpu,
> - __entry->apicdest)
> -);
> -
> -DECLARE_EVENT_CLASS(vector_mod,
> -
> - TP_PROTO(unsigned int irq, unsigned int vector,
> - unsigned int cpu, unsigned int prev_vector,
> - unsigned int prev_cpu),
> -
> - TP_ARGS(irq, vector, cpu, prev_vector, prev_cpu),
> -
> - TP_STRUCT__entry(
> - __field( unsigned int, irq )
> - __field( unsigned int, vector )
> - __field( unsigned int, cpu )
> - __field( unsigned int, prev_vector )
> - __field( unsigned int, prev_cpu )
> - ),
> -
> - TP_fast_assign(
> - __entry->irq = irq;
> - __entry->vector = vector;
> - __entry->cpu = cpu;
> - __entry->prev_vector = prev_vector;
> - __entry->prev_cpu = prev_cpu;
> -
> - ),
> -
> - TP_printk("irq=%u vector=%u cpu=%u prev_vector=%u prev_cpu=%u",
> - __entry->irq, __entry->vector, __entry->cpu,
> - __entry->prev_vector, __entry->prev_cpu)
> -);
> -
> -#define DEFINE_IRQ_VECTOR_MOD_EVENT(name) \
> -DEFINE_EVENT_FN(vector_mod, name, \
> - TP_PROTO(unsigned int irq, unsigned int vector, \
> - unsigned int cpu, unsigned int prev_vector, \
> - unsigned int prev_cpu), \
> - TP_ARGS(irq, vector, cpu, prev_vector, prev_cpu), NULL, NULL); \
> -
> -DEFINE_IRQ_VECTOR_MOD_EVENT(vector_update);
> -DEFINE_IRQ_VECTOR_MOD_EVENT(vector_clear);
> -
> -DECLARE_EVENT_CLASS(vector_reserve,
> -
> - TP_PROTO(unsigned int irq, int ret),
> -
> - TP_ARGS(irq, ret),
> -
> - TP_STRUCT__entry(
> - __field( unsigned int, irq )
> - __field( int, ret )
> - ),
> -
> - TP_fast_assign(
> - __entry->irq = irq;
> - __entry->ret = ret;
> - ),
> -
> - TP_printk("irq=%u ret=%d", __entry->irq, __entry->ret)
> -);
> -
> -#define DEFINE_IRQ_VECTOR_RESERVE_EVENT(name) \
> -DEFINE_EVENT_FN(vector_reserve, name, \
> - TP_PROTO(unsigned int irq, int ret), \
> - TP_ARGS(irq, ret), NULL, NULL); \
> -
> -DEFINE_IRQ_VECTOR_RESERVE_EVENT(vector_reserve_managed);
> -DEFINE_IRQ_VECTOR_RESERVE_EVENT(vector_reserve);
> -
> -TRACE_EVENT(vector_alloc,
> -
> - TP_PROTO(unsigned int irq, unsigned int vector, bool reserved,
> - int ret),
> -
> - TP_ARGS(irq, vector, ret, reserved),
> -
> - TP_STRUCT__entry(
> - __field( unsigned int, irq )
> - __field( unsigned int, vector )
> - __field( bool, reserved )
> - __field( int, ret )
> - ),
> -
> - TP_fast_assign(
> - __entry->irq = irq;
> - __entry->vector = ret < 0 ? 0 : vector;
> - __entry->reserved = reserved;
> - __entry->ret = ret > 0 ? 0 : ret;
> - ),
> -
> - TP_printk("irq=%u vector=%u reserved=%d ret=%d",
> - __entry->irq, __entry->vector,
> - __entry->reserved, __entry->ret)
> -);
> -
> -TRACE_EVENT(vector_alloc_managed,
> -
> - TP_PROTO(unsigned int irq, unsigned int vector,
> - int ret),
> -
> - TP_ARGS(irq, vector, ret),
> -
> - TP_STRUCT__entry(
> - __field( unsigned int, irq )
> - __field( unsigned int, vector )
> - __field( int, ret )
> - ),
> -
> - TP_fast_assign(
> - __entry->irq = irq;
> - __entry->vector = ret < 0 ? 0 : vector;
> - __entry->ret = ret > 0 ? 0 : ret;
> - ),
> -
> - TP_printk("irq=%u vector=%u ret=%d",
> - __entry->irq, __entry->vector, __entry->ret)
> -);
> -
> -DECLARE_EVENT_CLASS(vector_activate,
> -
> - TP_PROTO(unsigned int irq, bool is_managed, bool can_reserve,
> - bool early),
> -
> - TP_ARGS(irq, is_managed, can_reserve, early),
> -
> - TP_STRUCT__entry(
> - __field( unsigned int, irq )
> - __field( bool, is_managed )
> - __field( bool, can_reserve )
> - __field( bool, early )
> - ),
> -
> - TP_fast_assign(
> - __entry->irq = irq;
> - __entry->is_managed = is_managed;
> - __entry->can_reserve = can_reserve;
> - __entry->early = early;
> - ),
> -
> - TP_printk("irq=%u is_managed=%d can_reserve=%d early=%d",
> - __entry->irq, __entry->is_managed, __entry->can_reserve,
> - __entry->early)
> -);
> -
> -#define DEFINE_IRQ_VECTOR_ACTIVATE_EVENT(name) \
> -DEFINE_EVENT_FN(vector_activate, name, \
> - TP_PROTO(unsigned int irq, bool is_managed, \
> - bool can_reserve, bool early), \
> - TP_ARGS(irq, is_managed, can_reserve, early), NULL, NULL); \
> -
> -DEFINE_IRQ_VECTOR_ACTIVATE_EVENT(vector_activate);
> -DEFINE_IRQ_VECTOR_ACTIVATE_EVENT(vector_deactivate);
> -
> -TRACE_EVENT(vector_teardown,
> -
> - TP_PROTO(unsigned int irq, bool is_managed, bool has_reserved),
> -
> - TP_ARGS(irq, is_managed, has_reserved),
> -
> - TP_STRUCT__entry(
> - __field( unsigned int, irq )
> - __field( bool, is_managed )
> - __field( bool, has_reserved )
> - ),
> -
> - TP_fast_assign(
> - __entry->irq = irq;
> - __entry->is_managed = is_managed;
> - __entry->has_reserved = has_reserved;
> - ),
> -
> - TP_printk("irq=%u is_managed=%d has_reserved=%d",
> - __entry->irq, __entry->is_managed, __entry->has_reserved)
> -);
> -
> -TRACE_EVENT(vector_setup,
> -
> - TP_PROTO(unsigned int irq, bool is_legacy, int ret),
> -
> - TP_ARGS(irq, is_legacy, ret),
> -
> - TP_STRUCT__entry(
> - __field( unsigned int, irq )
> - __field( bool, is_legacy )
> - __field( int, ret )
> - ),
> -
> - TP_fast_assign(
> - __entry->irq = irq;
> - __entry->is_legacy = is_legacy;
> - __entry->ret = ret;
> - ),
> -
> - TP_printk("irq=%u is_legacy=%d ret=%d",
> - __entry->irq, __entry->is_legacy, __entry->ret)
> -);
> -
> -TRACE_EVENT(vector_free_moved,
> -
> - TP_PROTO(unsigned int irq, unsigned int cpu, unsigned int vector,
> - bool is_managed),
> -
> - TP_ARGS(irq, cpu, vector, is_managed),
> -
> - TP_STRUCT__entry(
> - __field( unsigned int, irq )
> - __field( unsigned int, cpu )
> - __field( unsigned int, vector )
> - __field( bool, is_managed )
> - ),
> -
> - TP_fast_assign(
> - __entry->irq = irq;
> - __entry->cpu = cpu;
> - __entry->vector = vector;
> - __entry->is_managed = is_managed;
> - ),
> -
> - TP_printk("irq=%u cpu=%u vector=%u is_managed=%d",
> - __entry->irq, __entry->cpu, __entry->vector,
> - __entry->is_managed)
> -);
> -
> -
> #endif /* CONFIG_X86_LOCAL_APIC */
>
> #undef TRACE_INCLUDE_PATH
> diff --git a/arch/x86/include/asm/x2apic.h b/arch/x86/include/asm/x2apic.h
> new file mode 100644
> index 000000000000..78ccf28d17db
> --- /dev/null
> +++ b/arch/x86/include/asm/x2apic.h
> @@ -0,0 +1,50 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * Common bits for X2APIC cluster/physical modes.
> + */
> +
> +#ifndef _ASM_X86_X2APIC_H
> +#define _ASM_X86_X2APIC_H
> +
> +#include <asm/apic.h>
> +#include <asm/ipi.h>
> +#include <linux/cpumask.h>
> +
> +static int x2apic_apic_id_valid(int apicid)
> +{
> + return 1;
> +}
> +
> +static int x2apic_apic_id_registered(void)
> +{
> + return 1;
> +}
> +
> +static void
> +__x2apic_send_IPI_dest(unsigned int apicid, int vector, unsigned int dest)
> +{
> + unsigned long cfg = __prepare_ICR(0, vector, dest);
> + native_x2apic_icr_write(cfg, apicid);
> +}
> +
> +static unsigned int x2apic_get_apic_id(unsigned long id)
> +{
> + return id;
> +}
> +
> +static unsigned long x2apic_set_apic_id(unsigned int id)
> +{
> + return id;
> +}
> +
> +static int x2apic_phys_pkg_id(int initial_apicid, int index_msb)
> +{
> + return initial_apicid >> index_msb;
> +}
> +
> +static void x2apic_send_IPI_self(int vector)
> +{
> + apic_write(APIC_SELF_IPI, vector);
> +}
> +
> +#endif /* _ASM_X86_X2APIC_H */
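
Worth noting on the reintroduced header: in x2APIC mode the full 32-bit APIC
ID is used as-is, which is why x2apic_get_apic_id() and x2apic_set_apic_id()
are identity functions. A trivial sketch of the resulting round-trip property
(contrast flat mode, where the ID sits in bits 24-31 of the register):

    static bool x2apic_id_roundtrip(unsigned int id)
    {
            /* holds for any 32-bit id with the helpers above */
            return x2apic_set_apic_id(x2apic_get_apic_id(id)) == id;
    }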
> diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
> index aa4747569e23..5dd011a8b560 100644
> --- a/arch/x86/include/asm/x86_init.h
> +++ b/arch/x86/include/asm/x86_init.h
> @@ -51,13 +51,11 @@ struct x86_init_resources {
> * are set up.
> * @intr_init: interrupt init code
> * @trap_init: platform specific trap setup
> - * @intr_mode_init: interrupt delivery mode setup
> */
> struct x86_init_irqs {
> void (*pre_vector_init)(void);
> void (*intr_init)(void);
> void (*trap_init)(void);
> - void (*intr_mode_init)(void);
> };
>
> /**
> diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile
> index a6fcaf16cdbf..e59e341f9921 100644
> --- a/arch/x86/kernel/apic/Makefile
> +++ b/arch/x86/kernel/apic/Makefile
> @@ -7,7 +7,7 @@
> # In particular, smp_apic_timer_interrupt() is called in random places.
> KCOV_INSTRUMENT := n
>
> -obj-$(CONFIG_X86_LOCAL_APIC) += apic.o apic_common.o apic_noop.o ipi.o vector.o
> +obj-$(CONFIG_X86_LOCAL_APIC) += apic.o apic_noop.o ipi.o vector.o
> obj-y += hw_nmi.o
>
> obj-$(CONFIG_X86_IO_APIC) += io_apic.o
> diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
> index 6e272f3ea984..f72ecd5c39a7 100644
> --- a/arch/x86/kernel/apic/apic.c
> +++ b/arch/x86/kernel/apic/apic.c
> @@ -211,7 +211,11 @@ static inline int lapic_get_version(void)
> */
> static inline int lapic_is_integrated(void)
> {
> +#ifdef CONFIG_X86_64
> + return 1;
> +#else
> return APIC_INTEGRATED(lapic_get_version());
> +#endif
> }
>
> /*
> @@ -294,11 +298,14 @@ int get_physical_broadcast(void)
> */
> int lapic_get_maxlvt(void)
> {
> + unsigned int v;
> +
> + v = apic_read(APIC_LVR);
> /*
> * - the APIC is always integrated in 64-bit mode
> * - 82489DXs do not report # of LVT entries
> */
> - return lapic_is_integrated() ? GET_APIC_MAXLVT(apic_read(APIC_LVR)) : 2;
> + return APIC_INTEGRATED(GET_APIC_VERSION(v)) ? GET_APIC_MAXLVT(v) : 2;
> }
>
> /*
> @@ -1222,100 +1229,53 @@ void __init sync_Arb_IDs(void)
> APIC_INT_LEVELTRIG | APIC_DM_INIT);
> }
>
> -enum apic_intr_mode_id apic_intr_mode;
> -
> -static int __init apic_intr_mode_select(void)
> +/*
> + * An initial setup of the virtual wire mode.
> + */
> +void __init init_bsp_APIC(void)
> {
> - /* Check kernel option */
> - if (disable_apic) {
> - pr_info("APIC disabled via kernel command line\n");
> - return APIC_PIC;
> - }
> -
> - /* Check BIOS */
> -#ifdef CONFIG_X86_64
> - /* On 64-bit, the APIC must be integrated, Check local APIC only */
> - if (!boot_cpu_has(X86_FEATURE_APIC)) {
> - disable_apic = 1;
> - pr_info("APIC disabled by BIOS\n");
> - return APIC_PIC;
> - }
> -#else
> - /* On 32-bit, the APIC may be integrated APIC or 82489DX */
> -
> - /* Neither 82489DX nor integrated APIC ? */
> - if (!boot_cpu_has(X86_FEATURE_APIC) && !smp_found_config) {
> - disable_apic = 1;
> - return APIC_PIC;
> - }
> + unsigned int value;
>
> - /* If the BIOS pretends there is an integrated APIC ? */
> - if (!boot_cpu_has(X86_FEATURE_APIC) &&
> - APIC_INTEGRATED(boot_cpu_apic_version)) {
> - disable_apic = 1;
> - pr_err(FW_BUG "Local APIC %d not detected, force emulation\n",
> - boot_cpu_physical_apicid);
> - return APIC_PIC;
> - }
> -#endif
> + /*
> + * Don't do the setup now if we have an SMP BIOS as the
> + * through-I/O-APIC virtual wire mode might be active.
> + */
> + if (smp_found_config || !boot_cpu_has(X86_FEATURE_APIC))
> + return;
>
> - /* Check MP table or ACPI MADT configuration */
> - if (!smp_found_config) {
> - disable_ioapic_support();
> - if (!acpi_lapic) {
> - pr_info("APIC: ACPI MADT or MP tables are not detected\n");
> - return APIC_VIRTUAL_WIRE_NO_CONFIG;
> - }
> - return APIC_VIRTUAL_WIRE;
> - }
> + /*
> + * Do not trust the local APIC to be empty at bootup.
> + */
> + clear_local_APIC();
>
> -#ifdef CONFIG_SMP
> - /* If SMP should be disabled, then really disable it! */
> - if (!setup_max_cpus) {
> - pr_info("APIC: SMP mode deactivated\n");
> - return APIC_SYMMETRIC_IO_NO_ROUTING;
> - }
> + /*
> + * Enable APIC.
> + */
> + value = apic_read(APIC_SPIV);
> + value &= ~APIC_VECTOR_MASK;
> + value |= APIC_SPIV_APIC_ENABLED;
>
> - if (read_apic_id() != boot_cpu_physical_apicid) {
> - panic("Boot APIC ID in local APIC unexpected (%d vs %d)",
> - read_apic_id(), boot_cpu_physical_apicid);
> - /* Or can we switch back to PIC here? */
> - }
> +#ifdef CONFIG_X86_32
> + /* This bit is reserved on P4/Xeon and should be cleared */
> + if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
> + (boot_cpu_data.x86 == 15))
> + value &= ~APIC_SPIV_FOCUS_DISABLED;
> + else
> #endif
> + value |= APIC_SPIV_FOCUS_DISABLED;
> + value |= SPURIOUS_APIC_VECTOR;
> + apic_write(APIC_SPIV, value);
>
> - return APIC_SYMMETRIC_IO;
> -}
> -
> -/* Init the interrupt delivery mode for the BSP */
> -void __init apic_intr_mode_init(void)
> -{
> - bool upmode = IS_ENABLED(CONFIG_UP_LATE_INIT);
> -
> - apic_intr_mode = apic_intr_mode_select();
> -
> - switch (apic_intr_mode) {
> - case APIC_PIC:
> - pr_info("APIC: Keep in PIC mode(8259)\n");
> - return;
> - case APIC_VIRTUAL_WIRE:
> - pr_info("APIC: Switch to virtual wire mode setup\n");
> - default_setup_apic_routing();
> - break;
> - case APIC_VIRTUAL_WIRE_NO_CONFIG:
> - pr_info("APIC: Switch to virtual wire mode setup with no configuration\n");
> - upmode = true;
> - default_setup_apic_routing();
> - break;
> - case APIC_SYMMETRIC_IO:
> - pr_info("APIC: Switch to symmetric I/O mode setup\n");
> - default_setup_apic_routing();
> - break;
> - case APIC_SYMMETRIC_IO_NO_ROUTING:
> - pr_info("APIC: Switch to symmetric I/O mode setup in no SMP routine\n");
> - break;
> - }
> -
> - apic_bsp_setup(upmode);
> + /*
> + * Set up the virtual wire mode.
> + */
> + apic_write(APIC_LVT0, APIC_DM_EXTINT);
> + value = APIC_DM_NMI;
> + if (!lapic_is_integrated()) /* 82489DX */
> + value |= APIC_LVT_LEVEL_TRIGGER;
> + if (apic_extnmi == APIC_EXTNMI_NONE)
> + value |= APIC_LVT_MASKED;
> + apic_write(APIC_LVT1, value);
> }
>
> static void lapic_setup_esr(void)
> @@ -1539,9 +1499,7 @@ void setup_local_APIC(void)
> value = APIC_DM_NMI;
> else
> value = APIC_DM_NMI | APIC_LVT_MASKED;
> -
> - /* Is 82489DX ? */
> - if (!lapic_is_integrated())
> + if (!lapic_is_integrated()) /* 82489DX */
> value |= APIC_LVT_LEVEL_TRIGGER;
> apic_write(APIC_LVT1, value);
>
> @@ -1927,8 +1885,8 @@ void __init init_apic_mappings(void)
> * yeah -- we lie about apic_version
> * in case the apic was disabled via boot option
> * but it's not a problem for SMP compiled kernel
> - * since apic_intr_mode_select is prepared for such
> - * a case and disable smp mode
> + * since smp_sanity_check is prepared for such a case
> + * and disables SMP mode
> */
> boot_cpu_apic_version = GET_APIC_VERSION(apic_read(APIC_LVR));
> }
> @@ -2284,6 +2242,44 @@ int hard_smp_processor_id(void)
> return read_apic_id();
> }
>
> +void default_init_apic_ldr(void)
> +{
> + unsigned long val;
> +
> + apic_write(APIC_DFR, APIC_DFR_VALUE);
> + val = apic_read(APIC_LDR) & ~APIC_LDR_MASK;
> + val |= SET_APIC_LOGICAL_ID(1UL << smp_processor_id());
> + apic_write(APIC_LDR, val);
> +}
> +
> +int default_cpu_mask_to_apicid(const struct cpumask *mask,
> + struct irq_data *irqdata,
> + unsigned int *apicid)
> +{
> + unsigned int cpu = cpumask_first(mask);
> +
> + if (cpu >= nr_cpu_ids)
> + return -EINVAL;
> + *apicid = per_cpu(x86_cpu_to_apicid, cpu);
> + irq_data_update_effective_affinity(irqdata, cpumask_of(cpu));
> + return 0;
> +}
> +
> +int flat_cpu_mask_to_apicid(const struct cpumask *mask,
> + struct irq_data *irqdata,
> + unsigned int *apicid)
> +
> +{
> + struct cpumask *effmsk = irq_data_get_effective_affinity_mask(irqdata);
> + unsigned long cpu_mask = cpumask_bits(mask)[0] & APIC_ALL_CPUS;
> +
> + if (!cpu_mask)
> + return -EINVAL;
> + *apicid = (unsigned int)cpu_mask;
> + cpumask_bits(effmsk)[0] = cpu_mask;
> + return 0;
> +}
> +
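
On the two restored helpers above: flat logical mode gives each CPU the
logical ID 1UL << cpu, so flat_cpu_mask_to_apicid() can OR up to eight CPUs
into one 8-bit destination, while default_cpu_mask_to_apicid() targets a
single CPU's physical APIC ID. Illustrative only:

    /* What the flat variant computes from the first word of the mask. */
    static unsigned int flat_dest(unsigned long mask_word)
    {
            return mask_word & 0xff;  /* 0xff == APIC_ALL_CPUS: CPUs 0-7 */
    }
    /* e.g. CPUs 1 and 3 -> flat_dest(0x0a) == 0x0a, both hit by one IPI;
     * a zero result makes the real helper return -EINVAL */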
> /*
> * Override the generic EOI implementation with an optimized version.
> * Only called during early boot when only one CPU is active and with
> @@ -2326,27 +2322,72 @@ static void __init apic_bsp_up_setup(void)
> * Returns:
> * apic_id of BSP APIC
> */
> -void __init apic_bsp_setup(bool upmode)
> +int __init apic_bsp_setup(bool upmode)
> {
> + int id;
> +
> connect_bsp_APIC();
> if (upmode)
> apic_bsp_up_setup();
> setup_local_APIC();
>
> + if (x2apic_mode)
> + id = apic_read(APIC_LDR);
> + else
> + id = GET_APIC_LOGICAL_ID(apic_read(APIC_LDR));
> +
> enable_IO_APIC();
> end_local_APIC_setup();
> irq_remap_enable_fault_handling();
> setup_IO_APIC();
> + /* Setup local timer */
> + x86_init.timers.setup_percpu_clockev();
> + return id;
> +}
> +
> +/*
> + * This initializes the IO-APIC and APIC hardware if this is
> + * a UP kernel.
> + */
> +int __init APIC_init_uniprocessor(void)
> +{
> + if (disable_apic) {
> + pr_info("Apic disabled\n");
> + return -1;
> + }
> +#ifdef CONFIG_X86_64
> + if (!boot_cpu_has(X86_FEATURE_APIC)) {
> + disable_apic = 1;
> + pr_info("Apic disabled by BIOS\n");
> + return -1;
> + }
> +#else
> + if (!smp_found_config && !boot_cpu_has(X86_FEATURE_APIC))
> + return -1;
> +
> + /*
> + * Complain if the BIOS pretends there is one.
> + */
> + if (!boot_cpu_has(X86_FEATURE_APIC) &&
> + APIC_INTEGRATED(boot_cpu_apic_version)) {
> + pr_err("BIOS bug, local APIC 0x%x not detected!...\n",
> + boot_cpu_physical_apicid);
> + return -1;
> + }
> +#endif
> +
> + if (!smp_found_config)
> + disable_ioapic_support();
> +
> + default_setup_apic_routing();
> + apic_bsp_setup(true);
> + return 0;
> }
>
> #ifdef CONFIG_UP_LATE_INIT
> void __init up_late_init(void)
> {
> - if (apic_intr_mode == APIC_PIC)
> - return;
> -
> - /* Setup local timer */
> - x86_init.timers.setup_percpu_clockev();
> + APIC_init_uniprocessor();
> }
> #endif
>
> diff --git a/arch/x86/kernel/apic/apic_common.c b/arch/x86/kernel/apic/apic_common.c
> deleted file mode 100644
> index a360801779ae..000000000000
> --- a/arch/x86/kernel/apic/apic_common.c
> +++ /dev/null
> @@ -1,46 +0,0 @@
> -/*
> - * Common functions shared between the various APIC flavours
> - *
> - * SPDX-License-Identifier: GPL-2.0
> - */
> -#include <linux/irq.h>
> -#include <asm/apic.h>
> -
> -u32 apic_default_calc_apicid(unsigned int cpu)
> -{
> - return per_cpu(x86_cpu_to_apicid, cpu);
> -}
> -
> -u32 apic_flat_calc_apicid(unsigned int cpu)
> -{
> - return 1U << cpu;
> -}
> -
> -bool default_check_apicid_used(physid_mask_t *map, int apicid)
> -{
> - return physid_isset(apicid, *map);
> -}
> -
> -void default_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap)
> -{
> - *retmap = *phys_map;
> -}
> -
> -int default_cpu_present_to_apicid(int mps_cpu)
> -{
> - if (mps_cpu < nr_cpu_ids && cpu_present(mps_cpu))
> - return (int)per_cpu(x86_bios_cpu_apicid, mps_cpu);
> - else
> - return BAD_APICID;
> -}
> -EXPORT_SYMBOL_GPL(default_cpu_present_to_apicid);
> -
> -int default_check_phys_apicid_present(int phys_apicid)
> -{
> - return physid_isset(phys_apicid, phys_cpu_present_map);
> -}
> -
> -int default_apic_id_valid(int apicid)
> -{
> - return (apicid < 255);
> -}
> diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c
> index aa85690e9b64..dedd5a41ba48 100644
> --- a/arch/x86/kernel/apic/apic_flat_64.c
> +++ b/arch/x86/kernel/apic/apic_flat_64.c
> @@ -119,7 +119,7 @@ static unsigned int flat_get_apic_id(unsigned long x)
> return (x >> 24) & 0xFF;
> }
>
> -static u32 set_apic_id(unsigned int id)
> +static unsigned long set_apic_id(unsigned int id)
> {
> return (id & 0xFF) << 24;
> }
> @@ -154,10 +154,12 @@ static struct apic apic_flat __ro_after_init = {
> .irq_delivery_mode = dest_LowestPrio,
> .irq_dest_mode = 1, /* logical */
>
> + .target_cpus = online_target_cpus,
> .disable_esr = 0,
> .dest_logical = APIC_DEST_LOGICAL,
> .check_apicid_used = NULL,
>
> + .vector_allocation_domain = flat_vector_allocation_domain,
> .init_apic_ldr = flat_init_apic_ldr,
>
> .ioapic_phys_id_map = NULL,
> @@ -170,7 +172,7 @@ static struct apic apic_flat __ro_after_init = {
> .get_apic_id = flat_get_apic_id,
> .set_apic_id = set_apic_id,
>
> - .calc_dest_apicid = apic_flat_calc_apicid,
> + .cpu_mask_to_apicid = flat_cpu_mask_to_apicid,
>
> .send_IPI = default_send_IPI_single,
> .send_IPI_mask = flat_send_IPI_mask,
> @@ -247,10 +249,12 @@ static struct apic apic_physflat __ro_after_init = {
> .irq_delivery_mode = dest_Fixed,
> .irq_dest_mode = 0, /* physical */
>
> + .target_cpus = online_target_cpus,
> .disable_esr = 0,
> .dest_logical = 0,
> .check_apicid_used = NULL,
>
> + .vector_allocation_domain = default_vector_allocation_domain,
> /* not needed, but shouldn't hurt: */
> .init_apic_ldr = flat_init_apic_ldr,
>
> @@ -264,7 +268,7 @@ static struct apic apic_physflat __ro_after_init = {
> .get_apic_id = flat_get_apic_id,
> .set_apic_id = set_apic_id,
>
> - .calc_dest_apicid = apic_default_calc_apicid,
> + .cpu_mask_to_apicid = default_cpu_mask_to_apicid,
>
> .send_IPI = default_send_IPI_single_phys,
> .send_IPI_mask = default_send_IPI_mask_sequence_phys,
> diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c
> index 7b659c4480c9..c8d211277315 100644
> --- a/arch/x86/kernel/apic/apic_noop.c
> +++ b/arch/x86/kernel/apic/apic_noop.c
> @@ -84,6 +84,20 @@ static int noop_apic_id_registered(void)
> return physid_isset(0, phys_cpu_present_map);
> }
>
> +static const struct cpumask *noop_target_cpus(void)
> +{
> + /* only BSP here */
> + return cpumask_of(0);
> +}
> +
> +static void noop_vector_allocation_domain(int cpu, struct cpumask *retmask,
> + const struct cpumask *mask)
> +{
> + if (cpu != 0)
> + pr_warning("APIC: Vector allocated for non-BSP cpu\n");
> + cpumask_copy(retmask, cpumask_of(cpu));
> +}
> +
> static u32 noop_apic_read(u32 reg)
> {
> WARN_ON_ONCE(boot_cpu_has(X86_FEATURE_APIC) && !disable_apic);
> @@ -95,13 +109,6 @@ static void noop_apic_write(u32 reg, u32 v)
> WARN_ON_ONCE(boot_cpu_has(X86_FEATURE_APIC) && !disable_apic);
> }
>
> -#ifdef CONFIG_X86_32
> -static int noop_x86_32_early_logical_apicid(int cpu)
> -{
> - return BAD_APICID;
> -}
> -#endif
> -
> struct apic apic_noop __ro_after_init = {
> .name = "noop",
> .probe = noop_probe,
> @@ -114,10 +121,12 @@ struct apic apic_noop __ro_after_init = {
> /* logical delivery broadcast to all CPUs: */
> .irq_dest_mode = 1,
>
> + .target_cpus = noop_target_cpus,
> .disable_esr = 0,
> .dest_logical = APIC_DEST_LOGICAL,
> .check_apicid_used = default_check_apicid_used,
>
> + .vector_allocation_domain = noop_vector_allocation_domain,
> .init_apic_ldr = noop_init_apic_ldr,
>
> .ioapic_phys_id_map = default_ioapic_phys_id_map,
> @@ -133,7 +142,7 @@ struct apic apic_noop __ro_after_init = {
> .get_apic_id = noop_get_apic_id,
> .set_apic_id = NULL,
>
> - .calc_dest_apicid = apic_flat_calc_apicid,
> + .cpu_mask_to_apicid = flat_cpu_mask_to_apicid,
>
> .send_IPI = noop_send_IPI,
> .send_IPI_mask = noop_send_IPI_mask,
> diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c
> index 134e04506ab4..2fda912219a6 100644
> --- a/arch/x86/kernel/apic/apic_numachip.c
> +++ b/arch/x86/kernel/apic/apic_numachip.c
> @@ -38,7 +38,7 @@ static unsigned int numachip1_get_apic_id(unsigned long x)
> return id;
> }
>
> -static u32 numachip1_set_apic_id(unsigned int id)
> +static unsigned long numachip1_set_apic_id(unsigned int id)
> {
> return (id & 0xff) << 24;
> }
> @@ -51,7 +51,7 @@ static unsigned int numachip2_get_apic_id(unsigned long x)
> return ((mcfg >> (28 - 8)) & 0xfff00) | (x >> 24);
> }
>
> -static u32 numachip2_set_apic_id(unsigned int id)
> +static unsigned long numachip2_set_apic_id(unsigned int id)
> {
> return id << 24;
> }
> @@ -249,10 +249,12 @@ static const struct apic apic_numachip1 __refconst = {
> .irq_delivery_mode = dest_Fixed,
> .irq_dest_mode = 0, /* physical */
>
> + .target_cpus = online_target_cpus,
> .disable_esr = 0,
> .dest_logical = 0,
> .check_apicid_used = NULL,
>
> + .vector_allocation_domain = default_vector_allocation_domain,
> .init_apic_ldr = flat_init_apic_ldr,
>
> .ioapic_phys_id_map = NULL,
> @@ -265,7 +267,7 @@ static const struct apic apic_numachip1 __refconst = {
> .get_apic_id = numachip1_get_apic_id,
> .set_apic_id = numachip1_set_apic_id,
>
> - .calc_dest_apicid = apic_default_calc_apicid,
> + .cpu_mask_to_apicid = default_cpu_mask_to_apicid,
>
> .send_IPI = numachip_send_IPI_one,
> .send_IPI_mask = numachip_send_IPI_mask,
> @@ -298,10 +300,12 @@ static const struct apic apic_numachip2 __refconst = {
> .irq_delivery_mode = dest_Fixed,
> .irq_dest_mode = 0, /* physical */
>
> + .target_cpus = online_target_cpus,
> .disable_esr = 0,
> .dest_logical = 0,
> .check_apicid_used = NULL,
>
> + .vector_allocation_domain = default_vector_allocation_domain,
> .init_apic_ldr = flat_init_apic_ldr,
>
> .ioapic_phys_id_map = NULL,
> @@ -314,7 +318,7 @@ static const struct apic apic_numachip2 __refconst = {
> .get_apic_id = numachip2_get_apic_id,
> .set_apic_id = numachip2_set_apic_id,
>
> - .calc_dest_apicid = apic_default_calc_apicid,
> + .cpu_mask_to_apicid = default_cpu_mask_to_apicid,
>
> .send_IPI = numachip_send_IPI_one,
> .send_IPI_mask = numachip_send_IPI_mask,
> diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c
> index afee386ff711..e12fbcfc9571 100644
> --- a/arch/x86/kernel/apic/bigsmp_32.c
> +++ b/arch/x86/kernel/apic/bigsmp_32.c
> @@ -27,9 +27,9 @@ static int bigsmp_apic_id_registered(void)
> return 1;
> }
>
> -static bool bigsmp_check_apicid_used(physid_mask_t *map, int apicid)
> +static unsigned long bigsmp_check_apicid_used(physid_mask_t *map, int apicid)
> {
> - return false;
> + return 0;
> }
>
> static int bigsmp_early_logical_apicid(int cpu)
> @@ -155,10 +155,12 @@ static struct apic apic_bigsmp __ro_after_init = {
> /* phys delivery to target CPU: */
> .irq_dest_mode = 0,
>
> + .target_cpus = default_target_cpus,
> .disable_esr = 1,
> .dest_logical = 0,
> .check_apicid_used = bigsmp_check_apicid_used,
>
> + .vector_allocation_domain = default_vector_allocation_domain,
> .init_apic_ldr = bigsmp_init_apic_ldr,
>
> .ioapic_phys_id_map = bigsmp_ioapic_phys_id_map,
> @@ -171,7 +173,7 @@ static struct apic apic_bigsmp __ro_after_init = {
> .get_apic_id = bigsmp_get_apic_id,
> .set_apic_id = NULL,
>
> - .calc_dest_apicid = apic_default_calc_apicid,
> + .cpu_mask_to_apicid = default_cpu_mask_to_apicid,
>
> .send_IPI = default_send_IPI_single_phys,
> .send_IPI_mask = default_send_IPI_mask_sequence_phys,
> diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
> index 201579dc5242..18c8aca5bae7 100644
> --- a/arch/x86/kernel/apic/io_apic.c
> +++ b/arch/x86/kernel/apic/io_apic.c
> @@ -1014,7 +1014,6 @@ static int alloc_isa_irq_from_domain(struct irq_domain *domain,
> info->ioapic_pin))
> return -ENOMEM;
> } else {
> - info->flags |= X86_IRQ_ALLOC_LEGACY;
> irq = __irq_domain_alloc_irqs(domain, irq, 1, node, info, true,
> NULL);
> if (irq >= 0) {
> @@ -1587,43 +1586,6 @@ static int __init notimercheck(char *s)
> }
> __setup("no_timer_check", notimercheck);
>
> -static void __init delay_with_tsc(void)
> -{
> - unsigned long long start, now;
> - unsigned long end = jiffies + 4;
> -
> - start = rdtsc();
> -
> - /*
> - * We don't know the TSC frequency yet, but waiting for
> - * 40000000000/HZ TSC cycles is safe:
> - * 4 GHz == 10 jiffies
> - * 1 GHz == 40 jiffies
> - */
> - do {
> - rep_nop();
> - now = rdtsc();
> - } while ((now - start) < 40000000000UL / HZ &&
> - time_before_eq(jiffies, end));
> -}
> -
> -static void __init delay_without_tsc(void)
> -{
> - unsigned long end = jiffies + 4;
> - int band = 1;
> -
> - /*
> - * We don't know any frequency yet, but waiting for
> - * 40940000000/HZ cycles is safe:
> - * 4 GHz == 10 jiffies
> - * 1 GHz == 40 jiffies
> - * 1 << 1 + 1 << 2 +...+ 1 << 11 = 4094
> - */
> - do {
> - __delay(((1U << band++) * 10000000UL) / HZ);
> - } while (band < 12 && time_before_eq(jiffies, end));
> -}
> -
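
A note on the arithmetic in the two delay helpers this revert drops: waiting
40000000000/HZ TSC cycles lasts 40/f jiffies on an f-GHz part, i.e. 40
jiffies at 1 GHz and 10 at 4 GHz, so the loop always covers the ~10 ticks
timer_irq_works() needs even before the TSC is calibrated. A plain C sanity
check of those numbers, not kernel code:

    /* (40e9/HZ cycles) / (f * 1e9 cycles/s) = 40/(HZ*f) s = 40/f jiffies */
    static unsigned long jiffies_spanned(unsigned long f_ghz)
    {
            return 40UL / f_ghz;    /* 1 GHz -> 40, 4 GHz -> 10 */
    }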
> /*
> * There is a nasty bug in some older SMP boards, their mptable lies
> * about the timer IRQ. We do the following to work around the situation:
> @@ -1642,12 +1604,8 @@ static int __init timer_irq_works(void)
>
> local_save_flags(flags);
> local_irq_enable();
> -
> - if (boot_cpu_has(X86_FEATURE_TSC))
> - delay_with_tsc();
> - else
> - delay_without_tsc();
> -
> + /* Let ten ticks pass... */
> + mdelay((10 * 1000) / HZ);
> local_irq_restore(flags);
>
> /*
> @@ -1863,36 +1821,26 @@ static void ioapic_ir_ack_level(struct irq_data *irq_data)
> eoi_ioapic_pin(data->entry.vector, data);
> }
>
> -static void ioapic_configure_entry(struct irq_data *irqd)
> -{
> - struct mp_chip_data *mpd = irqd->chip_data;
> - struct irq_cfg *cfg = irqd_cfg(irqd);
> - struct irq_pin_list *entry;
> -
> - /*
> - * Only update when the parent is the vector domain, don't touch it
> - * if the parent is the remapping domain. Check the installed
> - * ioapic chip to verify that.
> - */
> - if (irqd->chip == &ioapic_chip) {
> - mpd->entry.dest = cfg->dest_apicid;
> - mpd->entry.vector = cfg->vector;
> - }
> - for_each_irq_pin(entry, mpd->irq_2_pin)
> - __ioapic_write_entry(entry->apic, entry->pin, mpd->entry);
> -}
> -
> static int ioapic_set_affinity(struct irq_data *irq_data,
> const struct cpumask *mask, bool force)
> {
> struct irq_data *parent = irq_data->parent_data;
> + struct mp_chip_data *data = irq_data->chip_data;
> + struct irq_pin_list *entry;
> + struct irq_cfg *cfg;
> unsigned long flags;
> int ret;
>
> ret = parent->chip->irq_set_affinity(parent, mask, force);
> raw_spin_lock_irqsave(&ioapic_lock, flags);
> - if (ret >= 0 && ret != IRQ_SET_MASK_OK_DONE)
> - ioapic_configure_entry(irq_data);
> + if (ret >= 0 && ret != IRQ_SET_MASK_OK_DONE) {
> + cfg = irqd_cfg(irq_data);
> + data->entry.dest = cfg->dest_apicid;
> + data->entry.vector = cfg->vector;
> + for_each_irq_pin(entry, data->irq_2_pin)
> + __ioapic_write_entry(entry->apic, entry->pin,
> + data->entry);
> + }
> raw_spin_unlock_irqrestore(&ioapic_lock, flags);
>
> return ret;
> @@ -2565,9 +2513,52 @@ int acpi_get_override_irq(u32 gsi, int *trigger, int *polarity)
> }
>
> /*
> - * This function updates target affinity of IOAPIC interrupts to include
> - * the CPUs which came online during SMP bringup.
> + * This function is currently only a helper for the i386 SMP boot process,
> + * where we need to reprogram the ioredtbls to cater for the CPUs which have
> + * come online, so the mask in all cases should simply be apic->target_cpus().
> */
> +#ifdef CONFIG_SMP
> +void __init setup_ioapic_dest(void)
> +{
> + int pin, ioapic, irq, irq_entry;
> + const struct cpumask *mask;
> + struct irq_desc *desc;
> + struct irq_data *idata;
> + struct irq_chip *chip;
> +
> + if (skip_ioapic_setup == 1)
> + return;
> +
> + for_each_ioapic_pin(ioapic, pin) {
> + irq_entry = find_irq_entry(ioapic, pin, mp_INT);
> + if (irq_entry == -1)
> + continue;
> +
> + irq = pin_2_irq(irq_entry, ioapic, pin, 0);
> + if (irq < 0 || !mp_init_irq_at_boot(ioapic, irq))
> + continue;
> +
> + desc = irq_to_desc(irq);
> + raw_spin_lock_irq(&desc->lock);
> + idata = irq_desc_get_irq_data(desc);
> +
> + /*
> + * Honour affinities which have been set in early boot
> + */
> + if (!irqd_can_balance(idata) || irqd_affinity_was_set(idata))
> + mask = irq_data_get_affinity_mask(idata);
> + else
> + mask = apic->target_cpus();
> +
> + chip = irq_data_get_irq_chip(idata);
> + /* Might be lapic_chip for irq 0 */
> + if (chip->irq_set_affinity)
> + chip->irq_set_affinity(idata, mask, false);
> + raw_spin_unlock_irq(&desc->lock);
> + }
> +}
> +#endif
> +
> #define IOAPIC_RESOURCE_NAME_SIZE 11
>
> static struct resource *ioapic_resources;
> @@ -2991,9 +2982,12 @@ int mp_irqdomain_activate(struct irq_domain *domain,
> struct irq_data *irq_data, bool early)
> {
> unsigned long flags;
> + struct irq_pin_list *entry;
> + struct mp_chip_data *data = irq_data->chip_data;
>
> raw_spin_lock_irqsave(&ioapic_lock, flags);
> - ioapic_configure_entry(irq_data);
> + for_each_irq_pin(entry, data->irq_2_pin)
> + __ioapic_write_entry(entry->apic, entry->pin, data->entry);
> raw_spin_unlock_irqrestore(&ioapic_lock, flags);
> return 0;
> }
> diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c
> index fa22017de806..63287659adb6 100644
> --- a/arch/x86/kernel/apic/probe_32.c
> +++ b/arch/x86/kernel/apic/probe_32.c
> @@ -66,31 +66,6 @@ static void setup_apic_flat_routing(void)
> #endif
> }
>
> -static int default_apic_id_registered(void)
> -{
> - return physid_isset(read_apic_id(), phys_cpu_present_map);
> -}
> -
> -/*
> - * Set up the logical destination ID. Intel recommends to set DFR, LDR and
> - * TPR before enabling an APIC. See e.g. "AP-388 82489DX User's Manual"
> - * (Intel document number 292116).
> - */
> -static void default_init_apic_ldr(void)
> -{
> - unsigned long val;
> -
> - apic_write(APIC_DFR, APIC_DFR_VALUE);
> - val = apic_read(APIC_LDR) & ~APIC_LDR_MASK;
> - val |= SET_APIC_LOGICAL_ID(1UL << smp_processor_id());
> - apic_write(APIC_LDR, val);
> -}
> -
> -static int default_phys_pkg_id(int cpuid_apic, int index_msb)
> -{
> - return cpuid_apic >> index_msb;
> -}
> -
> /* should be called last. */
> static int probe_default(void)
> {
> @@ -109,10 +84,12 @@ static struct apic apic_default __ro_after_init = {
> /* logical delivery broadcast to all CPUs: */
> .irq_dest_mode = 1,
>
> + .target_cpus = default_target_cpus,
> .disable_esr = 0,
> .dest_logical = APIC_DEST_LOGICAL,
> .check_apicid_used = default_check_apicid_used,
>
> + .vector_allocation_domain = flat_vector_allocation_domain,
> .init_apic_ldr = default_init_apic_ldr,
>
> .ioapic_phys_id_map = default_ioapic_phys_id_map,
> @@ -125,7 +102,7 @@ static struct apic apic_default __ro_after_init = {
> .get_apic_id = default_get_apic_id,
> .set_apic_id = NULL,
>
> - .calc_dest_apicid = apic_flat_calc_apicid,
> + .cpu_mask_to_apicid = flat_cpu_mask_to_apicid,
>
> .send_IPI = default_send_IPI_single,
> .send_IPI_mask = default_send_IPI_mask_logical,
> diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
> index 6a823a25eaff..e42fdd44972f 100644
> --- a/arch/x86/kernel/apic/vector.c
> +++ b/arch/x86/kernel/apic/vector.c
> @@ -11,7 +11,6 @@
> * published by the Free Software Foundation.
> */
> #include <linux/interrupt.h>
> -#include <linux/seq_file.h>
> #include <linux/init.h>
> #include <linux/compiler.h>
> #include <linux/slab.h>
> @@ -22,30 +21,20 @@
> #include <asm/desc.h>
> #include <asm/irq_remapping.h>
>
> -#include <asm/trace/irq_vectors.h>
> -
> struct apic_chip_data {
> - struct irq_cfg hw_irq_cfg;
> - unsigned int vector;
> - unsigned int prev_vector;
> - unsigned int cpu;
> - unsigned int prev_cpu;
> - unsigned int irq;
> - struct hlist_node clist;
> - unsigned int move_in_progress : 1,
> - is_managed : 1,
> - can_reserve : 1,
> - has_reserved : 1;
> + struct irq_cfg cfg;
> + cpumask_var_t domain;
> + cpumask_var_t old_domain;
> + u8 move_in_progress : 1;
> };
>
> struct irq_domain *x86_vector_domain;
> EXPORT_SYMBOL_GPL(x86_vector_domain);
> static DEFINE_RAW_SPINLOCK(vector_lock);
> -static cpumask_var_t vector_searchmask;
> +static cpumask_var_t vector_cpumask, vector_searchmask, searched_cpumask;
> static struct irq_chip lapic_controller;
> -static struct irq_matrix *vector_matrix;
> -#ifdef CONFIG_SMP
> -static DEFINE_PER_CPU(struct hlist_head, cleanup_list);
> +#ifdef CONFIG_X86_IO_APIC
> +static struct apic_chip_data *legacy_irq_data[NR_IRQS_LEGACY];
> #endif
>
> void lock_vector_lock(void)
> @@ -61,37 +50,22 @@ void unlock_vector_lock(void)
> raw_spin_unlock(&vector_lock);
> }
>
> -void init_irq_alloc_info(struct irq_alloc_info *info,
> - const struct cpumask *mask)
> -{
> - memset(info, 0, sizeof(*info));
> - info->mask = mask;
> -}
> -
> -void copy_irq_alloc_info(struct irq_alloc_info *dst, struct irq_alloc_info *src)
> +static struct apic_chip_data *apic_chip_data(struct irq_data *irq_data)
> {
> - if (src)
> - *dst = *src;
> - else
> - memset(dst, 0, sizeof(*dst));
> -}
> -
> -static struct apic_chip_data *apic_chip_data(struct irq_data *irqd)
> -{
> - if (!irqd)
> + if (!irq_data)
> return NULL;
>
> - while (irqd->parent_data)
> - irqd = irqd->parent_data;
> + while (irq_data->parent_data)
> + irq_data = irq_data->parent_data;
>
> - return irqd->chip_data;
> + return irq_data->chip_data;
> }
>
> -struct irq_cfg *irqd_cfg(struct irq_data *irqd)
> +struct irq_cfg *irqd_cfg(struct irq_data *irq_data)
> {
> - struct apic_chip_data *apicd = apic_chip_data(irqd);
> + struct apic_chip_data *data = apic_chip_data(irq_data);
>
> - return apicd ? &apicd->hw_irq_cfg : NULL;
> + return data ? &data->cfg : NULL;
> }
> EXPORT_SYMBOL_GPL(irqd_cfg);
>
> @@ -102,395 +76,270 @@ struct irq_cfg *irq_cfg(unsigned int irq)
>
> static struct apic_chip_data *alloc_apic_chip_data(int node)
> {
> - struct apic_chip_data *apicd;
> -
> - apicd = kzalloc_node(sizeof(*apicd), GFP_KERNEL, node);
> - if (apicd)
> - INIT_HLIST_NODE(&apicd->clist);
> - return apicd;
> -}
> -
> -static void free_apic_chip_data(struct apic_chip_data *apicd)
> -{
> - kfree(apicd);
> -}
> -
> -static void apic_update_irq_cfg(struct irq_data *irqd, unsigned int vector,
> - unsigned int cpu)
> -{
> - struct apic_chip_data *apicd = apic_chip_data(irqd);
> + struct apic_chip_data *data;
>
> - lockdep_assert_held(&vector_lock);
> -
> - apicd->hw_irq_cfg.vector = vector;
> - apicd->hw_irq_cfg.dest_apicid = apic->calc_dest_apicid(cpu);
> - irq_data_update_effective_affinity(irqd, cpumask_of(cpu));
> - trace_vector_config(irqd->irq, vector, cpu,
> - apicd->hw_irq_cfg.dest_apicid);
> -}
> -
> -static void apic_update_vector(struct irq_data *irqd, unsigned int newvec,
> - unsigned int newcpu)
> -{
> - struct apic_chip_data *apicd = apic_chip_data(irqd);
> - struct irq_desc *desc = irq_data_to_desc(irqd);
> -
> - lockdep_assert_held(&vector_lock);
> -
> - trace_vector_update(irqd->irq, newvec, newcpu, apicd->vector,
> - apicd->cpu);
> -
> - /* Setup the vector move, if required */
> - if (apicd->vector && cpu_online(apicd->cpu)) {
> - apicd->move_in_progress = true;
> - apicd->prev_vector = apicd->vector;
> - apicd->prev_cpu = apicd->cpu;
> - } else {
> - apicd->prev_vector = 0;
> + data = kzalloc_node(sizeof(*data), GFP_KERNEL, node);
> + if (!data)
> + return NULL;
> + if (!zalloc_cpumask_var_node(&data->domain, GFP_KERNEL, node))
> + goto out_data;
> + if (!zalloc_cpumask_var_node(&data->old_domain, GFP_KERNEL, node))
> + goto out_domain;
> + return data;
> +out_domain:
> + free_cpumask_var(data->domain);
> +out_data:
> + kfree(data);
> + return NULL;
> +}
> +
> +static void free_apic_chip_data(struct apic_chip_data *data)
> +{
> + if (data) {
> + free_cpumask_var(data->domain);
> + free_cpumask_var(data->old_domain);
> + kfree(data);
> }
> -
> - apicd->vector = newvec;
> - apicd->cpu = newcpu;
> - BUG_ON(!IS_ERR_OR_NULL(per_cpu(vector_irq, newcpu)[newvec]));
> - per_cpu(vector_irq, newcpu)[newvec] = desc;
> }
>
> -static void vector_assign_managed_shutdown(struct irq_data *irqd)
> +static int __assign_irq_vector(int irq, struct apic_chip_data *d,
> + const struct cpumask *mask,
> + struct irq_data *irqdata)
> {
> - unsigned int cpu = cpumask_first(cpu_online_mask);
> -
> - apic_update_irq_cfg(irqd, MANAGED_IRQ_SHUTDOWN_VECTOR, cpu);
> -}
> + /*
> + * NOTE! The local APIC isn't very good at handling
> + * multiple interrupts at the same interrupt level.
> + * As the interrupt level is determined by taking the
> + * vector number and shifting that right by 4, we
> + * want to spread these out a bit so that they don't
> + * all fall in the same interrupt level.
> + *
> + * Also, we've got to be careful not to trash gate
> + * 0x80, because int 0x80 is hm, kind of importantish. ;)
> + */
> + static int current_vector = FIRST_EXTERNAL_VECTOR + VECTOR_OFFSET_START;
> + static int current_offset = VECTOR_OFFSET_START % 16;
> + int cpu, vector;
>
> -static int reserve_managed_vector(struct irq_data *irqd)
> -{
> - const struct cpumask *affmsk = irq_data_get_affinity_mask(irqd);
> - struct apic_chip_data *apicd = apic_chip_data(irqd);
> - unsigned long flags;
> - int ret;
> + /*
> + * If there is still a move in progress or the previous move has not
> + * been cleaned up completely, tell the caller to come back later.
> + */
> + if (d->move_in_progress ||
> + cpumask_intersects(d->old_domain, cpu_online_mask))
> + return -EBUSY;
>
> - raw_spin_lock_irqsave(&vector_lock, flags);
> - apicd->is_managed = true;
> - ret = irq_matrix_reserve_managed(vector_matrix, affmsk);
> - raw_spin_unlock_irqrestore(&vector_lock, flags);
> - trace_vector_reserve_managed(irqd->irq, ret);
> - return ret;
> -}
> +	/* Only try to allocate irqs on cpus that are present */
> + cpumask_clear(d->old_domain);
> + cpumask_clear(searched_cpumask);
> + cpu = cpumask_first_and(mask, cpu_online_mask);
> + while (cpu < nr_cpu_ids) {
> + int new_cpu, offset;
>
> -static void reserve_irq_vector_locked(struct irq_data *irqd)
> -{
> - struct apic_chip_data *apicd = apic_chip_data(irqd);
> + /* Get the possible target cpus for @mask/@cpu from the apic */
> + apic->vector_allocation_domain(cpu, vector_cpumask, mask);
>
> - irq_matrix_reserve(vector_matrix);
> - apicd->can_reserve = true;
> - apicd->has_reserved = true;
> - trace_vector_reserve(irqd->irq, 0);
> - vector_assign_managed_shutdown(irqd);
> -}
> + /*
> + * Clear the offline cpus from @vector_cpumask for searching
> + * and verify whether the result overlaps with @mask. If true,
> + * then the call to apic->cpu_mask_to_apicid() will
> + * succeed as well. If not, no point in trying to find a
> + * vector in this mask.
> + */
> + cpumask_and(vector_searchmask, vector_cpumask, cpu_online_mask);
> + if (!cpumask_intersects(vector_searchmask, mask))
> + goto next_cpu;
> +
> + if (cpumask_subset(vector_cpumask, d->domain)) {
> + if (cpumask_equal(vector_cpumask, d->domain))
> + goto success;
> + /*
> +			 * Mark the cpus which are no longer in the mask for
> + * cleanup.
> + */
> + cpumask_andnot(d->old_domain, d->domain, vector_cpumask);
> + vector = d->cfg.vector;
> + goto update;
> + }
>
> -static int reserve_irq_vector(struct irq_data *irqd)
> -{
> - unsigned long flags;
> + vector = current_vector;
> + offset = current_offset;
> +next:
> + vector += 16;
> + if (vector >= FIRST_SYSTEM_VECTOR) {
> + offset = (offset + 1) % 16;
> + vector = FIRST_EXTERNAL_VECTOR + offset;
> + }
>
> - raw_spin_lock_irqsave(&vector_lock, flags);
> - reserve_irq_vector_locked(irqd);
> - raw_spin_unlock_irqrestore(&vector_lock, flags);
> - return 0;
> -}
> + /* If the search wrapped around, try the next cpu */
> + if (unlikely(current_vector == vector))
> + goto next_cpu;
>
> -static int allocate_vector(struct irq_data *irqd, const struct cpumask *dest)
> -{
> - struct apic_chip_data *apicd = apic_chip_data(irqd);
> - bool resvd = apicd->has_reserved;
> - unsigned int cpu = apicd->cpu;
> - int vector = apicd->vector;
> + if (test_bit(vector, used_vectors))
> + goto next;
>
> - lockdep_assert_held(&vector_lock);
> + for_each_cpu(new_cpu, vector_searchmask) {
> + if (!IS_ERR_OR_NULL(per_cpu(vector_irq, new_cpu)[vector]))
> + goto next;
> + }
> + /* Found one! */
> + current_vector = vector;
> + current_offset = offset;
> + /* Schedule the old vector for cleanup on all cpus */
> + if (d->cfg.vector)
> + cpumask_copy(d->old_domain, d->domain);
> + for_each_cpu(new_cpu, vector_searchmask)
> + per_cpu(vector_irq, new_cpu)[vector] = irq_to_desc(irq);
> + goto update;
> +
> +next_cpu:
> + /*
> + * We exclude the current @vector_cpumask from the requested
> + * @mask and try again with the next online cpu in the
> + * result. We cannot modify @mask, so we use @vector_cpumask
> + * as a temporary buffer here as it will be reassigned when
> + * calling apic->vector_allocation_domain() above.
> + */
> + cpumask_or(searched_cpumask, searched_cpumask, vector_cpumask);
> + cpumask_andnot(vector_cpumask, mask, searched_cpumask);
> + cpu = cpumask_first_and(vector_cpumask, cpu_online_mask);
> + continue;
> + }
> + return -ENOSPC;
>
> +update:
> /*
> - * If the current target CPU is online and in the new requested
> - * affinity mask, there is no point in moving the interrupt from
> - * one CPU to another.
> + * Exclude offline cpus from the cleanup mask and set the
> + * move_in_progress flag when the result is not empty.
> */
> - if (vector && cpu_online(cpu) && cpumask_test_cpu(cpu, dest))
> - return 0;
> -
> - vector = irq_matrix_alloc(vector_matrix, dest, resvd, &cpu);
> - if (vector > 0)
> - apic_update_vector(irqd, vector, cpu);
> - trace_vector_alloc(irqd->irq, vector, resvd, vector);
> - return vector;
> -}
> -
> -static int assign_vector_locked(struct irq_data *irqd,
> - const struct cpumask *dest)
> -{
> - struct apic_chip_data *apicd = apic_chip_data(irqd);
> - int vector = allocate_vector(irqd, dest);
> -
> - if (vector < 0)
> - return vector;
> -
> - apic_update_irq_cfg(irqd, apicd->vector, apicd->cpu);
> + cpumask_and(d->old_domain, d->old_domain, cpu_online_mask);
> + d->move_in_progress = !cpumask_empty(d->old_domain);
> + d->cfg.old_vector = d->move_in_progress ? d->cfg.vector : 0;
> + d->cfg.vector = vector;
> + cpumask_copy(d->domain, vector_cpumask);
> +success:
> + /*
> + * Cache destination APIC IDs into cfg->dest_apicid. This cannot fail
> +	 * as we already established that mask & d->domain & cpu_online_mask
> + * is not empty.
> + *
> + * vector_searchmask is a subset of d->domain and has the offline
> + * cpus masked out.
> + */
> + cpumask_and(vector_searchmask, vector_searchmask, mask);
> + BUG_ON(apic->cpu_mask_to_apicid(vector_searchmask, irqdata,
> + &d->cfg.dest_apicid));
> return 0;
> }
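
A side note on the search being restored above: the +16 stepping is
what keeps consecutive allocations in different priority classes (the
class is vector >> 4, as the NOTE at the top of this function
explains). A standalone sketch of just that walk; the constants are
assumed stand-ins for this era's irq_vectors.h, not taken from the
patch itself:

	/* sketch: how __assign_irq_vector steps through candidates */
	#define FIRST_EXTERNAL_VECTOR	0x20	/* assumed value */
	#define FIRST_SYSTEM_VECTOR	0xef	/* assumed value */

	static int next_candidate(int *vector, int *offset)
	{
		*vector += 16;			/* next priority class */
		if (*vector >= FIRST_SYSTEM_VECTOR) {
			*offset = (*offset + 1) % 16;	/* wrapped: shift by one */
			*vector = FIRST_EXTERNAL_VECTOR + *offset;
		}
		return *vector;
	}

Starting from 0x21 (assuming VECTOR_OFFSET_START is 1 here) this
yields 0x31, 0x41, ... 0xe1, then wraps to 0x22, so consecutive
candidates are always in different priority classes.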
>
> -static int assign_irq_vector(struct irq_data *irqd, const struct cpumask *dest)
> +static int assign_irq_vector(int irq, struct apic_chip_data *data,
> + const struct cpumask *mask,
> + struct irq_data *irqdata)
> {
> + int err;
> unsigned long flags;
> - int ret;
>
> raw_spin_lock_irqsave(&vector_lock, flags);
> - cpumask_and(vector_searchmask, dest, cpu_online_mask);
> - ret = assign_vector_locked(irqd, vector_searchmask);
> + err = __assign_irq_vector(irq, data, mask, irqdata);
> raw_spin_unlock_irqrestore(&vector_lock, flags);
> - return ret;
> -}
> -
> -static int assign_irq_vector_any_locked(struct irq_data *irqd)
> -{
> - /* Get the affinity mask - either irq_default_affinity or (user) set */
> - const struct cpumask *affmsk = irq_data_get_affinity_mask(irqd);
> - int node = irq_data_get_node(irqd);
> -
> - if (node == NUMA_NO_NODE)
> - goto all;
> - /* Try the intersection of @affmsk and node mask */
> - cpumask_and(vector_searchmask, cpumask_of_node(node), affmsk);
> - if (!assign_vector_locked(irqd, vector_searchmask))
> - return 0;
> - /* Try the node mask */
> - if (!assign_vector_locked(irqd, cpumask_of_node(node)))
> - return 0;
> -all:
> - /* Try the full affinity mask */
> - cpumask_and(vector_searchmask, affmsk, cpu_online_mask);
> - if (!assign_vector_locked(irqd, vector_searchmask))
> - return 0;
> - /* Try the full online mask */
> - return assign_vector_locked(irqd, cpu_online_mask);
> -}
> -
> -static int
> -assign_irq_vector_policy(struct irq_data *irqd, struct irq_alloc_info *info)
> -{
> - if (irqd_affinity_is_managed(irqd))
> - return reserve_managed_vector(irqd);
> - if (info->mask)
> - return assign_irq_vector(irqd, info->mask);
> - /*
> - * Make only a global reservation with no guarantee. A real vector
> - * is associated at activation time.
> - */
> - return reserve_irq_vector(irqd);
> + return err;
> }
>
> -static int
> -assign_managed_vector(struct irq_data *irqd, const struct cpumask *dest)
> +static int assign_irq_vector_policy(int irq, int node,
> + struct apic_chip_data *data,
> + struct irq_alloc_info *info,
> + struct irq_data *irqdata)
> {
> - const struct cpumask *affmsk = irq_data_get_affinity_mask(irqd);
> - struct apic_chip_data *apicd = apic_chip_data(irqd);
> - int vector, cpu;
> -
> - cpumask_and(vector_searchmask, vector_searchmask, affmsk);
> - cpu = cpumask_first(vector_searchmask);
> - if (cpu >= nr_cpu_ids)
> - return -EINVAL;
> - /* set_affinity might call here for nothing */
> - if (apicd->vector && cpumask_test_cpu(apicd->cpu, vector_searchmask))
> + if (info && info->mask)
> + return assign_irq_vector(irq, data, info->mask, irqdata);
> + if (node != NUMA_NO_NODE &&
> + assign_irq_vector(irq, data, cpumask_of_node(node), irqdata) == 0)
> return 0;
> - vector = irq_matrix_alloc_managed(vector_matrix, cpu);
> - trace_vector_alloc_managed(irqd->irq, vector, vector);
> - if (vector < 0)
> - return vector;
> - apic_update_vector(irqd, vector, cpu);
> - apic_update_irq_cfg(irqd, vector, cpu);
> - return 0;
> -}
> -
> -static void clear_irq_vector(struct irq_data *irqd)
> -{
> - struct apic_chip_data *apicd = apic_chip_data(irqd);
> - bool managed = irqd_affinity_is_managed(irqd);
> - unsigned int vector = apicd->vector;
> -
> - lockdep_assert_held(&vector_lock);
> -
> - if (!vector)
> - return;
> -
> - trace_vector_clear(irqd->irq, vector, apicd->cpu, apicd->prev_vector,
> - apicd->prev_cpu);
> -
> - per_cpu(vector_irq, apicd->cpu)[vector] = VECTOR_UNUSED;
> - irq_matrix_free(vector_matrix, apicd->cpu, vector, managed);
> - apicd->vector = 0;
> -
> - /* Clean up move in progress */
> - vector = apicd->prev_vector;
> - if (!vector)
> - return;
> -
> - per_cpu(vector_irq, apicd->prev_cpu)[vector] = VECTOR_UNUSED;
> - irq_matrix_free(vector_matrix, apicd->prev_cpu, vector, managed);
> - apicd->prev_vector = 0;
> - apicd->move_in_progress = 0;
> - hlist_del_init(&apicd->clist);
> + return assign_irq_vector(irq, data, apic->target_cpus(), irqdata);
> }
>
> -static void x86_vector_deactivate(struct irq_domain *dom, struct irq_data *irqd)
> +static void clear_irq_vector(int irq, struct apic_chip_data *data)
> {
> - struct apic_chip_data *apicd = apic_chip_data(irqd);
> - unsigned long flags;
> + struct irq_desc *desc;
> + int cpu, vector;
>
> - trace_vector_deactivate(irqd->irq, apicd->is_managed,
> - apicd->can_reserve, false);
> -
> - /* Regular fixed assigned interrupt */
> - if (!apicd->is_managed && !apicd->can_reserve)
> - return;
> - /* If the interrupt has a global reservation, nothing to do */
> - if (apicd->has_reserved)
> + if (!data->cfg.vector)
> return;
>
> - raw_spin_lock_irqsave(&vector_lock, flags);
> - clear_irq_vector(irqd);
> - if (apicd->can_reserve)
> - reserve_irq_vector_locked(irqd);
> - else
> - vector_assign_managed_shutdown(irqd);
> - raw_spin_unlock_irqrestore(&vector_lock, flags);
> -}
> -
> -static int activate_reserved(struct irq_data *irqd)
> -{
> - struct apic_chip_data *apicd = apic_chip_data(irqd);
> - int ret;
> -
> - ret = assign_irq_vector_any_locked(irqd);
> - if (!ret)
> - apicd->has_reserved = false;
> - return ret;
> -}
> + vector = data->cfg.vector;
> + for_each_cpu_and(cpu, data->domain, cpu_online_mask)
> + per_cpu(vector_irq, cpu)[vector] = VECTOR_UNUSED;
>
> -static int activate_managed(struct irq_data *irqd)
> -{
> - const struct cpumask *dest = irq_data_get_affinity_mask(irqd);
> - int ret;
> -
> - cpumask_and(vector_searchmask, dest, cpu_online_mask);
> - if (WARN_ON_ONCE(cpumask_empty(vector_searchmask))) {
> - /* Something in the core code broke! Survive gracefully */
> - pr_err("Managed startup for irq %u, but no CPU\n", irqd->irq);
> - return EINVAL;
> - }
> + data->cfg.vector = 0;
> + cpumask_clear(data->domain);
>
> - ret = assign_managed_vector(irqd, vector_searchmask);
> /*
> - * This should not happen. The vector reservation got buggered. Handle
> - * it gracefully.
> +	 * If a move is in progress or the old_domain mask is not empty,
> +	 * i.e. the cleanup IPI has not been processed yet, we need to remove
> +	 * the old references to desc from all cpus' vector tables.
> */
> - if (WARN_ON_ONCE(ret < 0)) {
> - pr_err("Managed startup irq %u, no vector available\n",
> - irqd->irq);
> + if (!data->move_in_progress && cpumask_empty(data->old_domain))
> + return;
> +
> + desc = irq_to_desc(irq);
> + for_each_cpu_and(cpu, data->old_domain, cpu_online_mask) {
> + for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS;
> + vector++) {
> + if (per_cpu(vector_irq, cpu)[vector] != desc)
> + continue;
> + per_cpu(vector_irq, cpu)[vector] = VECTOR_UNUSED;
> + break;
> + }
> }
> - return ret;
> + data->move_in_progress = 0;
> }
>
> -static int x86_vector_activate(struct irq_domain *dom, struct irq_data *irqd,
> - bool early)
> +void init_irq_alloc_info(struct irq_alloc_info *info,
> + const struct cpumask *mask)
> {
> - struct apic_chip_data *apicd = apic_chip_data(irqd);
> - unsigned long flags;
> - int ret = 0;
> -
> - trace_vector_activate(irqd->irq, apicd->is_managed,
> - apicd->can_reserve, early);
> -
> - /* Nothing to do for fixed assigned vectors */
> - if (!apicd->can_reserve && !apicd->is_managed)
> - return 0;
> -
> - raw_spin_lock_irqsave(&vector_lock, flags);
> - if (early || irqd_is_managed_and_shutdown(irqd))
> - vector_assign_managed_shutdown(irqd);
> - else if (apicd->is_managed)
> - ret = activate_managed(irqd);
> - else if (apicd->has_reserved)
> - ret = activate_reserved(irqd);
> - raw_spin_unlock_irqrestore(&vector_lock, flags);
> - return ret;
> + memset(info, 0, sizeof(*info));
> + info->mask = mask;
> }
>
> -static void vector_free_reserved_and_managed(struct irq_data *irqd)
> +void copy_irq_alloc_info(struct irq_alloc_info *dst, struct irq_alloc_info *src)
> {
> - const struct cpumask *dest = irq_data_get_affinity_mask(irqd);
> - struct apic_chip_data *apicd = apic_chip_data(irqd);
> -
> - trace_vector_teardown(irqd->irq, apicd->is_managed,
> - apicd->has_reserved);
> -
> - if (apicd->has_reserved)
> - irq_matrix_remove_reserved(vector_matrix);
> - if (apicd->is_managed)
> - irq_matrix_remove_managed(vector_matrix, dest);
> + if (src)
> + *dst = *src;
> + else
> + memset(dst, 0, sizeof(*dst));
> }
>
> static void x86_vector_free_irqs(struct irq_domain *domain,
> unsigned int virq, unsigned int nr_irqs)
> {
> - struct apic_chip_data *apicd;
> - struct irq_data *irqd;
> + struct apic_chip_data *apic_data;
> + struct irq_data *irq_data;
> unsigned long flags;
> int i;
>
> for (i = 0; i < nr_irqs; i++) {
> - irqd = irq_domain_get_irq_data(x86_vector_domain, virq + i);
> - if (irqd && irqd->chip_data) {
> + irq_data = irq_domain_get_irq_data(x86_vector_domain, virq + i);
> + if (irq_data && irq_data->chip_data) {
> raw_spin_lock_irqsave(&vector_lock, flags);
> - clear_irq_vector(irqd);
> - vector_free_reserved_and_managed(irqd);
> - apicd = irqd->chip_data;
> - irq_domain_reset_irq_data(irqd);
> + clear_irq_vector(virq + i, irq_data->chip_data);
> + apic_data = irq_data->chip_data;
> + irq_domain_reset_irq_data(irq_data);
> raw_spin_unlock_irqrestore(&vector_lock, flags);
> - free_apic_chip_data(apicd);
> + free_apic_chip_data(apic_data);
> +#ifdef CONFIG_X86_IO_APIC
> + if (virq + i < nr_legacy_irqs())
> + legacy_irq_data[virq + i] = NULL;
> +#endif
> }
> }
> }
>
> -static bool vector_configure_legacy(unsigned int virq, struct irq_data *irqd,
> - struct apic_chip_data *apicd)
> -{
> - unsigned long flags;
> - bool realloc = false;
> -
> - apicd->vector = ISA_IRQ_VECTOR(virq);
> - apicd->cpu = 0;
> -
> - raw_spin_lock_irqsave(&vector_lock, flags);
> - /*
> - * If the interrupt is activated, then it must stay at this vector
> - * position. That's usually the timer interrupt (0).
> - */
> - if (irqd_is_activated(irqd)) {
> - trace_vector_setup(virq, true, 0);
> - apic_update_irq_cfg(irqd, apicd->vector, apicd->cpu);
> - } else {
> - /* Release the vector */
> - apicd->can_reserve = true;
> - clear_irq_vector(irqd);
> - realloc = true;
> - }
> - raw_spin_unlock_irqrestore(&vector_lock, flags);
> - return realloc;
> -}
> -
> static int x86_vector_alloc_irqs(struct irq_domain *domain, unsigned int virq,
> unsigned int nr_irqs, void *arg)
> {
> struct irq_alloc_info *info = arg;
> - struct apic_chip_data *apicd;
> - struct irq_data *irqd;
> + struct apic_chip_data *data;
> + struct irq_data *irq_data;
> int i, err, node;
>
> if (disable_apic)
> @@ -501,37 +350,34 @@ static int x86_vector_alloc_irqs(struct irq_domain *domain, unsigned int virq,
> return -ENOSYS;
>
> for (i = 0; i < nr_irqs; i++) {
> - irqd = irq_domain_get_irq_data(domain, virq + i);
> - BUG_ON(!irqd);
> - node = irq_data_get_node(irqd);
> - WARN_ON_ONCE(irqd->chip_data);
> - apicd = alloc_apic_chip_data(node);
> - if (!apicd) {
> + irq_data = irq_domain_get_irq_data(domain, virq + i);
> + BUG_ON(!irq_data);
> + node = irq_data_get_node(irq_data);
> +#ifdef CONFIG_X86_IO_APIC
> + if (virq + i < nr_legacy_irqs() && legacy_irq_data[virq + i])
> + data = legacy_irq_data[virq + i];
> + else
> +#endif
> + data = alloc_apic_chip_data(node);
> + if (!data) {
> err = -ENOMEM;
> goto error;
> }
>
> - apicd->irq = virq + i;
> - irqd->chip = &lapic_controller;
> - irqd->chip_data = apicd;
> - irqd->hwirq = virq + i;
> - irqd_set_single_target(irqd);
> - /*
> - * Legacy vectors are already assigned when the IOAPIC
> - * takes them over. They stay on the same vector. This is
> - * required for check_timer() to work correctly as it might
> - * switch back to legacy mode. Only update the hardware
> - * config.
> - */
> - if (info->flags & X86_IRQ_ALLOC_LEGACY) {
> - if (!vector_configure_legacy(virq + i, irqd, apicd))
> - continue;
> - }
> -
> - err = assign_irq_vector_policy(irqd, info);
> - trace_vector_setup(virq + i, false, err);
> + irq_data->chip = &lapic_controller;
> + irq_data->chip_data = data;
> + irq_data->hwirq = virq + i;
> + err = assign_irq_vector_policy(virq + i, node, data, info,
> + irq_data);
> if (err)
> goto error;
> + /*
> + * If the apic destination mode is physical, then the
> + * effective affinity is restricted to a single target
> + * CPU. Mark the interrupt accordingly.
> + */
> + if (!apic->irq_dest_mode)
> + irqd_set_single_target(irq_data);
> }
>
> return 0;
> @@ -541,56 +387,9 @@ static int x86_vector_alloc_irqs(struct irq_domain *domain, unsigned int virq,
> return err;
> }
>
> -#ifdef CONFIG_GENERIC_IRQ_DEBUGFS
> -void x86_vector_debug_show(struct seq_file *m, struct irq_domain *d,
> - struct irq_data *irqd, int ind)
> -{
> - unsigned int cpu, vector, prev_cpu, prev_vector;
> - struct apic_chip_data *apicd;
> - unsigned long flags;
> - int irq;
> -
> - if (!irqd) {
> - irq_matrix_debug_show(m, vector_matrix, ind);
> - return;
> - }
> -
> - irq = irqd->irq;
> - if (irq < nr_legacy_irqs() && !test_bit(irq, &io_apic_irqs)) {
> - seq_printf(m, "%*sVector: %5d\n", ind, "", ISA_IRQ_VECTOR(irq));
> - seq_printf(m, "%*sTarget: Legacy PIC all CPUs\n", ind, "");
> - return;
> - }
> -
> - apicd = irqd->chip_data;
> - if (!apicd) {
> - seq_printf(m, "%*sVector: Not assigned\n", ind, "");
> - return;
> - }
> -
> - raw_spin_lock_irqsave(&vector_lock, flags);
> - cpu = apicd->cpu;
> - vector = apicd->vector;
> - prev_cpu = apicd->prev_cpu;
> - prev_vector = apicd->prev_vector;
> - raw_spin_unlock_irqrestore(&vector_lock, flags);
> - seq_printf(m, "%*sVector: %5u\n", ind, "", vector);
> - seq_printf(m, "%*sTarget: %5u\n", ind, "", cpu);
> - if (prev_vector) {
> - seq_printf(m, "%*sPrevious vector: %5u\n", ind, "", prev_vector);
> - seq_printf(m, "%*sPrevious target: %5u\n", ind, "", prev_cpu);
> - }
> -}
> -#endif
> -
> static const struct irq_domain_ops x86_vector_domain_ops = {
> - .alloc = x86_vector_alloc_irqs,
> - .free = x86_vector_free_irqs,
> - .activate = x86_vector_activate,
> - .deactivate = x86_vector_deactivate,
> -#ifdef CONFIG_GENERIC_IRQ_DEBUGFS
> - .debug_show = x86_vector_debug_show,
> -#endif
> + .alloc = x86_vector_alloc_irqs,
> + .free = x86_vector_free_irqs,
> };
>
> int __init arch_probe_nr_irqs(void)
> @@ -620,40 +419,35 @@ int __init arch_probe_nr_irqs(void)
> return legacy_pic->probe();
> }
>
> -void lapic_assign_legacy_vector(unsigned int irq, bool replace)
> +#ifdef CONFIG_X86_IO_APIC
> +static void __init init_legacy_irqs(void)
> {
> + int i, node = cpu_to_node(0);
> + struct apic_chip_data *data;
> +
> /*
> - * Use assign system here so it wont get accounted as allocated
> - * and moveable in the cpu hotplug check and it prevents managed
> - * irq reservation from touching it.
> +	 * For legacy IRQs, start by assigning irq0 to irq15 to
> +	 * ISA_IRQ_VECTOR(i) on all cpus.
> */
> - irq_matrix_assign_system(vector_matrix, ISA_IRQ_VECTOR(irq), replace);
> -}
> -
> -void __init lapic_assign_system_vectors(void)
> -{
> - unsigned int i, vector = 0;
> -
> - for_each_set_bit_from(vector, system_vectors, NR_VECTORS)
> - irq_matrix_assign_system(vector_matrix, vector, false);
> -
> - if (nr_legacy_irqs() > 1)
> - lapic_assign_legacy_vector(PIC_CASCADE_IR, false);
> -
> - /* System vectors are reserved, online it */
> - irq_matrix_online(vector_matrix);
> -
> - /* Mark the preallocated legacy interrupts */
> for (i = 0; i < nr_legacy_irqs(); i++) {
> - if (i != PIC_CASCADE_IR)
> - irq_matrix_assign(vector_matrix, ISA_IRQ_VECTOR(i));
> + data = legacy_irq_data[i] = alloc_apic_chip_data(node);
> + BUG_ON(!data);
> +
> + data->cfg.vector = ISA_IRQ_VECTOR(i);
> + cpumask_setall(data->domain);
> + irq_set_chip_data(i, data);
> }
> }
> +#else
> +static inline void init_legacy_irqs(void) { }
> +#endif
>
> int __init arch_early_irq_init(void)
> {
> struct fwnode_handle *fn;
>
> + init_legacy_irqs();
> +
> fn = irq_domain_alloc_named_fwnode("VECTOR");
> BUG_ON(!fn);
> x86_vector_domain = irq_domain_create_tree(fn, &x86_vector_domain_ops,
> @@ -664,115 +458,100 @@ int __init arch_early_irq_init(void)
>
> arch_init_msi_domain(x86_vector_domain);
>
> + BUG_ON(!alloc_cpumask_var(&vector_cpumask, GFP_KERNEL));
> BUG_ON(!alloc_cpumask_var(&vector_searchmask, GFP_KERNEL));
> -
> - /*
> - * Allocate the vector matrix allocator data structure and limit the
> - * search area.
> - */
> - vector_matrix = irq_alloc_matrix(NR_VECTORS, FIRST_EXTERNAL_VECTOR,
> - FIRST_SYSTEM_VECTOR);
> - BUG_ON(!vector_matrix);
> + BUG_ON(!alloc_cpumask_var(&searched_cpumask, GFP_KERNEL));
>
> return arch_early_ioapic_init();
> }
>
> -#ifdef CONFIG_SMP
> -
> -static struct irq_desc *__setup_vector_irq(int vector)
> +/* Initialize vector_irq on a new cpu */
> +static void __setup_vector_irq(int cpu)
> {
> - int isairq = vector - ISA_IRQ_VECTOR(0);
> -
> - /* Check whether the irq is in the legacy space */
> - if (isairq < 0 || isairq >= nr_legacy_irqs())
> - return VECTOR_UNUSED;
> - /* Check whether the irq is handled by the IOAPIC */
> - if (test_bit(isairq, &io_apic_irqs))
> - return VECTOR_UNUSED;
> - return irq_to_desc(isairq);
> -}
> + struct apic_chip_data *data;
> + struct irq_desc *desc;
> + int irq, vector;
>
> -/* Online the local APIC infrastructure and initialize the vectors */
> -void lapic_online(void)
> -{
> - unsigned int vector;
> + /* Mark the inuse vectors */
> + for_each_irq_desc(irq, desc) {
> + struct irq_data *idata = irq_desc_get_irq_data(desc);
>
> - lockdep_assert_held(&vector_lock);
> -
> - /* Online the vector matrix array for this CPU */
> - irq_matrix_online(vector_matrix);
> -
> - /*
> - * The interrupt affinity logic never targets interrupts to offline
> - * CPUs. The exception are the legacy PIC interrupts. In general
> - * they are only targeted to CPU0, but depending on the platform
> - * they can be distributed to any online CPU in hardware. The
> - * kernel has no influence on that. So all active legacy vectors
> - * must be installed on all CPUs. All non legacy interrupts can be
> - * cleared.
> - */
> - for (vector = 0; vector < NR_VECTORS; vector++)
> - this_cpu_write(vector_irq[vector], __setup_vector_irq(vector));
> -}
> + data = apic_chip_data(idata);
> + if (!data || !cpumask_test_cpu(cpu, data->domain))
> + continue;
> + vector = data->cfg.vector;
> + per_cpu(vector_irq, cpu)[vector] = desc;
> + }
> + /* Mark the free vectors */
> + for (vector = 0; vector < NR_VECTORS; ++vector) {
> + desc = per_cpu(vector_irq, cpu)[vector];
> + if (IS_ERR_OR_NULL(desc))
> + continue;
>
> -void lapic_offline(void)
> -{
> - lock_vector_lock();
> - irq_matrix_offline(vector_matrix);
> - unlock_vector_lock();
> + data = apic_chip_data(irq_desc_get_irq_data(desc));
> + if (!cpumask_test_cpu(cpu, data->domain))
> + per_cpu(vector_irq, cpu)[vector] = VECTOR_UNUSED;
> + }
> }
>
> -static int apic_set_affinity(struct irq_data *irqd,
> - const struct cpumask *dest, bool force)
> +/*
> + * Set up the vector-to-irq mappings. Must be called with vector_lock held.
> + */
> +void setup_vector_irq(int cpu)
> {
> - struct apic_chip_data *apicd = apic_chip_data(irqd);
> - int err;
> + int irq;
>
> + lockdep_assert_held(&vector_lock);
> /*
> - * Core code can call here for inactive interrupts. For inactive
> - * interrupts which use managed or reservation mode there is no
> - * point in going through the vector assignment right now as the
> - * activation will assign a vector which fits the destination
> - * cpumask. Let the core code store the destination mask and be
> - * done with it.
> +	 * On most platforms the legacy PIC delivers interrupts to the boot
> +	 * cpu, but on certain platforms PIC interrupts are delivered to
> +	 * multiple cpus. If the legacy IRQ is handled by the legacy PIC, set
> +	 * up the static legacy vector to irq mapping for the new cpu that is
> +	 * coming online:
> */
> - if (!irqd_is_activated(irqd) &&
> - (apicd->is_managed || apicd->can_reserve))
> - return IRQ_SET_MASK_OK;
> + for (irq = 0; irq < nr_legacy_irqs(); irq++)
> + per_cpu(vector_irq, cpu)[ISA_IRQ_VECTOR(irq)] = irq_to_desc(irq);
>
> - raw_spin_lock(&vector_lock);
> - cpumask_and(vector_searchmask, dest, cpu_online_mask);
> - if (irqd_affinity_is_managed(irqd))
> - err = assign_managed_vector(irqd, vector_searchmask);
> - else
> - err = assign_vector_locked(irqd, vector_searchmask);
> - raw_spin_unlock(&vector_lock);
> - return err ? err : IRQ_SET_MASK_OK;
> + __setup_vector_irq(cpu);
> }
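
For readers without irq_vectors.h open: the loop above works because
ISA_IRQ_VECTOR() is a fixed translation, so the legacy IRQs land on
the same vectors on every cpu. Assuming this era's definition, the
mapping works out to:

	/* sketch: the static legacy irq -> vector translation (assumed defn) */
	#define FIRST_EXTERNAL_VECTOR	0x20
	#define ISA_IRQ_VECTOR(irq)	(((FIRST_EXTERNAL_VECTOR + 16) & ~15) + (irq))
	/* IRQ0 -> 0x30, IRQ1 -> 0x31, ..., IRQ15 -> 0x3f */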
>
> -#else
> -# define apic_set_affinity NULL
> -#endif
> -
> -static int apic_retrigger_irq(struct irq_data *irqd)
> +static int apic_retrigger_irq(struct irq_data *irq_data)
> {
> - struct apic_chip_data *apicd = apic_chip_data(irqd);
> + struct apic_chip_data *data = apic_chip_data(irq_data);
> unsigned long flags;
> + int cpu;
>
> raw_spin_lock_irqsave(&vector_lock, flags);
> - apic->send_IPI(apicd->cpu, apicd->vector);
> + cpu = cpumask_first_and(data->domain, cpu_online_mask);
> + apic->send_IPI_mask(cpumask_of(cpu), data->cfg.vector);
> raw_spin_unlock_irqrestore(&vector_lock, flags);
>
> return 1;
> }
>
> -void apic_ack_edge(struct irq_data *irqd)
> +void apic_ack_edge(struct irq_data *data)
> {
> - irq_complete_move(irqd_cfg(irqd));
> - irq_move_irq(irqd);
> + irq_complete_move(irqd_cfg(data));
> + irq_move_irq(data);
> ack_APIC_irq();
> }
>
> +static int apic_set_affinity(struct irq_data *irq_data,
> + const struct cpumask *dest, bool force)
> +{
> + struct apic_chip_data *data = irq_data->chip_data;
> + int err, irq = irq_data->irq;
> +
> + if (!IS_ENABLED(CONFIG_SMP))
> + return -EPERM;
> +
> + if (!cpumask_intersects(dest, cpu_online_mask))
> + return -EINVAL;
> +
> + err = assign_irq_vector(irq, data, dest, irq_data);
> + return err ? err : IRQ_SET_MASK_OK;
> +}
> +
> static struct irq_chip lapic_controller = {
> .name = "APIC",
> .irq_ack = apic_ack_edge,
> @@ -781,98 +560,115 @@ static struct irq_chip lapic_controller = {
> };
>
> #ifdef CONFIG_SMP
> +static void __send_cleanup_vector(struct apic_chip_data *data)
> +{
> + raw_spin_lock(&vector_lock);
> + cpumask_and(data->old_domain, data->old_domain, cpu_online_mask);
> + data->move_in_progress = 0;
> + if (!cpumask_empty(data->old_domain))
> + apic->send_IPI_mask(data->old_domain, IRQ_MOVE_CLEANUP_VECTOR);
> + raw_spin_unlock(&vector_lock);
> +}
>
> -static void free_moved_vector(struct apic_chip_data *apicd)
> +void send_cleanup_vector(struct irq_cfg *cfg)
> {
> - unsigned int vector = apicd->prev_vector;
> - unsigned int cpu = apicd->prev_cpu;
> - bool managed = apicd->is_managed;
> + struct apic_chip_data *data;
>
> - /*
> - * This should never happen. Managed interrupts are not
> - * migrated except on CPU down, which does not involve the
> - * cleanup vector. But try to keep the accounting correct
> - * nevertheless.
> - */
> - WARN_ON_ONCE(managed);
> -
> - trace_vector_free_moved(apicd->irq, cpu, vector, managed);
> - irq_matrix_free(vector_matrix, cpu, vector, managed);
> - per_cpu(vector_irq, cpu)[vector] = VECTOR_UNUSED;
> - hlist_del_init(&apicd->clist);
> - apicd->prev_vector = 0;
> - apicd->move_in_progress = 0;
> + data = container_of(cfg, struct apic_chip_data, cfg);
> + if (data->move_in_progress)
> + __send_cleanup_vector(data);
> }
>
> asmlinkage __visible void __irq_entry smp_irq_move_cleanup_interrupt(void)
> {
> - struct hlist_head *clhead = this_cpu_ptr(&cleanup_list);
> - struct apic_chip_data *apicd;
> - struct hlist_node *tmp;
> + unsigned vector, me;
>
> entering_ack_irq();
> +
> /* Prevent vectors vanishing under us */
> raw_spin_lock(&vector_lock);
>
> - hlist_for_each_entry_safe(apicd, tmp, clhead, clist) {
> - unsigned int irr, vector = apicd->prev_vector;
> + me = smp_processor_id();
> + for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
> + struct apic_chip_data *data;
> + struct irq_desc *desc;
> + unsigned int irr;
> +
> + retry:
> + desc = __this_cpu_read(vector_irq[vector]);
> + if (IS_ERR_OR_NULL(desc))
> + continue;
> +
> + if (!raw_spin_trylock(&desc->lock)) {
> + raw_spin_unlock(&vector_lock);
> + cpu_relax();
> + raw_spin_lock(&vector_lock);
> + goto retry;
> + }
> +
> + data = apic_chip_data(irq_desc_get_irq_data(desc));
> + if (!data)
> + goto unlock;
>
> /*
> - * Paranoia: Check if the vector that needs to be cleaned
> - * up is registered at the APICs IRR. If so, then this is
> - * not the best time to clean it up. Clean it up in the
> - * next attempt by sending another IRQ_MOVE_CLEANUP_VECTOR
> - * to this CPU. IRQ_MOVE_CLEANUP_VECTOR is the lowest
> - * priority external vector, so on return from this
> - * interrupt the device interrupt will happen first.
> +		 * Nothing to clean up if irq migration is in progress
> + * or this cpu is not set in the cleanup mask.
> + */
> + if (data->move_in_progress ||
> + !cpumask_test_cpu(me, data->old_domain))
> + goto unlock;
> +
> + /*
> + * We have two cases to handle here:
> + * 1) vector is unchanged but the target mask got reduced
> +		 * 2) vector and the target mask have changed
> + *
> + * #1 is obvious, but in #2 we have two vectors with the same
> + * irq descriptor: the old and the new vector. So we need to
> +		 * make sure that we only clean up the old vector. The new
> + * vector has the current @vector number in the config and
> + * this cpu is part of the target mask. We better leave that
> + * one alone.
> */
> + if (vector == data->cfg.vector &&
> + cpumask_test_cpu(me, data->domain))
> + goto unlock;
> +
> irr = apic_read(APIC_IRR + (vector / 32 * 0x10));
> - if (irr & (1U << (vector % 32))) {
> + /*
> +		 * Check if the vector that needs to be cleaned up is
> +		 * registered at the cpu's IRR. If so, then this is not
> +		 * the best time to clean it up. Let's clean it up in the
> +		 * next attempt by sending another IRQ_MOVE_CLEANUP_VECTOR
> +		 * to myself.
> + */
> + if (irr & (1 << (vector % 32))) {
> apic->send_IPI_self(IRQ_MOVE_CLEANUP_VECTOR);
> - continue;
> + goto unlock;
> }
> - free_moved_vector(apicd);
> + __this_cpu_write(vector_irq[vector], VECTOR_UNUSED);
> + cpumask_clear_cpu(me, data->old_domain);
> +unlock:
> + raw_spin_unlock(&desc->lock);
> }
>
> raw_spin_unlock(&vector_lock);
> - exiting_irq();
> -}
>
> -static void __send_cleanup_vector(struct apic_chip_data *apicd)
> -{
> - unsigned int cpu;
> -
> - raw_spin_lock(&vector_lock);
> - apicd->move_in_progress = 0;
> - cpu = apicd->prev_cpu;
> - if (cpu_online(cpu)) {
> - hlist_add_head(&apicd->clist, per_cpu_ptr(&cleanup_list, cpu));
> - apic->send_IPI(cpu, IRQ_MOVE_CLEANUP_VECTOR);
> - } else {
> - apicd->prev_vector = 0;
> - }
> - raw_spin_unlock(&vector_lock);
> -}
> -
> -void send_cleanup_vector(struct irq_cfg *cfg)
> -{
> - struct apic_chip_data *apicd;
> -
> - apicd = container_of(cfg, struct apic_chip_data, hw_irq_cfg);
> - if (apicd->move_in_progress)
> - __send_cleanup_vector(apicd);
> + exiting_irq();
> }
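
The IRR test in the loop above indexes one of eight 32-bit IRR
registers that sit 0x10 apart in APIC register space. A minimal
sketch of the same arithmetic, with APIC_IRR's base written out as an
assumed 0x200:

	/* sketch: which IRR register and bit track a given vector */
	static unsigned int irr_reg(unsigned int vector)
	{
		return 0x200 + (vector / 32) * 0x10;	/* 0x200 == APIC_IRR */
	}

	static unsigned int irr_bit(unsigned int vector)
	{
		return 1U << (vector % 32);
	}

E.g. vector 0x61 is tracked in the register at offset 0x230, bit 1.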
>
> static void __irq_complete_move(struct irq_cfg *cfg, unsigned vector)
> {
> - struct apic_chip_data *apicd;
> + unsigned me;
> + struct apic_chip_data *data;
>
> - apicd = container_of(cfg, struct apic_chip_data, hw_irq_cfg);
> - if (likely(!apicd->move_in_progress))
> + data = container_of(cfg, struct apic_chip_data, cfg);
> + if (likely(!data->move_in_progress))
> return;
>
> - if (vector == apicd->vector && apicd->cpu == smp_processor_id())
> - __send_cleanup_vector(apicd);
> + me = smp_processor_id();
> + if (vector == data->cfg.vector && cpumask_test_cpu(me, data->domain))
> + __send_cleanup_vector(data);
> }
>
> void irq_complete_move(struct irq_cfg *cfg)
> @@ -885,9 +681,10 @@ void irq_complete_move(struct irq_cfg *cfg)
> */
> void irq_force_complete_move(struct irq_desc *desc)
> {
> - struct apic_chip_data *apicd;
> - struct irq_data *irqd;
> - unsigned int vector;
> + struct irq_data *irqdata;
> + struct apic_chip_data *data;
> + struct irq_cfg *cfg;
> + unsigned int cpu;
>
> /*
> * The function is called for all descriptors regardless of which
> @@ -898,31 +695,43 @@ void irq_force_complete_move(struct irq_desc *desc)
> * Check first that the chip_data is what we expect
> * (apic_chip_data) before touching it any further.
> */
> - irqd = irq_domain_get_irq_data(x86_vector_domain,
> - irq_desc_get_irq(desc));
> - if (!irqd)
> + irqdata = irq_domain_get_irq_data(x86_vector_domain,
> + irq_desc_get_irq(desc));
> + if (!irqdata)
> return;
>
> - raw_spin_lock(&vector_lock);
> - apicd = apic_chip_data(irqd);
> - if (!apicd)
> - goto unlock;
> + data = apic_chip_data(irqdata);
> + cfg = data ? &data->cfg : NULL;
>
> - /*
> - * If prev_vector is empty, no action required.
> - */
> - vector = apicd->prev_vector;
> - if (!vector)
> - goto unlock;
> + if (!cfg)
> + return;
>
> /*
> - * This is tricky. If the cleanup of the old vector has not been
> + * This is tricky. If the cleanup of @data->old_domain has not been
> * done yet, then the following setaffinity call will fail with
> * -EBUSY. This can leave the interrupt in a stale state.
> *
> * All CPUs are stuck in stop machine with interrupts disabled so
> * calling __irq_complete_move() would be completely pointless.
> - *
> + */
> + raw_spin_lock(&vector_lock);
> + /*
> + * Clean out all offline cpus (including the outgoing one) from the
> + * old_domain mask.
> + */
> + cpumask_and(data->old_domain, data->old_domain, cpu_online_mask);
> +
> + /*
> + * If move_in_progress is cleared and the old_domain mask is empty,
> +	 * then there is nothing to clean up. fixup_irqs() will take care of
> + * the stale vectors on the outgoing cpu.
> + */
> + if (!data->move_in_progress && cpumask_empty(data->old_domain)) {
> + raw_spin_unlock(&vector_lock);
> + return;
> + }
> +
> + /*
> * 1) The interrupt is in move_in_progress state. That means that we
> * have not seen an interrupt since the io_apic was reprogrammed to
> * the new vector.
> @@ -930,7 +739,7 @@ void irq_force_complete_move(struct irq_desc *desc)
> * 2) The interrupt has fired on the new vector, but the cleanup IPIs
> * have not been processed yet.
> */
> - if (apicd->move_in_progress) {
> + if (data->move_in_progress) {
> /*
> * In theory there is a race:
> *
> @@ -964,43 +773,21 @@ void irq_force_complete_move(struct irq_desc *desc)
> * area arises.
> */
> pr_warn("IRQ fixup: irq %d move in progress, old vector %d\n",
> - irqd->irq, vector);
> + irqdata->irq, cfg->old_vector);
> }
> - free_moved_vector(apicd);
> -unlock:
> - raw_spin_unlock(&vector_lock);
> -}
> -
> -#ifdef CONFIG_HOTPLUG_CPU
> -/*
> - * Note, this is not accurate accounting, but at least good enough to
> - * prevent that the actual interrupt move will run out of vectors.
> - */
> -int lapic_can_unplug_cpu(void)
> -{
> - unsigned int rsvd, avl, tomove, cpu = smp_processor_id();
> - int ret = 0;
> + /*
> + * If old_domain is not empty, then other cpus still have the irq
> + * descriptor set in their vector array. Clean it up.
> + */
> + for_each_cpu(cpu, data->old_domain)
> + per_cpu(vector_irq, cpu)[cfg->old_vector] = VECTOR_UNUSED;
>
> - raw_spin_lock(&vector_lock);
> - tomove = irq_matrix_allocated(vector_matrix);
> - avl = irq_matrix_available(vector_matrix, true);
> - if (avl < tomove) {
> - pr_warn("CPU %u has %u vectors, %u available. Cannot disable CPU\n",
> - cpu, tomove, avl);
> - ret = -ENOSPC;
> - goto out;
> - }
> - rsvd = irq_matrix_reserved(vector_matrix);
> - if (avl < rsvd) {
> - pr_warn("Reserved vectors %u > available %u. IRQ request may fail\n",
> - rsvd, avl);
> - }
> -out:
> +	/* Clean up the leftovers of the (half-finished) move */
> + cpumask_clear(data->old_domain);
> + data->move_in_progress = 0;
> raw_spin_unlock(&vector_lock);
> - return ret;
> }
> -#endif /* HOTPLUG_CPU */
> -#endif /* SMP */
> +#endif
>
> static void __init print_APIC_field(int base)
> {
> diff --git a/arch/x86/kernel/apic/x2apic.h b/arch/x86/kernel/apic/x2apic.h
> deleted file mode 100644
> index b107de381cb5..000000000000
> --- a/arch/x86/kernel/apic/x2apic.h
> +++ /dev/null
> @@ -1,9 +0,0 @@
> -/* Common bits for X2APIC cluster/physical modes. */
> -
> -int x2apic_apic_id_valid(int apicid);
> -int x2apic_apic_id_registered(void);
> -void __x2apic_send_IPI_dest(unsigned int apicid, int vector, unsigned int dest);
> -unsigned int x2apic_get_apic_id(unsigned long id);
> -u32 x2apic_set_apic_id(unsigned int id);
> -int x2apic_phys_pkg_id(int initial_apicid, int index_msb);
> -void x2apic_send_IPI_self(int vector);
> diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
> index 622f13ca8a94..e216cf3d64d2 100644
> --- a/arch/x86/kernel/apic/x2apic_cluster.c
> +++ b/arch/x86/kernel/apic/x2apic_cluster.c
> @@ -9,24 +9,22 @@
> #include <linux/cpu.h>
>
> #include <asm/smp.h>
> -#include "x2apic.h"
> -
> -struct cluster_mask {
> - unsigned int clusterid;
> - int node;
> - struct cpumask mask;
> -};
> +#include <asm/x2apic.h>
>
> static DEFINE_PER_CPU(u32, x86_cpu_to_logical_apicid);
> +static DEFINE_PER_CPU(cpumask_var_t, cpus_in_cluster);
> static DEFINE_PER_CPU(cpumask_var_t, ipi_mask);
> -static DEFINE_PER_CPU(struct cluster_mask *, cluster_masks);
> -static struct cluster_mask *cluster_hotplug_mask;
>
> static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
> {
> return x2apic_enabled();
> }
>
> +static inline u32 x2apic_cluster(int cpu)
> +{
> + return per_cpu(x86_cpu_to_logical_apicid, cpu) >> 16;
> +}
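
Context for the helper above: in x2apic mode the value read from
APIC_LDR packs the cluster id in bits 31:16 and a one-hot member bit
in bits 15:0, so extracting the cluster is a plain shift. With a
made-up value:

	/* sketch: decoding an x2apic logical id (value illustrative) */
	u32 ldr     = 0x00030008;	/* cluster 3, member bit 3 */
	u32 cluster = ldr >> 16;	/* -> 3 */
	u32 member  = ldr & 0xffff;	/* -> 0x0008 */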
> +
> static void x2apic_send_IPI(int cpu, int vector)
> {
> u32 dest = per_cpu(x86_cpu_to_logical_apicid, cpu);
> @@ -38,34 +36,49 @@ static void x2apic_send_IPI(int cpu, int vector)
> static void
> __x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest)
> {
> - unsigned int cpu, clustercpu;
> - struct cpumask *tmpmsk;
> + struct cpumask *cpus_in_cluster_ptr;
> + struct cpumask *ipi_mask_ptr;
> + unsigned int cpu, this_cpu;
> unsigned long flags;
> u32 dest;
>
> x2apic_wrmsr_fence();
> +
> local_irq_save(flags);
>
> - tmpmsk = this_cpu_cpumask_var_ptr(ipi_mask);
> - cpumask_copy(tmpmsk, mask);
> - /* If IPI should not be sent to self, clear current CPU */
> - if (apic_dest != APIC_DEST_ALLINC)
> - cpumask_clear_cpu(smp_processor_id(), tmpmsk);
> + this_cpu = smp_processor_id();
>
> - /* Collapse cpus in a cluster so a single IPI per cluster is sent */
> - for_each_cpu(cpu, tmpmsk) {
> - struct cluster_mask *cmsk = per_cpu(cluster_masks, cpu);
> + /*
> +	 * We are going to modify the mask, so we need our own copy
> +	 * and must be sure it is manipulated with irqs off.
> + */
> + ipi_mask_ptr = this_cpu_cpumask_var_ptr(ipi_mask);
> + cpumask_copy(ipi_mask_ptr, mask);
> +
> + /*
> + * The idea is to send one IPI per cluster.
> + */
> + for_each_cpu(cpu, ipi_mask_ptr) {
> + unsigned long i;
>
> + cpus_in_cluster_ptr = per_cpu(cpus_in_cluster, cpu);
> dest = 0;
> - for_each_cpu_and(clustercpu, tmpmsk, &cmsk->mask)
> - dest |= per_cpu(x86_cpu_to_logical_apicid, clustercpu);
> +
> + /* Collect cpus in cluster. */
> + for_each_cpu_and(i, ipi_mask_ptr, cpus_in_cluster_ptr) {
> + if (apic_dest == APIC_DEST_ALLINC || i != this_cpu)
> + dest |= per_cpu(x86_cpu_to_logical_apicid, i);
> + }
>
> if (!dest)
> continue;
>
> __x2apic_send_IPI_dest(dest, vector, apic->dest_logical);
> - /* Remove cluster CPUs from tmpmask */
> - cpumask_andnot(tmpmsk, tmpmsk, &cmsk->mask);
> + /*
> +		 * Cluster sibling cpus should be discarded now so
> +		 * we do not send them the IPI a second time.
> + */
> + cpumask_andnot(ipi_mask_ptr, ipi_mask_ptr, cpus_in_cluster_ptr);
> }
>
> local_irq_restore(flags);
> @@ -92,90 +105,125 @@ static void x2apic_send_IPI_all(int vector)
> __x2apic_send_IPI_mask(cpu_online_mask, vector, APIC_DEST_ALLINC);
> }
>
> -static u32 x2apic_calc_apicid(unsigned int cpu)
> +static int
> +x2apic_cpu_mask_to_apicid(const struct cpumask *mask, struct irq_data *irqdata,
> + unsigned int *apicid)
> {
> - return per_cpu(x86_cpu_to_logical_apicid, cpu);
> -}
> -
> -static void init_x2apic_ldr(void)
> -{
> - struct cluster_mask *cmsk = this_cpu_read(cluster_masks);
> - u32 cluster, apicid = apic_read(APIC_LDR);
> + struct cpumask *effmsk = irq_data_get_effective_affinity_mask(irqdata);
> unsigned int cpu;
> + u32 dest = 0;
> + u16 cluster;
>
> - this_cpu_write(x86_cpu_to_logical_apicid, apicid);
> + cpu = cpumask_first(mask);
> + if (cpu >= nr_cpu_ids)
> + return -EINVAL;
>
> - if (cmsk)
> - goto update;
> + dest = per_cpu(x86_cpu_to_logical_apicid, cpu);
> + cluster = x2apic_cluster(cpu);
>
> - cluster = apicid >> 16;
> - for_each_online_cpu(cpu) {
> - cmsk = per_cpu(cluster_masks, cpu);
> - /* Matching cluster found. Link and update it. */
> - if (cmsk && cmsk->clusterid == cluster)
> - goto update;
> + cpumask_clear(effmsk);
> + for_each_cpu(cpu, mask) {
> + if (cluster != x2apic_cluster(cpu))
> + continue;
> + dest |= per_cpu(x86_cpu_to_logical_apicid, cpu);
> + cpumask_set_cpu(cpu, effmsk);
> }
> - cmsk = cluster_hotplug_mask;
> - cluster_hotplug_mask = NULL;
> -update:
> - this_cpu_write(cluster_masks, cmsk);
> - cpumask_set_cpu(smp_processor_id(), &cmsk->mask);
> +
> + *apicid = dest;
> + return 0;
> }
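
In other words, the cluster variant ORs together the member bits of
every requested cpu that shares the first cpu's cluster and shrinks
the effective affinity to exactly those cpus. With illustrative
logical ids:

	/* sketch: two same-cluster members collapse into one dest */
	u32 a    = 0x00030001;	/* cluster 3, member 0 */
	u32 b    = 0x00030004;	/* cluster 3, member 2 */
	u32 dest = a | b;	/* 0x00030005 addresses both at once */

A cpu from a different cluster in @mask is skipped and also dropped
from the effective affinity mask.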
>
> -static int alloc_clustermask(unsigned int cpu, int node)
> +static void init_x2apic_ldr(void)
> {
> - if (per_cpu(cluster_masks, cpu))
> - return 0;
> - /*
> - * If a hotplug spare mask exists, check whether it's on the right
> - * node. If not, free it and allocate a new one.
> - */
> - if (cluster_hotplug_mask) {
> - if (cluster_hotplug_mask->node == node)
> - return 0;
> - kfree(cluster_hotplug_mask);
> - }
> + unsigned int this_cpu = smp_processor_id();
> + unsigned int cpu;
>
> - cluster_hotplug_mask = kzalloc_node(sizeof(*cluster_hotplug_mask),
> - GFP_KERNEL, node);
> - if (!cluster_hotplug_mask)
> - return -ENOMEM;
> - cluster_hotplug_mask->node = node;
> - return 0;
> + per_cpu(x86_cpu_to_logical_apicid, this_cpu) = apic_read(APIC_LDR);
> +
> + cpumask_set_cpu(this_cpu, per_cpu(cpus_in_cluster, this_cpu));
> + for_each_online_cpu(cpu) {
> + if (x2apic_cluster(this_cpu) != x2apic_cluster(cpu))
> + continue;
> + cpumask_set_cpu(this_cpu, per_cpu(cpus_in_cluster, cpu));
> + cpumask_set_cpu(cpu, per_cpu(cpus_in_cluster, this_cpu));
> + }
> }
>
> +/*
> + * On CPU state changes, update the x2apic cluster sibling info.
> + */
> static int x2apic_prepare_cpu(unsigned int cpu)
> {
> - if (alloc_clustermask(cpu, cpu_to_node(cpu)) < 0)
> + if (!zalloc_cpumask_var(&per_cpu(cpus_in_cluster, cpu), GFP_KERNEL))
> return -ENOMEM;
> - if (!zalloc_cpumask_var(&per_cpu(ipi_mask, cpu), GFP_KERNEL))
> +
> + if (!zalloc_cpumask_var(&per_cpu(ipi_mask, cpu), GFP_KERNEL)) {
> + free_cpumask_var(per_cpu(cpus_in_cluster, cpu));
> return -ENOMEM;
> + }
> +
> return 0;
> }
>
> -static int x2apic_dead_cpu(unsigned int dead_cpu)
> +static int x2apic_dead_cpu(unsigned int this_cpu)
> {
> - struct cluster_mask *cmsk = per_cpu(cluster_masks, dead_cpu);
> + int cpu;
>
> - cpumask_clear_cpu(dead_cpu, &cmsk->mask);
> - free_cpumask_var(per_cpu(ipi_mask, dead_cpu));
> + for_each_online_cpu(cpu) {
> + if (x2apic_cluster(this_cpu) != x2apic_cluster(cpu))
> + continue;
> + cpumask_clear_cpu(this_cpu, per_cpu(cpus_in_cluster, cpu));
> + cpumask_clear_cpu(cpu, per_cpu(cpus_in_cluster, this_cpu));
> + }
> + free_cpumask_var(per_cpu(cpus_in_cluster, this_cpu));
> + free_cpumask_var(per_cpu(ipi_mask, this_cpu));
> return 0;
> }
>
> static int x2apic_cluster_probe(void)
> {
> + int cpu = smp_processor_id();
> + int ret;
> +
> if (!x2apic_mode)
> return 0;
>
> - if (cpuhp_setup_state(CPUHP_X2APIC_PREPARE, "x86/x2apic:prepare",
> - x2apic_prepare_cpu, x2apic_dead_cpu) < 0) {
> + ret = cpuhp_setup_state(CPUHP_X2APIC_PREPARE, "x86/x2apic:prepare",
> + x2apic_prepare_cpu, x2apic_dead_cpu);
> + if (ret < 0) {
> pr_err("Failed to register X2APIC_PREPARE\n");
> return 0;
> }
> - init_x2apic_ldr();
> + cpumask_set_cpu(cpu, per_cpu(cpus_in_cluster, cpu));
> return 1;
> }
>
> +static const struct cpumask *x2apic_cluster_target_cpus(void)
> +{
> + return cpu_all_mask;
> +}
> +
> +/*
> + * Each x2apic cluster is an allocation domain.
> + */
> +static void cluster_vector_allocation_domain(int cpu, struct cpumask *retmask,
> + const struct cpumask *mask)
> +{
> + /*
> +	 * To minimize vector pressure, the default case (boot, device
> +	 * bringup, etc.) will use a single cpu for the interrupt destination.
> + *
> + * On explicit migration requests coming from irqbalance etc,
> + * interrupts will be routed to the x2apic cluster (cluster-id
> + * derived from the first cpu in the mask) members specified
> + * in the mask.
> + */
> + if (mask == x2apic_cluster_target_cpus())
> + cpumask_copy(retmask, cpumask_of(cpu));
> + else
> + cpumask_and(retmask, mask, per_cpu(cpus_in_cluster, cpu));
> +}
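
Worth spelling out, since it differs from the physical mode path:
with the default target_cpus() mask the domain collapses to just the
requested cpu to save vectors, and only an explicit affinity (e.g.
one written by irqbalance) spreads the interrupt across the cluster
siblings that are also present in the requested mask.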
> +
> static struct apic apic_x2apic_cluster __ro_after_init = {
>
> .name = "cluster x2apic",
> @@ -187,10 +235,12 @@ static struct apic apic_x2apic_cluster __ro_after_init = {
> .irq_delivery_mode = dest_LowestPrio,
> .irq_dest_mode = 1, /* logical */
>
> + .target_cpus = x2apic_cluster_target_cpus,
> .disable_esr = 0,
> .dest_logical = APIC_DEST_LOGICAL,
> .check_apicid_used = NULL,
>
> + .vector_allocation_domain = cluster_vector_allocation_domain,
> .init_apic_ldr = init_x2apic_ldr,
>
> .ioapic_phys_id_map = NULL,
> @@ -203,7 +253,7 @@ static struct apic apic_x2apic_cluster __ro_after_init = {
> .get_apic_id = x2apic_get_apic_id,
> .set_apic_id = x2apic_set_apic_id,
>
> - .calc_dest_apicid = x2apic_calc_apicid,
> + .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid,
>
> .send_IPI = x2apic_send_IPI,
> .send_IPI_mask = x2apic_send_IPI_mask,
> diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c
> index f8d9d69994e6..b94d35320f85 100644
> --- a/arch/x86/kernel/apic/x2apic_phys.c
> +++ b/arch/x86/kernel/apic/x2apic_phys.c
> @@ -7,8 +7,7 @@
> #include <linux/dmar.h>
>
> #include <asm/smp.h>
> -#include <asm/ipi.h>
> -#include "x2apic.h"
> +#include <asm/x2apic.h>
>
> int x2apic_phys;
>
> @@ -100,43 +99,6 @@ static int x2apic_phys_probe(void)
> return apic == &apic_x2apic_phys;
> }
>
> -/* Common x2apic functions, also used by x2apic_cluster */
> -int x2apic_apic_id_valid(int apicid)
> -{
> - return 1;
> -}
> -
> -int x2apic_apic_id_registered(void)
> -{
> - return 1;
> -}
> -
> -void __x2apic_send_IPI_dest(unsigned int apicid, int vector, unsigned int dest)
> -{
> - unsigned long cfg = __prepare_ICR(0, vector, dest);
> - native_x2apic_icr_write(cfg, apicid);
> -}
> -
> -unsigned int x2apic_get_apic_id(unsigned long id)
> -{
> - return id;
> -}
> -
> -u32 x2apic_set_apic_id(unsigned int id)
> -{
> - return id;
> -}
> -
> -int x2apic_phys_pkg_id(int initial_apicid, int index_msb)
> -{
> - return initial_apicid >> index_msb;
> -}
> -
> -void x2apic_send_IPI_self(int vector)
> -{
> - apic_write(APIC_SELF_IPI, vector);
> -}
> -
> static struct apic apic_x2apic_phys __ro_after_init = {
>
> .name = "physical x2apic",
> @@ -148,10 +110,12 @@ static struct apic apic_x2apic_phys __ro_after_init = {
> .irq_delivery_mode = dest_Fixed,
> .irq_dest_mode = 0, /* physical */
>
> + .target_cpus = online_target_cpus,
> .disable_esr = 0,
> .dest_logical = 0,
> .check_apicid_used = NULL,
>
> + .vector_allocation_domain = default_vector_allocation_domain,
> .init_apic_ldr = init_x2apic_ldr,
>
> .ioapic_phys_id_map = NULL,
> @@ -164,7 +128,7 @@ static struct apic apic_x2apic_phys __ro_after_init = {
> .get_apic_id = x2apic_get_apic_id,
> .set_apic_id = x2apic_set_apic_id,
>
> - .calc_dest_apicid = apic_default_calc_apicid,
> + .cpu_mask_to_apicid = default_cpu_mask_to_apicid,
>
> .send_IPI = x2apic_send_IPI,
> .send_IPI_mask = x2apic_send_IPI_mask,
> diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
> index e1b8e8bf6b3c..2915c6d06821 100644
> --- a/arch/x86/kernel/apic/x2apic_uv_x.c
> +++ b/arch/x86/kernel/apic/x2apic_uv_x.c
> @@ -568,9 +568,16 @@ static void uv_init_apic_ldr(void)
> {
> }
>
> -static u32 apic_uv_calc_apicid(unsigned int cpu)
> +static int
> +uv_cpu_mask_to_apicid(const struct cpumask *mask, struct irq_data *irqdata,
> + unsigned int *apicid)
> {
> - return apic_default_calc_apicid(cpu) | uv_apicid_hibits;
> + int ret = default_cpu_mask_to_apicid(mask, irqdata, apicid);
> +
> + if (!ret)
> + *apicid |= uv_apicid_hibits;
> +
> + return ret;
> }
>
> static unsigned int x2apic_get_apic_id(unsigned long x)
> @@ -583,7 +590,7 @@ static unsigned int x2apic_get_apic_id(unsigned long x)
> return id;
> }
>
> -static u32 set_apic_id(unsigned int id)
> +static unsigned long set_apic_id(unsigned int id)
> {
> /* CHECKME: Do we need to mask out the xapic extra bits? */
> return id;
> @@ -620,10 +627,12 @@ static struct apic apic_x2apic_uv_x __ro_after_init = {
> .irq_delivery_mode = dest_Fixed,
> .irq_dest_mode = 0, /* Physical */
>
> + .target_cpus = online_target_cpus,
> .disable_esr = 0,
> .dest_logical = APIC_DEST_LOGICAL,
> .check_apicid_used = NULL,
>
> + .vector_allocation_domain = default_vector_allocation_domain,
> .init_apic_ldr = uv_init_apic_ldr,
>
> .ioapic_phys_id_map = NULL,
> @@ -636,7 +645,7 @@ static struct apic apic_x2apic_uv_x __ro_after_init = {
> .get_apic_id = x2apic_get_apic_id,
> .set_apic_id = set_apic_id,
>
> - .calc_dest_apicid = apic_uv_calc_apicid,
> + .cpu_mask_to_apicid = uv_cpu_mask_to_apicid,
>
> .send_IPI = uv_send_IPI_one,
> .send_IPI_mask = uv_send_IPI_mask,
> diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c
> index 86c4439f9d74..8f5cb2c7060c 100644
> --- a/arch/x86/kernel/i8259.c
> +++ b/arch/x86/kernel/i8259.c
> @@ -114,7 +114,6 @@ static void make_8259A_irq(unsigned int irq)
> io_apic_irqs &= ~(1<<irq);
> irq_set_chip_and_handler(irq, &i8259A_chip, handle_level_irq);
> enable_irq(irq);
> - lapic_assign_legacy_vector(irq, true);
> }
>
> /*
> diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c
> index d985cef3984f..014cb2fc47ff 100644
> --- a/arch/x86/kernel/idt.c
> +++ b/arch/x86/kernel/idt.c
> @@ -223,7 +223,7 @@ idt_setup_from_table(gate_desc *idt, const struct idt_data *t, int size, bool sy
> idt_init_desc(&desc, t);
> write_idt_entry(idt, t->vector, &desc);
> if (sys)
> - set_bit(t->vector, system_vectors);
> + set_bit(t->vector, used_vectors);
> }
> }
>
> @@ -311,14 +311,14 @@ void __init idt_setup_apic_and_irq_gates(void)
>
> idt_setup_from_table(idt_table, apic_idts, ARRAY_SIZE(apic_idts), true);
>
> - for_each_clear_bit_from(i, system_vectors, FIRST_SYSTEM_VECTOR) {
> + for_each_clear_bit_from(i, used_vectors, FIRST_SYSTEM_VECTOR) {
> entry = irq_entries_start + 8 * (i - FIRST_EXTERNAL_VECTOR);
> set_intr_gate(i, entry);
> }
>
> - for_each_clear_bit_from(i, system_vectors, NR_VECTORS) {
> + for_each_clear_bit_from(i, used_vectors, NR_VECTORS) {
> #ifdef CONFIG_X86_LOCAL_APIC
> - set_bit(i, system_vectors);
> + set_bit(i, used_vectors);
> set_intr_gate(i, spurious_interrupt);
> #else
> entry = irq_entries_start + 8 * (i - FIRST_EXTERNAL_VECTOR);
> @@ -356,7 +356,7 @@ void idt_invalidate(void *addr)
>
> void __init update_intr_gate(unsigned int n, const void *addr)
> {
> - if (WARN_ON_ONCE(!test_bit(n, system_vectors)))
> + if (WARN_ON_ONCE(!test_bit(n, used_vectors)))
> return;
> set_intr_gate(n, addr);
> }
> @@ -364,6 +364,6 @@ void __init update_intr_gate(unsigned int n, const void *addr)
> void alloc_intr_gate(unsigned int n, const void *addr)
> {
> BUG_ON(n < FIRST_SYSTEM_VECTOR);
> - if (!test_and_set_bit(n, system_vectors))
> + if (!test_and_set_bit(n, used_vectors))
> set_intr_gate(n, addr);
> }
> diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
> index 49cfd9fe7589..52089c043160 100644
> --- a/arch/x86/kernel/irq.c
> +++ b/arch/x86/kernel/irq.c
> @@ -134,7 +134,7 @@ int arch_show_interrupts(struct seq_file *p, int prec)
> seq_puts(p, " Machine check polls\n");
> #endif
> #if IS_ENABLED(CONFIG_HYPERV) || defined(CONFIG_XEN)
> - if (test_bit(HYPERVISOR_CALLBACK_VECTOR, system_vectors)) {
> + if (test_bit(HYPERVISOR_CALLBACK_VECTOR, used_vectors)) {
> seq_printf(p, "%*s: ", prec, "HYP");
> for_each_online_cpu(j)
> seq_printf(p, "%10u ",
> @@ -333,6 +333,105 @@ __visible void smp_kvm_posted_intr_nested_ipi(struct pt_regs *regs)
>
>
> #ifdef CONFIG_HOTPLUG_CPU
> +
> +/* These two declarations are only used in check_irq_vectors_for_cpu_disable()
> + * below, which is protected by stop_machine(). Putting them on the stack
> + * results in a stack frame overflow. Dynamic allocation could fail, so
> + * declare these two cpumasks as global.
> + */
> +static struct cpumask affinity_new, online_new;
> +
> +/*
> + * This cpu is going to be removed and its vectors migrated to the remaining
> + * online cpus. Check to see if there are enough vectors in the remaining cpus.
> + * This function is protected by stop_machine().
> + */
> +int check_irq_vectors_for_cpu_disable(void)
> +{
> + unsigned int this_cpu, vector, this_count, count;
> + struct irq_desc *desc;
> + struct irq_data *data;
> + int cpu;
> +
> + this_cpu = smp_processor_id();
> + cpumask_copy(&online_new, cpu_online_mask);
> + cpumask_clear_cpu(this_cpu, &online_new);
> +
> + this_count = 0;
> + for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
> + desc = __this_cpu_read(vector_irq[vector]);
> + if (IS_ERR_OR_NULL(desc))
> + continue;
> + /*
> + * Protect against concurrent action removal, affinity
> + * changes etc.
> + */
> + raw_spin_lock(&desc->lock);
> + data = irq_desc_get_irq_data(desc);
> + cpumask_copy(&affinity_new,
> + irq_data_get_affinity_mask(data));
> + cpumask_clear_cpu(this_cpu, &affinity_new);
> +
> + /* Do not count inactive or per-cpu irqs. */
> + if (!irq_desc_has_action(desc) || irqd_is_per_cpu(data)) {
> + raw_spin_unlock(&desc->lock);
> + continue;
> + }
> +
> + raw_spin_unlock(&desc->lock);
> + /*
> + * A single irq may be mapped to multiple cpus'
> + * vector_irq[] (for example IOAPIC cluster mode). In
> + * this case we have two possibilities:
> + *
> + * 1) the resulting affinity mask is empty; that is,
> + * the down'd cpu is the last cpu in the irq's
> + * affinity mask, or
> + *
> + * 2) the resulting affinity mask is no longer a
> + * subset of the online cpus but is not empty; that
> + * is, the down'd cpu is the last online cpu in a
> + * user-set affinity mask.
> + */
> + if (cpumask_empty(&affinity_new) ||
> + !cpumask_subset(&affinity_new, &online_new))
> + this_count++;
> + }
> + /* No need to check any further. */
> + if (!this_count)
> + return 0;
> +
> + count = 0;
> + for_each_online_cpu(cpu) {
> + if (cpu == this_cpu)
> + continue;
> + /*
> + * We scan from FIRST_EXTERNAL_VECTOR up to
> + * FIRST_SYSTEM_VECTOR. If the vector is marked in the
> + * used_vectors bitmap or an irq is assigned to it, we
> + * don't count it as available.
> + *
> + * As this is an inaccurate snapshot anyway, we can do
> + * this without holding vector_lock.
> + */
> + for (vector = FIRST_EXTERNAL_VECTOR;
> + vector < FIRST_SYSTEM_VECTOR; vector++) {
> + if (!test_bit(vector, used_vectors) &&
> + IS_ERR_OR_NULL(per_cpu(vector_irq, cpu)[vector])) {
> + if (++count == this_count)
> + return 0;
> + }
> + }
> + }
> +
> + if (count < this_count) {
> + pr_warn("CPU %d disable failed: CPU has %u vectors assigned and there are only %u available.\n",
> + this_cpu, this_count, count);
> + return -ERANGE;
> + }
> + return 0;
> +}
> +
> /* A cpu has been removed from cpu_online_mask. Reset irq affinities. */
> void fixup_irqs(void)
> {
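
The accounting in check_irq_vectors_for_cpu_disable() above boils down
to a needed-versus-available comparison: count the vectors only the
outgoing cpu can serve, count the free vectors left on the survivors,
and refuse the unplug if the former exceeds the latter. A compilable
toy of just that final check; can_disable_cpu() and every number here
are invented for illustration:

#include <errno.h>
#include <stdio.h>

/*
 * 'needed' models the vectors only the outgoing cpu can serve;
 * 'avail' models the unassigned, unreserved vectors counted on the
 * surviving cpus.
 */
static int can_disable_cpu(unsigned int needed, unsigned int avail)
{
        if (!needed)
                return 0;               /* nothing to migrate */
        if (avail < needed) {
                printf("disable failed: %u assigned, %u available\n",
                       needed, avail);
                return -ERANGE;
        }
        return 0;
}

int main(void)
{
        can_disable_cpu(3, 2);          /* refused, like -ERANGE above */
        can_disable_cpu(3, 40);         /* allowed */
        return 0;
}
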
> diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
> index 8da3e909e967..1e4094eba15e 100644
> --- a/arch/x86/kernel/irqinit.c
> +++ b/arch/x86/kernel/irqinit.c
> @@ -61,6 +61,9 @@ void __init init_ISA_irqs(void)
> struct irq_chip *chip = legacy_pic->chip;
> int i;
>
> +#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC)
> + init_bsp_APIC();
> +#endif
> legacy_pic->init(0);
>
> for (i = 0; i < nr_legacy_irqs(); i++)
> @@ -91,7 +94,6 @@ void __init native_init_IRQ(void)
> x86_init.irqs.pre_vector_init();
>
> idt_setup_apic_and_irq_gates();
> - lapic_assign_system_vectors();
>
> if (!acpi_ioapic && !of_ioapic && nr_legacy_irqs())
> setup_irq(2, &irq2);
> diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
> index 8af2e8d0c0a1..be33a5c63d20 100644
> --- a/arch/x86/kernel/setup.c
> +++ b/arch/x86/kernel/setup.c
> @@ -136,6 +136,18 @@ RESERVE_BRK(dmi_alloc, 65536);
> static __initdata unsigned long _brk_start = (unsigned long)__brk_base;
> unsigned long _brk_end = (unsigned long)__brk_base;
>
> +#ifdef CONFIG_X86_64
> +int default_cpu_present_to_apicid(int mps_cpu)
> +{
> + return __default_cpu_present_to_apicid(mps_cpu);
> +}
> +
> +int default_check_phys_apicid_present(int phys_apicid)
> +{
> + return __default_check_phys_apicid_present(phys_apicid);
> +}
> +#endif
> +
> struct boot_params boot_params;
>
> /*
> diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
> index 3d01df7d7cf6..13bd986b7f90 100644
> --- a/arch/x86/kernel/smpboot.c
> +++ b/arch/x86/kernel/smpboot.c
> @@ -253,14 +253,14 @@ static void notrace start_secondary(void *unused)
> check_tsc_sync_target();
>
> /*
> - * Lock vector_lock, set CPU online and bring the vector
> - * allocator online. Online must be set with vector_lock held
> - * to prevent a concurrent irq setup/teardown from seeing a
> - * half valid vector space.
> + * Lock vector_lock and initialize the vectors on this cpu
> + * before setting the cpu online. We must set it online with
> + * vector_lock held to prevent a concurrent setup/teardown
> + * from seeing a half-valid vector space.
> */
> lock_vector_lock();
> + setup_vector_irq(smp_processor_id());
> set_cpu_online(smp_processor_id(), true);
> - lapic_online();
> unlock_vector_lock();
> cpu_set_state_online(smp_processor_id());
> x86_platform.nmi_init();
> @@ -1132,10 +1132,17 @@ static __init void disable_smp(void)
> cpumask_set_cpu(0, topology_core_cpumask(0));
> }
>
> +enum {
> + SMP_OK,
> + SMP_NO_CONFIG,
> + SMP_NO_APIC,
> + SMP_FORCE_UP,
> +};
> +
> /*
> * Various sanity checks.
> */
> -static void __init smp_sanity_check(void)
> +static int __init smp_sanity_check(unsigned max_cpus)
> {
> preempt_disable();
>
> @@ -1172,6 +1179,16 @@ static void __init smp_sanity_check(void)
> physid_set(hard_smp_processor_id(), phys_cpu_present_map);
> }
>
> + /*
> + * If we couldn't find an SMP configuration at boot time,
> + * get out of here now!
> + */
> + if (!smp_found_config && !acpi_lapic) {
> + preempt_enable();
> + pr_notice("SMP motherboard not detected\n");
> + return SMP_NO_CONFIG;
> + }
> +
> /*
> * Should not be necessary because the MP table should list the boot
> * CPU too, but we do it for the sake of robustness anyway.
> @@ -1182,6 +1199,29 @@ static void __init smp_sanity_check(void)
> physid_set(hard_smp_processor_id(), phys_cpu_present_map);
> }
> preempt_enable();
> +
> + /*
> + * If we couldn't find a local APIC, then get out of here now!
> + */
> + if (APIC_INTEGRATED(boot_cpu_apic_version) &&
> + !boot_cpu_has(X86_FEATURE_APIC)) {
> + if (!disable_apic) {
> + pr_err("BIOS bug, local APIC #%d not detected!...\n",
> + boot_cpu_physical_apicid);
> + pr_err("... forcing use of dummy APIC emulation (tell your hw vendor)\n");
> + }
> + return SMP_NO_APIC;
> + }
> +
> + /*
> + * If SMP should be disabled, then really disable it!
> + */
> + if (!max_cpus) {
> + pr_info("SMP mode deactivated\n");
> + return SMP_FORCE_UP;
> + }
> +
> + return SMP_OK;
> }
>
> static void __init smp_cpu_index_default(void)
> @@ -1196,18 +1236,9 @@ static void __init smp_cpu_index_default(void)
> }
> }
>
> -static void __init smp_get_logical_apicid(void)
> -{
> - if (x2apic_mode)
> - cpu0_logical_apicid = apic_read(APIC_LDR);
> - else
> - cpu0_logical_apicid = GET_APIC_LOGICAL_ID(apic_read(APIC_LDR));
> -}
> -
> /*
> - * Prepare for SMP bootup.
> - * @max_cpus: configured maximum number of CPUs, It is a legacy parameter
> - * for common interface support.
> + * Prepare for SMP bootup. The MP table or ACPI has been read
> + * earlier. Just do some sanity checking here and enable APIC mode.
> */
> void __init native_smp_prepare_cpus(unsigned int max_cpus)
> {
> @@ -1239,27 +1270,31 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
>
> set_cpu_sibling_map(0);
>
> - smp_sanity_check();
> -
> - switch (apic_intr_mode) {
> - case APIC_PIC:
> - case APIC_VIRTUAL_WIRE_NO_CONFIG:
> + switch (smp_sanity_check(max_cpus)) {
> + case SMP_NO_CONFIG:
> disable_smp();
> + if (APIC_init_uniprocessor())
> + pr_notice("Local APIC not detected. Using dummy APIC emulation.\n");
> return;
> - case APIC_SYMMETRIC_IO_NO_ROUTING:
> + case SMP_NO_APIC:
> disable_smp();
> - /* Setup local timer */
> - x86_init.timers.setup_percpu_clockev();
> return;
> - case APIC_VIRTUAL_WIRE:
> - case APIC_SYMMETRIC_IO:
> + case SMP_FORCE_UP:
> + disable_smp();
> + apic_bsp_setup(false);
> + return;
> + case SMP_OK:
> break;
> }
>
> - /* Setup local timer */
> - x86_init.timers.setup_percpu_clockev();
> + if (read_apic_id() != boot_cpu_physical_apicid) {
> + panic("Boot APIC ID in local APIC unexpected (%d vs %d)",
> + read_apic_id(), boot_cpu_physical_apicid);
> + /* Or can we switch back to PIC here? */
> + }
>
> - smp_get_logical_apicid();
> + default_setup_apic_routing();
> + cpu0_logical_apicid = apic_bsp_setup(false);
>
> pr_info("CPU0: ");
> print_cpu_info(&cpu_data(0));
> @@ -1313,6 +1348,7 @@ void __init native_smp_cpus_done(unsigned int max_cpus)
>
> nmi_selftest();
> impress_friends();
> + setup_ioapic_dest();
> mtrr_aps_init();
> }
>
> @@ -1471,14 +1507,13 @@ void cpu_disable_common(void)
> remove_cpu_from_maps(cpu);
> unlock_vector_lock();
> fixup_irqs();
> - lapic_offline();
> }
>
> int native_cpu_disable(void)
> {
> int ret;
>
> - ret = lapic_can_unplug_cpu();
> + ret = check_irq_vectors_for_cpu_disable();
> if (ret)
> return ret;
>
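
With the revert, native_smp_prepare_cpus() dispatches on the SMP_*
code returned by smp_sanity_check() instead of on apic_intr_mode. A
compilable sketch of that dispatch shape; sanity_check() and its two
flags are invented stand-ins, not the kernel functions:

#include <stdio.h>

enum { SMP_OK, SMP_NO_CONFIG, SMP_NO_APIC, SMP_FORCE_UP };

static int sanity_check(int have_config, int have_apic,
                        unsigned int max_cpus)
{
        if (!have_config)
                return SMP_NO_CONFIG;
        if (!have_apic)
                return SMP_NO_APIC;
        if (!max_cpus)
                return SMP_FORCE_UP;    /* e.g. booted with maxcpus=0 */
        return SMP_OK;
}

int main(void)
{
        switch (sanity_check(1, 1, 0)) {
        case SMP_NO_CONFIG:
        case SMP_NO_APIC:
                puts("disable_smp() and return");
                break;
        case SMP_FORCE_UP:
                puts("disable_smp(), then apic_bsp_setup(false)");
                break;
        case SMP_OK:
                puts("continue with SMP bringup");
                break;
        }
        return 0;
}
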
> diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c
> index 749d189f8cd4..879af864d99a 100644
> --- a/arch/x86/kernel/time.c
> +++ b/arch/x86/kernel/time.c
> @@ -85,11 +85,6 @@ void __init hpet_time_init(void)
> static __init void x86_late_time_init(void)
> {
> x86_init.timers.timer_init();
> - /*
> - * After PIT/HPET timers init, select and setup
> - * the final interrupt mode for delivering IRQs.
> - */
> - x86_init.irqs.intr_mode_init();
> tsc_init();
> }
>
> diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
> index 989514c94a55..4e2eb01cb3bf 100644
> --- a/arch/x86/kernel/traps.c
> +++ b/arch/x86/kernel/traps.c
> @@ -71,7 +71,7 @@
> #include <asm/proto.h>
> #endif
>
> -DECLARE_BITMAP(system_vectors, NR_VECTORS);
> +DECLARE_BITMAP(used_vectors, NR_VECTORS);
>
> static inline void cond_local_irq_enable(struct pt_regs *regs)
> {
> diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c
> index 44685fb2a192..b034b1b14b9c 100644
> --- a/arch/x86/kernel/vsmp_64.c
> +++ b/arch/x86/kernel/vsmp_64.c
> @@ -26,6 +26,9 @@
>
> #define TOPOLOGY_REGISTER_OFFSET 0x10
>
> +/* Flag below is initialized once during vSMP PCI initialization. */
> +static int irq_routing_comply = 1;
> +
> #if defined CONFIG_PCI && defined CONFIG_PARAVIRT
> /*
> * Interrupt control on vSMPowered systems:
> @@ -102,6 +105,9 @@ static void __init set_vsmp_pv_ops(void)
> if (cap & ctl & BIT(8)) {
> ctl &= ~BIT(8);
>
> + /* Interrupt routing set to ignore */
> + irq_routing_comply = 0;
> +
> #ifdef CONFIG_PROC_FS
> /* Don't let users change irq affinity via procfs */
> no_irq_affinity = 1;
> @@ -205,10 +211,23 @@ static int apicid_phys_pkg_id(int initial_apic_id, int index_msb)
> return hard_smp_processor_id() >> index_msb;
> }
>
> +/*
> + * In vSMP, all cpus should be capable of handling interrupts, regardless of
> + * the APIC used.
> + */
> +static void fill_vector_allocation_domain(int cpu, struct cpumask *retmask,
> + const struct cpumask *mask)
> +{
> + cpumask_setall(retmask);
> +}
> +
> static void vsmp_apic_post_init(void)
> {
> /* need to update phys_pkg_id */
> apic->phys_pkg_id = apicid_phys_pkg_id;
> +
> + if (!irq_routing_comply)
> + apic->vector_allocation_domain = fill_vector_allocation_domain;
> }
>
> void __init vsmp_init(void)
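
The restored vSMP hook is tiny but easy to misread: when interrupt
routing compliance is off, the vector allocation domain for every cpu
is simply "all cpus". A toy rendering with an 8-bit mask standing in
for the kernel's cpumask; everything below is invented for
illustration:

#include <stdint.h>
#include <stdio.h>

typedef uint8_t cpumask_t;      /* toy 8-cpu mask */

/* Mirrors cpumask_setall() in the hunk above: every cpu becomes an
 * eligible target for vector allocation. */
static void fill_vector_allocation_domain(cpumask_t *retmask)
{
        *retmask = 0xff;
}

int main(void)
{
        cpumask_t mask = 0;

        fill_vector_allocation_domain(&mask);
        printf("allocation domain: 0x%02x\n", mask);
        return 0;
}
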
> diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
> index 1151ccd72ce9..c8fa4cd31903 100644
> --- a/arch/x86/kernel/x86_init.c
> +++ b/arch/x86/kernel/x86_init.c
> @@ -57,7 +57,6 @@ struct x86_init_ops x86_init __initdata = {
> .pre_vector_init = init_ISA_irqs,
> .intr_init = native_init_IRQ,
> .trap_init = x86_init_noop,
> - .intr_mode_init = apic_intr_mode_init
> },
>
> .oem = {
> diff --git a/arch/x86/xen/apic.c b/arch/x86/xen/apic.c
> index 6b830d4cb4c8..30434b8708f2 100644
> --- a/arch/x86/xen/apic.c
> +++ b/arch/x86/xen/apic.c
> @@ -31,7 +31,7 @@ static unsigned int xen_io_apic_read(unsigned apic, unsigned reg)
> return 0xfd;
> }
>
> -static u32 xen_set_apic_id(unsigned int x)
> +static unsigned long xen_set_apic_id(unsigned int x)
> {
> WARN_ON(1);
> return x;
> @@ -161,10 +161,12 @@ static struct apic xen_pv_apic = {
> /* .irq_delivery_mode - used in native_compose_msi_msg only */
> /* .irq_dest_mode - used in native_compose_msi_msg only */
>
> + .target_cpus = default_target_cpus,
> .disable_esr = 0,
> /* .dest_logical - default_send_IPI_ use it but we use our own. */
> .check_apicid_used = default_check_apicid_used, /* Used on 32-bit */
>
> + .vector_allocation_domain = flat_vector_allocation_domain,
> .init_apic_ldr = xen_noop, /* setup_local_APIC calls it */
>
> .ioapic_phys_id_map = default_ioapic_phys_id_map, /* Used on 32-bit */
> @@ -177,7 +179,7 @@ static struct apic xen_pv_apic = {
> .get_apic_id = xen_get_apic_id,
> .set_apic_id = xen_set_apic_id, /* Can be NULL on 32-bit. */
>
> - .calc_dest_apicid = apic_flat_calc_apicid,
> + .cpu_mask_to_apicid = flat_cpu_mask_to_apicid,
>
> #ifdef CONFIG_SMP
> .send_IPI_mask = xen_send_IPI_mask,
> diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
> index 5b2b3f3f6531..fbd054d6ac97 100644
> --- a/arch/x86/xen/enlighten_pv.c
> +++ b/arch/x86/xen/enlighten_pv.c
> @@ -1230,7 +1230,6 @@ asmlinkage __visible void __init xen_start_kernel(void)
> x86_platform.get_nmi_reason = xen_get_nmi_reason;
>
> x86_init.resources.memory_setup = xen_memory_setup;
> - x86_init.irqs.intr_mode_init = x86_init_noop;
> x86_init.oem.arch_setup = xen_arch_setup;
> x86_init.oem.banner = xen_banner;
>
> diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
> index 7d5eb004091d..e2a739001c8a 100644
> --- a/drivers/iommu/amd_iommu.c
> +++ b/drivers/iommu/amd_iommu.c
> @@ -4178,25 +4178,16 @@ static void irq_remapping_free(struct irq_domain *domain, unsigned int virq,
> irq_domain_free_irqs_common(domain, virq, nr_irqs);
> }
>
> -static void amd_ir_update_irte(struct irq_data *irqd, struct amd_iommu *iommu,
> - struct amd_ir_data *ir_data,
> - struct irq_2_irte *irte_info,
> - struct irq_cfg *cfg);
> -
> static int irq_remapping_activate(struct irq_domain *domain,
> struct irq_data *irq_data, bool early)
> {
> struct amd_ir_data *data = irq_data->chip_data;
> struct irq_2_irte *irte_info = &data->irq_2_irte;
> struct amd_iommu *iommu = amd_iommu_rlookup_table[irte_info->devid];
> - struct irq_cfg *cfg = irqd_cfg(irq_data);
> -
> - if (!iommu)
> - return 0;
>
> - iommu->irte_ops->activate(data->entry, irte_info->devid,
> - irte_info->index);
> - amd_ir_update_irte(irq_data, iommu, data, irte_info, cfg);
> + if (iommu)
> + iommu->irte_ops->activate(data->entry, irte_info->devid,
> + irte_info->index);
> return 0;
> }
>
> @@ -4284,22 +4275,6 @@ static int amd_ir_set_vcpu_affinity(struct irq_data *data, void *vcpu_info)
> return modify_irte_ga(irte_info->devid, irte_info->index, irte, ir_data);
> }
>
> -
> -static void amd_ir_update_irte(struct irq_data *irqd, struct amd_iommu *iommu,
> - struct amd_ir_data *ir_data,
> - struct irq_2_irte *irte_info,
> - struct irq_cfg *cfg)
> -{
> -
> - /*
> - * Atomically updates the IRTE with the new destination, vector
> - * and flushes the interrupt entry cache.
> - */
> - iommu->irte_ops->set_affinity(ir_data->entry, irte_info->devid,
> - irte_info->index, cfg->vector,
> - cfg->dest_apicid);
> -}
> -
> static int amd_ir_set_affinity(struct irq_data *data,
> const struct cpumask *mask, bool force)
> {
> @@ -4317,7 +4292,13 @@ static int amd_ir_set_affinity(struct irq_data *data,
> if (ret < 0 || ret == IRQ_SET_MASK_OK_DONE)
> return ret;
>
> - amd_ir_update_irte(data, iommu, ir_data, irte_info, cfg);
> + /*
> + * Atomically update the IRTE with the new destination and
> + * vector, then flush the interrupt entry cache.
> + */
> + iommu->irte_ops->set_affinity(ir_data->entry, irte_info->devid,
> + irte_info->index, cfg->vector, cfg->dest_apicid);
> +
> /*
> * After this point, all the interrupts will start arriving
> * at the new destination. So, time to cleanup the previous
> diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c
> index 76a193c7fcfc..324163330eaa 100644
> --- a/drivers/iommu/intel_irq_remapping.c
> +++ b/drivers/iommu/intel_irq_remapping.c
> @@ -1122,24 +1122,6 @@ struct irq_remap_ops intel_irq_remap_ops = {
> .get_irq_domain = intel_get_irq_domain,
> };
>
> -static void intel_ir_reconfigure_irte(struct irq_data *irqd, bool force)
> -{
> - struct intel_ir_data *ir_data = irqd->chip_data;
> - struct irte *irte = &ir_data->irte_entry;
> - struct irq_cfg *cfg = irqd_cfg(irqd);
> -
> - /*
> - * Atomically updates the IRTE with the new destination, vector
> - * and flushes the interrupt entry cache.
> - */
> - irte->vector = cfg->vector;
> - irte->dest_id = IRTE_DEST(cfg->dest_apicid);
> -
> - /* Update the hardware only if the interrupt is in remapped mode. */
> - if (!force || ir_data->irq_2_iommu.mode == IRQ_REMAPPING)
> - modify_irte(&ir_data->irq_2_iommu, irte);
> -}
> -
> /*
> * Migrate the IO-APIC irq in the presence of intr-remapping.
> *
> @@ -1158,15 +1140,27 @@ static int
> intel_ir_set_affinity(struct irq_data *data, const struct cpumask *mask,
> bool force)
> {
> - struct irq_data *parent = data->parent_data;
> + struct intel_ir_data *ir_data = data->chip_data;
> + struct irte *irte = &ir_data->irte_entry;
> struct irq_cfg *cfg = irqd_cfg(data);
> + struct irq_data *parent = data->parent_data;
> int ret;
>
> ret = parent->chip->irq_set_affinity(parent, mask, force);
> if (ret < 0 || ret == IRQ_SET_MASK_OK_DONE)
> return ret;
>
> - intel_ir_reconfigure_irte(data, false);
> + /*
> + * Atomically update the IRTE with the new destination and
> + * vector, then flush the interrupt entry cache.
> + */
> + irte->vector = cfg->vector;
> + irte->dest_id = IRTE_DEST(cfg->dest_apicid);
> +
> + /* Update the hardware only if the interrupt is in remapped mode. */
> + if (ir_data->irq_2_iommu.mode == IRQ_REMAPPING)
> + modify_irte(&ir_data->irq_2_iommu, irte);
> +
> /*
> * After this point, all the interrupts will start arriving
> * at the new destination. So, time to cleanup the previous
> @@ -1399,7 +1393,9 @@ static void intel_irq_remapping_free(struct irq_domain *domain,
> static int intel_irq_remapping_activate(struct irq_domain *domain,
> struct irq_data *irq_data, bool early)
> {
> - intel_ir_reconfigure_irte(irq_data, true);
> + struct intel_ir_data *data = irq_data->chip_data;
> +
> + modify_irte(&data->irq_2_iommu, &data->irte_entry);
> return 0;
> }
>
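
Both IOMMU hunks restore the same open-coded ordering: write the new
vector and destination into the IRTE first, and only afterwards clean
up the old vector, so interrupts never target a stale slot. A
compilable toy of that ordering; struct irte here and commit_irte()
are invented stand-ins for the real table entry and modify_irte():

#include <stdio.h>

struct irte {
        unsigned int vector;
        unsigned int dest_id;
};

/* Stand-in for modify_irte(): once this returns, the "hardware"
 * steers the interrupt at the new target. */
static void commit_irte(const struct irte *e)
{
        printf("IRTE -> vector 0x%x, apic %u\n", e->vector, e->dest_id);
}

static void set_affinity(struct irte *e, unsigned int vec,
                         unsigned int dest)
{
        e->vector = vec;
        e->dest_id = dest;
        commit_irte(e);                 /* 1: new routing is live */
        puts("release old vector");     /* 2: only safe after step 1 */
}

int main(void)
{
        struct irte e = { .vector = 0x22, .dest_id = 0 };

        set_affinity(&e, 0x31, 4);
        return 0;
}
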
> diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
> index e06607167858..496ed9130600 100644
> --- a/drivers/pci/msi.c
> +++ b/drivers/pci/msi.c
> @@ -1441,8 +1441,6 @@ struct irq_domain *pci_msi_create_irq_domain(struct fwnode_handle *fwnode,
> pci_msi_domain_update_chip_ops(info);
>
> info->flags |= MSI_FLAG_ACTIVATE_EARLY;
> - if (IS_ENABLED(CONFIG_GENERIC_IRQ_RESERVATION_MODE))
> - info->flags |= MSI_FLAG_MUST_REACTIVATE;
>
> domain = msi_create_irq_domain(fwnode, info, parent);
> if (!domain)
> diff --git a/init/main.c b/init/main.c
> index dfec3809e740..642b88bd3c9b 100644
> --- a/init/main.c
> +++ b/init/main.c
> @@ -664,12 +664,12 @@ asmlinkage __visible void __init start_kernel(void)
> debug_objects_mem_init();
> setup_per_cpu_pageset();
> numa_policy_init();
> - acpi_early_init();
> if (late_time_init)
> late_time_init();
> calibrate_delay();
> pid_idr_init();
> anon_vma_init();
> + acpi_early_init();
> #ifdef CONFIG_X86
> if (efi_enabled(EFI_RUNTIME_SERVICES))
> efi_enter_virtual_mode();
> diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig
> index 89e355866450..ac1a3e29d3b9 100644
> --- a/kernel/irq/Kconfig
> +++ b/kernel/irq/Kconfig
> @@ -100,9 +100,6 @@ config IRQ_TIMINGS
> config GENERIC_IRQ_MATRIX_ALLOCATOR
> bool
>
> -config GENERIC_IRQ_RESERVATION_MODE
> - bool
> -
> config IRQ_DOMAIN_DEBUG
> bool "Expose hardware/virtual IRQ mapping via debugfs"
> depends on IRQ_DOMAIN && DEBUG_FS
> --
> 2.15.0
>