mirror of
https://github.com/lkl/linux.git
synced 2025-12-19 16:13:19 +09:00
Nadav reported that since this_cpu_read() became asm-volatile, many
smp_processor_id() users generated worse code due to the extra
constraints.
However since smp_processor_id() is reading a stable value, we can use
__this_cpu_read().
While this does reduce text size somewhat, this mostly results in code
movement to .text.unlikely as a result of more/larger .cold.
subfunctions. Less text on the hotpath is good for I$.
$ ./compare.sh defconfig-build1 defconfig-build2 vmlinux.o
setup_APIC_ibs 90 98 -12,+20
force_ibs_eilvt_setup 400 413 -57,+70
pci_serr_error 109 104 -54,+49
pci_serr_error 109 104 -54,+49
unknown_nmi_error 125 120 -76,+71
unknown_nmi_error 125 120 -76,+71
io_check_error 125 132 -97,+104
intel_thermal_interrupt 730 822 +92,+0
intel_init_thermal 951 945 -6,+0
generic_get_mtrr 301 294 -7,+0
generic_get_mtrr 301 294 -7,+0
generic_set_all 749 754 -44,+49
get_fixed_ranges 352 360 -41,+49
x86_acpi_suspend_lowlevel 369 363 -6,+0
check_tsc_sync_source 412 412 -71,+71
irq_migrate_all_off_this_cpu 662 674 -14,+26
clocksource_watchdog 748 748 -113,+113
__perf_event_account_interrupt 204 197 -7,+0
attempt_merge 1748 1741 -7,+0
intel_guc_send_ct 1424 1409 -15,+0
__fini_doorbell 235 231 -4,+0
bdw_set_cdclk 928 923 -5,+0
gen11_dsi_disable 1571 1556 -15,+0
gmbus_wait 493 488 -5,+0
md_make_request 376 369 -7,+0
__split_and_process_bio 543 536 -7,+0
delay_tsc 96 89 -7,+0
hsw_disable_pc8 696 691 -5,+0
tsc_verify_tsc_adjust 215 228 -22,+35
cpuidle_driver_unref 56 49 -7,+0
blk_account_io_completion 159 148 -11,+0
mtrr_wrmsr 95 99 -29,+33
__intel_wait_for_register_fw 401 419 +18,+0
cpuidle_driver_ref 43 36 -7,+0
cpuidle_get_driver 15 8 -7,+0
blk_account_io_done 535 528 -7,+0
irq_migrate_all_off_this_cpu 662 674 -14,+26
check_tsc_sync_source 412 412 -71,+71
irq_wait_for_poll 170 163 -7,+0
generic_end_io_acct 329 322 -7,+0
x86_acpi_suspend_lowlevel 369 363 -6,+0
nohz_balance_enter_idle 198 191 -7,+0
generic_start_io_acct 254 247 -7,+0
blk_account_io_start 341 334 -7,+0
perf_event_task_tick 682 675 -7,+0
intel_init_thermal 951 945 -6,+0
amd_e400_c1e_apic_setup 47 51 -28,+32
setup_APIC_eilvt 350 328 -22,+0
hsw_enable_pc8 1611 1605 -6,+0
total 12985947 12985892 -994,+939
Reported-by: Nadav Amit <nadav.amit@gmail.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
246 lines
6.0 KiB
C
246 lines
6.0 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
#ifndef __LINUX_SMP_H
|
|
#define __LINUX_SMP_H
|
|
|
|
/*
|
|
* Generic SMP support
|
|
* Alan Cox. <alan@redhat.com>
|
|
*/
|
|
|
|
#include <linux/errno.h>
|
|
#include <linux/types.h>
|
|
#include <linux/list.h>
|
|
#include <linux/cpumask.h>
|
|
#include <linux/init.h>
|
|
#include <linux/llist.h>
|
|
|
|
typedef void (*smp_call_func_t)(void *info);
|
|
struct __call_single_data {
|
|
struct llist_node llist;
|
|
smp_call_func_t func;
|
|
void *info;
|
|
unsigned int flags;
|
|
};
|
|
|
|
/* Use __aligned() to avoid using 2 cache lines for 1 csd */
|
|
typedef struct __call_single_data call_single_data_t
|
|
__aligned(sizeof(struct __call_single_data));
|
|
|
|
/* total number of cpus in this system (may exceed NR_CPUS) */
|
|
extern unsigned int total_cpus;
|
|
|
|
int smp_call_function_single(int cpuid, smp_call_func_t func, void *info,
|
|
int wait);
|
|
|
|
/*
|
|
* Call a function on all processors
|
|
*/
|
|
int on_each_cpu(smp_call_func_t func, void *info, int wait);
|
|
|
|
/*
|
|
* Call a function on processors specified by mask, which might include
|
|
* the local one.
|
|
*/
|
|
void on_each_cpu_mask(const struct cpumask *mask, smp_call_func_t func,
|
|
void *info, bool wait);
|
|
|
|
/*
|
|
* Call a function on each processor for which the supplied function
|
|
* cond_func returns a positive value. This may include the local
|
|
* processor.
|
|
*/
|
|
void on_each_cpu_cond(bool (*cond_func)(int cpu, void *info),
|
|
smp_call_func_t func, void *info, bool wait,
|
|
gfp_t gfp_flags);
|
|
|
|
void on_each_cpu_cond_mask(bool (*cond_func)(int cpu, void *info),
|
|
smp_call_func_t func, void *info, bool wait,
|
|
gfp_t gfp_flags, const struct cpumask *mask);
|
|
|
|
int smp_call_function_single_async(int cpu, call_single_data_t *csd);
|
|
|
|
#ifdef CONFIG_SMP
|
|
|
|
#include <linux/preempt.h>
|
|
#include <linux/kernel.h>
|
|
#include <linux/compiler.h>
|
|
#include <linux/thread_info.h>
|
|
#include <asm/smp.h>
|
|
|
|
/*
|
|
* main cross-CPU interfaces, handles INIT, TLB flush, STOP, etc.
|
|
* (defined in asm header):
|
|
*/
|
|
|
|
/*
|
|
* stops all CPUs but the current one:
|
|
*/
|
|
extern void smp_send_stop(void);
|
|
|
|
/*
|
|
* sends a 'reschedule' event to another CPU:
|
|
*/
|
|
extern void smp_send_reschedule(int cpu);
|
|
|
|
|
|
/*
|
|
* Prepare machine for booting other CPUs.
|
|
*/
|
|
extern void smp_prepare_cpus(unsigned int max_cpus);
|
|
|
|
/*
|
|
* Bring a CPU up
|
|
*/
|
|
extern int __cpu_up(unsigned int cpunum, struct task_struct *tidle);
|
|
|
|
/*
|
|
* Final polishing of CPUs
|
|
*/
|
|
extern void smp_cpus_done(unsigned int max_cpus);
|
|
|
|
/*
|
|
* Call a function on all other processors
|
|
*/
|
|
int smp_call_function(smp_call_func_t func, void *info, int wait);
|
|
void smp_call_function_many(const struct cpumask *mask,
|
|
smp_call_func_t func, void *info, bool wait);
|
|
|
|
int smp_call_function_any(const struct cpumask *mask,
|
|
smp_call_func_t func, void *info, int wait);
|
|
|
|
void kick_all_cpus_sync(void);
|
|
void wake_up_all_idle_cpus(void);
|
|
|
|
/*
|
|
* Generic and arch helpers
|
|
*/
|
|
void __init call_function_init(void);
|
|
void generic_smp_call_function_single_interrupt(void);
|
|
/*
 * generic_smp_call_function_interrupt is an alias: it expands to the name
 * generic_smp_call_function_single_interrupt, so both spellings refer to
 * the same function.
 */
#define generic_smp_call_function_interrupt \
	generic_smp_call_function_single_interrupt
|
|
|
|
/*
|
|
* Mark the boot cpu "online" so that it can call console drivers in
|
|
* printk() and can access its per-cpu storage.
|
|
*/
|
|
void smp_prepare_boot_cpu(void);
|
|
|
|
extern unsigned int setup_max_cpus;
|
|
extern void __init setup_nr_cpu_ids(void);
|
|
extern void __init smp_init(void);
|
|
|
|
extern int __boot_cpu_id;
|
|
|
|
static inline int get_boot_cpu_id(void)
|
|
{
|
|
return __boot_cpu_id;
|
|
}
|
|
|
|
#else /* !SMP */
|
|
|
|
static inline void smp_send_stop(void) { }
|
|
|
|
/*
|
|
* These macros fold the SMP functionality into a single CPU system
|
|
*/
|
|
#define raw_smp_processor_id() 0
|
|
/*
 * up_smp_call_function() - uniprocessor stand-in used by the
 * smp_call_function*() macros.
 *
 * Both arguments are ignored and @func is never invoked.
 *
 * Return: always 0.
 */
static inline int up_smp_call_function(smp_call_func_t func, void *info)
{
	return 0;
}
|
|
/*
 * UP version of smp_call_function(): forwards @func and @info to
 * up_smp_call_function(). @wait is dropped by the expansion, so any side
 * effects in that argument expression are not evaluated.
 */
#define smp_call_function(func, info, wait) \
	(up_smp_call_function(func, info))
|
|
|
|
static inline void smp_send_reschedule(int cpu) { }
|
|
#define smp_prepare_boot_cpu() do {} while (0)
|
|
/*
 * UP version of smp_call_function_many(): forwards @func and @info to
 * up_smp_call_function(). @mask and @wait are dropped by the expansion,
 * so any side effects in those argument expressions are not evaluated.
 */
#define smp_call_function_many(mask, func, info, wait) \
	(up_smp_call_function(func, info))
|
|
static inline void call_function_init(void) { }
|
|
|
|
static inline int
|
|
smp_call_function_any(const struct cpumask *mask, smp_call_func_t func,
|
|
void *info, int wait)
|
|
{
|
|
return smp_call_function_single(0, func, info, wait);
|
|
}
|
|
|
|
static inline void kick_all_cpus_sync(void) { }
|
|
static inline void wake_up_all_idle_cpus(void) { }
|
|
|
|
/*
 * UP version of smp_init(): with CONFIG_UP_LATE_INIT it calls
 * up_late_init(); otherwise it is an empty function.
 */
#ifdef CONFIG_UP_LATE_INIT
extern void __init up_late_init(void);
static inline void smp_init(void) { up_late_init(); }
#else
static inline void smp_init(void) { }
#endif
|
|
|
|
/*
 * get_boot_cpu_id() - UP variant.
 *
 * Return: always 0, matching the UP definition of raw_smp_processor_id().
 */
static inline int get_boot_cpu_id(void)
{
	return 0;
}
|
|
|
|
#endif /* !SMP */
|
|
|
|
/**
|
|
* raw_smp_processor_id() - get the current (unstable) CPU id
|
|
*
|
|
* For when you know what you are doing and need an unstable
|
|
* CPU id.
|
|
*/
|
|
|
|
/**
|
|
* smp_processor_id() - get the current (stable) CPU id
|
|
*
|
|
* This is the normal accessor to the CPU id and should be used
|
|
* whenever possible.
|
|
*
|
|
* The CPU id is stable when:
|
|
*
|
|
* - IRQs are disabled;
|
|
* - preemption is disabled;
|
|
* - the task is CPU affine.
|
|
*
|
|
* When CONFIG_DEBUG_PREEMPT is enabled, we verify these assumptions and WARN
|
|
* when smp_processor_id() is used when the CPU id is not stable.
|
|
*/
|
|
|
|
/*
|
|
* Allow the architecture to differentiate between a stable and unstable read.
|
|
* For example, x86 uses an IRQ-safe asm-volatile read for the unstable but a
|
|
* regular asm read for the stable.
|
|
*/
|
|
/*
 * Fallback: if no __smp_processor_id was defined before this point, it
 * simply expands to raw_smp_processor_id().
 */
#ifndef __smp_processor_id
#define __smp_processor_id(x) raw_smp_processor_id(x)
#endif
|
|
|
|
/*
 * smp_processor_id() expands to debug_smp_processor_id() when
 * CONFIG_DEBUG_PREEMPT is set, and to __smp_processor_id() otherwise.
 */
#ifdef CONFIG_DEBUG_PREEMPT
extern unsigned int debug_smp_processor_id(void);
# define smp_processor_id() debug_smp_processor_id()
#else
# define smp_processor_id() __smp_processor_id()
#endif
|
|
|
|
#define get_cpu() ({ preempt_disable(); __smp_processor_id(); })
|
|
#define put_cpu() preempt_enable()
|
|
|
|
/*
|
|
* Callback to arch code if there's nosmp or maxcpus=0 on the
|
|
* boot command line:
|
|
*/
|
|
extern void arch_disable_smp_support(void);
|
|
|
|
extern void arch_enable_nonboot_cpus_begin(void);
|
|
extern void arch_enable_nonboot_cpus_end(void);
|
|
|
|
void smp_setup_processor_id(void);
|
|
|
|
int smp_call_on_cpu(unsigned int cpu, int (*func)(void *), void *par,
|
|
bool phys);
|
|
|
|
/* SMP core functions */
|
|
int smpcfd_prepare_cpu(unsigned int cpu);
|
|
int smpcfd_dead_cpu(unsigned int cpu);
|
|
int smpcfd_dying_cpu(unsigned int cpu);
|
|
|
|
#endif /* __LINUX_SMP_H */
|