Files
lkl_linux/include/linux/smp.h
Peter Zijlstra 9ed7d75b2f x86/percpu: Relax smp_processor_id()
Nadav reported that since this_cpu_read() became asm-volatile, many
smp_processor_id() users generated worse code due to the extra
constraints.

However since smp_processor_id() is reading a stable value, we can use
__this_cpu_read().

While this does reduce text size somewhat, this mostly results in code
movement to .text.unlikely as a result of more/larger .cold.
subfunctions. Less text on the hotpath is good for I$.

  $ ./compare.sh defconfig-build1 defconfig-build2 vmlinux.o
  setup_APIC_ibs                                             90         98   -12,+20
  force_ibs_eilvt_setup                                     400        413   -57,+70
  pci_serr_error                                            109        104   -54,+49
  pci_serr_error                                            109        104   -54,+49
  unknown_nmi_error                                         125        120   -76,+71
  unknown_nmi_error                                         125        120   -76,+71
  io_check_error                                            125        132   -97,+104
  intel_thermal_interrupt                                   730        822   +92,+0
  intel_init_thermal                                        951        945   -6,+0
  generic_get_mtrr                                          301        294   -7,+0
  generic_get_mtrr                                          301        294   -7,+0
  generic_set_all                                           749        754   -44,+49
  get_fixed_ranges                                          352        360   -41,+49
  x86_acpi_suspend_lowlevel                                 369        363   -6,+0
  check_tsc_sync_source                                     412        412   -71,+71
  irq_migrate_all_off_this_cpu                              662        674   -14,+26
  clocksource_watchdog                                      748        748   -113,+113
  __perf_event_account_interrupt                            204        197   -7,+0
  attempt_merge                                            1748       1741   -7,+0
  intel_guc_send_ct                                        1424       1409   -15,+0
  __fini_doorbell                                           235        231   -4,+0
  bdw_set_cdclk                                             928        923   -5,+0
  gen11_dsi_disable                                        1571       1556   -15,+0
  gmbus_wait                                                493        488   -5,+0
  md_make_request                                           376        369   -7,+0
  __split_and_process_bio                                   543        536   -7,+0
  delay_tsc                                                  96         89   -7,+0
  hsw_disable_pc8                                           696        691   -5,+0
  tsc_verify_tsc_adjust                                     215        228   -22,+35
  cpuidle_driver_unref                                       56         49   -7,+0
  blk_account_io_completion                                 159        148   -11,+0
  mtrr_wrmsr                                                 95         99   -29,+33
  __intel_wait_for_register_fw                              401        419   +18,+0
  cpuidle_driver_ref                                         43         36   -7,+0
  cpuidle_get_driver                                         15          8   -7,+0
  blk_account_io_done                                       535        528   -7,+0
  irq_migrate_all_off_this_cpu                              662        674   -14,+26
  check_tsc_sync_source                                     412        412   -71,+71
  irq_wait_for_poll                                         170        163   -7,+0
  generic_end_io_acct                                       329        322   -7,+0
  x86_acpi_suspend_lowlevel                                 369        363   -6,+0
  nohz_balance_enter_idle                                   198        191   -7,+0
  generic_start_io_acct                                     254        247   -7,+0
  blk_account_io_start                                      341        334   -7,+0
  perf_event_task_tick                                      682        675   -7,+0
  intel_init_thermal                                        951        945   -6,+0
  amd_e400_c1e_apic_setup                                    47         51   -28,+32
  setup_APIC_eilvt                                          350        328   -22,+0
  hsw_enable_pc8                                           1611       1605   -6,+0
                                               total   12985947   12985892   -994,+939

Reported-by: Nadav Amit <nadav.amit@gmail.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2019-06-17 12:43:41 +02:00

246 lines
6.0 KiB
C

/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __LINUX_SMP_H
#define __LINUX_SMP_H
/*
* Generic SMP support
* Alan Cox. <alan@redhat.com>
*/
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/list.h>
#include <linux/cpumask.h>
#include <linux/init.h>
#include <linux/llist.h>
typedef void (*smp_call_func_t)(void *info);
struct __call_single_data {
struct llist_node llist;
smp_call_func_t func;
void *info;
unsigned int flags;
};
/* Use __aligned() to avoid to use 2 cache lines for 1 csd */
typedef struct __call_single_data call_single_data_t
__aligned(sizeof(struct __call_single_data));
/* total number of cpus in this system (may exceed NR_CPUS) */
extern unsigned int total_cpus;
int smp_call_function_single(int cpuid, smp_call_func_t func, void *info,
int wait);
/*
* Call a function on all processors
*/
int on_each_cpu(smp_call_func_t func, void *info, int wait);
/*
* Call a function on processors specified by mask, which might include
* the local one.
*/
void on_each_cpu_mask(const struct cpumask *mask, smp_call_func_t func,
void *info, bool wait);
/*
* Call a function on each processor for which the supplied function
* cond_func returns a positive value. This may include the local
* processor.
*/
void on_each_cpu_cond(bool (*cond_func)(int cpu, void *info),
smp_call_func_t func, void *info, bool wait,
gfp_t gfp_flags);
void on_each_cpu_cond_mask(bool (*cond_func)(int cpu, void *info),
smp_call_func_t func, void *info, bool wait,
gfp_t gfp_flags, const struct cpumask *mask);
int smp_call_function_single_async(int cpu, call_single_data_t *csd);
#ifdef CONFIG_SMP
#include <linux/preempt.h>
#include <linux/kernel.h>
#include <linux/compiler.h>
#include <linux/thread_info.h>
#include <asm/smp.h>
/*
* main cross-CPU interfaces, handles INIT, TLB flush, STOP, etc.
* (defined in asm header):
*/
/*
* stops all CPUs but the current one:
*/
extern void smp_send_stop(void);
/*
* sends a 'reschedule' event to another CPU:
*/
extern void smp_send_reschedule(int cpu);
/*
* Prepare machine for booting other CPUs.
*/
extern void smp_prepare_cpus(unsigned int max_cpus);
/*
* Bring a CPU up
*/
extern int __cpu_up(unsigned int cpunum, struct task_struct *tidle);
/*
* Final polishing of CPUs
*/
extern void smp_cpus_done(unsigned int max_cpus);
/*
* Call a function on all other processors
*/
int smp_call_function(smp_call_func_t func, void *info, int wait);
void smp_call_function_many(const struct cpumask *mask,
smp_call_func_t func, void *info, bool wait);
int smp_call_function_any(const struct cpumask *mask,
smp_call_func_t func, void *info, int wait);
void kick_all_cpus_sync(void);
void wake_up_all_idle_cpus(void);
/*
* Generic and arch helpers
*/
void __init call_function_init(void);
void generic_smp_call_function_single_interrupt(void);
#define generic_smp_call_function_interrupt \
generic_smp_call_function_single_interrupt
/*
* Mark the boot cpu "online" so that it can call console drivers in
* printk() and can access its per-cpu storage.
*/
void smp_prepare_boot_cpu(void);
extern unsigned int setup_max_cpus;
extern void __init setup_nr_cpu_ids(void);
extern void __init smp_init(void);
extern int __boot_cpu_id;
static inline int get_boot_cpu_id(void)
{
return __boot_cpu_id;
}
#else /* !SMP */
static inline void smp_send_stop(void) { }
/*
* These macros fold the SMP functionality into a single CPU system
*/
#define raw_smp_processor_id() 0
static inline int up_smp_call_function(smp_call_func_t func, void *info)
{
return 0;
}
#define smp_call_function(func, info, wait) \
(up_smp_call_function(func, info))
static inline void smp_send_reschedule(int cpu) { }
#define smp_prepare_boot_cpu() do {} while (0)
#define smp_call_function_many(mask, func, info, wait) \
(up_smp_call_function(func, info))
static inline void call_function_init(void) { }
static inline int
smp_call_function_any(const struct cpumask *mask, smp_call_func_t func,
void *info, int wait)
{
return smp_call_function_single(0, func, info, wait);
}
static inline void kick_all_cpus_sync(void) { }
static inline void wake_up_all_idle_cpus(void) { }
#ifdef CONFIG_UP_LATE_INIT
extern void __init up_late_init(void);
static inline void smp_init(void) { up_late_init(); }
#else
static inline void smp_init(void) { }
#endif
static inline int get_boot_cpu_id(void)
{
return 0;
}
#endif /* !SMP */
/**
* raw_processor_id() - get the current (unstable) CPU id
*
* For then you know what you are doing and need an unstable
* CPU id.
*/
/**
* smp_processor_id() - get the current (stable) CPU id
*
* This is the normal accessor to the CPU id and should be used
* whenever possible.
*
* The CPU id is stable when:
*
* - IRQs are disabled;
* - preemption is disabled;
* - the task is CPU affine.
*
* When CONFIG_DEBUG_PREEMPT; we verify these assumption and WARN
* when smp_processor_id() is used when the CPU id is not stable.
*/
/*
* Allow the architecture to differentiate between a stable and unstable read.
* For example, x86 uses an IRQ-safe asm-volatile read for the unstable but a
* regular asm read for the stable.
*/
#ifndef __smp_processor_id
#define __smp_processor_id(x) raw_smp_processor_id(x)
#endif
#ifdef CONFIG_DEBUG_PREEMPT
extern unsigned int debug_smp_processor_id(void);
# define smp_processor_id() debug_smp_processor_id()
#else
# define smp_processor_id() __smp_processor_id()
#endif
#define get_cpu() ({ preempt_disable(); __smp_processor_id(); })
#define put_cpu() preempt_enable()
/*
* Callback to arch code if there's nosmp or maxcpus=0 on the
* boot command line:
*/
extern void arch_disable_smp_support(void);
extern void arch_enable_nonboot_cpus_begin(void);
extern void arch_enable_nonboot_cpus_end(void);
void smp_setup_processor_id(void);
int smp_call_on_cpu(unsigned int cpu, int (*func)(void *), void *par,
bool phys);
/* SMP core functions */
int smpcfd_prepare_cpu(unsigned int cpu);
int smpcfd_dead_cpu(unsigned int cpu);
int smpcfd_dying_cpu(unsigned int cpu);
#endif /* __LINUX_SMP_H */