Merge branch 'locking-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull locking updates from Ingo Molnar: "The main changes in this cycle are: - rwsem scalability improvements, phase #2, by Waiman Long, which are rather impressive: "On a 2-socket 40-core 80-thread Skylake system with 40 reader and writer locking threads, the min/mean/max locking operations done in a 5-second testing window before the patchset were: 40 readers, Iterations Min/Mean/Max = 1,807/1,808/1,810 40 writers, Iterations Min/Mean/Max = 1,807/50,344/151,255 After the patchset, they became: 40 readers, Iterations Min/Mean/Max = 30,057/31,359/32,741 40 writers, Iterations Min/Mean/Max = 94,466/95,845/97,098" There's a lot of changes to the locking implementation that makes it similar to qrwlock, including owner handoff for more fair locking. Another microbenchmark shows how across the spectrum the improvements are: "With a locking microbenchmark running on 5.1 based kernel, the total locking rates (in kops/s) on a 2-socket Skylake system with equal numbers of readers and writers (mixed) before and after this patchset were: # of Threads Before Patch After Patch ------------ ------------ ----------- 2 2,618 4,193 4 1,202 3,726 8 802 3,622 16 729 3,359 32 319 2,826 64 102 2,744" The changes are extensive and the patch-set has been through several iterations addressing various locking workloads. There might be more regressions, but unless they are pathological I believe we want to use this new implementation as the baseline going forward. - jump-label optimizations by Daniel Bristot de Oliveira: the primary motivation was to remove IPI disturbance of isolated RT-workload CPUs, which resulted in the implementation of batched jump-label updates. Beyond the improvement of the real-time characteristics kernel, in one test this patchset improved static key update overhead from 57 msecs to just 1.4 msecs - which is a nice speedup as well. - atomic64_t cross-arch type cleanups by Mark Rutland: over the last ~10 years of atomic64_t existence the various types used by the APIs only had to be self-consistent within each architecture - which means they became wildly inconsistent across architectures. Mark puts and end to this by reworking all the atomic64 implementations to use 's64' as the base type for atomic64_t, and to ensure that this type is consistently used for parameters and return values in the API, avoiding further problems in this area. - A large set of small improvements to lockdep by Yuyang Du: type cleanups, output cleanups, function return type and othr cleanups all around the place. - A set of percpu ops cleanups and fixes by Peter Zijlstra. - Misc other changes - please see the Git log for more details" * 'locking-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (82 commits) locking/lockdep: increase size of counters for lockdep statistics locking/atomics: Use sed(1) instead of non-standard head(1) option locking/lockdep: Move mark_lock() inside CONFIG_TRACE_IRQFLAGS && CONFIG_PROVE_LOCKING x86/jump_label: Make tp_vec_nr static x86/percpu: Optimize raw_cpu_xchg() x86/percpu, sched/fair: Avoid local_clock() x86/percpu, x86/irq: Relax {set,get}_irq_regs() x86/percpu: Relax smp_processor_id() x86/percpu: Differentiate this_cpu_{}() and __this_cpu_{}() locking/rwsem: Guard against making count negative locking/rwsem: Adaptive disabling of reader optimistic spinning locking/rwsem: Enable time-based spinning on reader-owned rwsem locking/rwsem: Make rwsem->owner an atomic_long_t locking/rwsem: Enable readers spinning on writer locking/rwsem: Clarify usage of owner's nonspinaable bit locking/rwsem: Wake up almost all readers in wait queue locking/rwsem: More optimal RT task handling of null owner locking/rwsem: Always release wait_lock before waking up tasks locking/rwsem: Implement lock handoff to prevent lock starvation locking/rwsem: Make rwsem_spin_on_owner() return owner state ...
2025-12-19 16:13:19 +09:00 · 2019-07-08 16:12:03 -07:00
parent 46f1ec23a4 9156e54576
commit e192832869
55 changed files with 2785 additions and 2017 deletions
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -203,11 +203,17 @@ struct lock_list {
 	struct lock_list		*parent;
 };

-/*
- * We record lock dependency chains, so that we can cache them:
+/**
+ * struct lock_chain - lock dependency chain record
+ *
+ * @irq_context: the same as irq_context in held_lock below
+ * @depth:       the number of held locks in this chain
+ * @base:        the index in chain_hlocks for this chain
+ * @entry:       the collided lock chains in lock_chain hash list
+ * @chain_key:   the hash key of this lock_chain
 */
 struct lock_chain {
-	/* see BUILD_BUG_ON()s in lookup_chain_cache() */
+	/* see BUILD_BUG_ON()s in add_chain_cache() */
 	unsigned int			irq_context :  2,
 					depth       :  6,
 					base	    : 24;
@@ -217,12 +223,8 @@ struct lock_chain {
 };

 #define MAX_LOCKDEP_KEYS_BITS		13
-/*
- * Subtract one because we offset hlock->class_idx by 1 in order
- * to make 0 mean no class. This avoids overflowing the class_idx
- * bitfield and hitting the BUG in hlock_class().
- */
-#define MAX_LOCKDEP_KEYS		((1UL << MAX_LOCKDEP_KEYS_BITS) - 1)
+#define MAX_LOCKDEP_KEYS		(1UL << MAX_LOCKDEP_KEYS_BITS)
+#define INITIAL_CHAIN_KEY		-1

 struct held_lock {
 	/*
@@ -247,6 +249,11 @@ struct held_lock {
 	u64 				waittime_stamp;
 	u64				holdtime_stamp;
 #endif
+	/*
+	 * class_idx is zero-indexed; it points to the element in
+	 * lock_classes this held lock instance belongs to. class_idx is in
+	 * the range from 0 to (MAX_LOCKDEP_KEYS-1) inclusive.
+	 */
 	unsigned int			class_idx:MAX_LOCKDEP_KEYS_BITS;
 	/*
 	 * The lock-stack is unified in that the lock chains of interrupt
@@ -281,6 +288,8 @@ extern void lockdep_free_key_range(void *start, unsigned long size);
 extern asmlinkage void lockdep_sys_exit(void);
 extern void lockdep_set_selftest_task(struct task_struct *task);

+extern void lockdep_init_task(struct task_struct *task);
+
 extern void lockdep_off(void);
 extern void lockdep_on(void);

@@ -385,7 +394,7 @@ extern void lock_unpin_lock(struct lockdep_map *lock, struct pin_cookie);
 		WARN_ON(debug_locks && !lockdep_is_held(l));	\
 	} while (0)

-#define lockdep_assert_held_exclusive(l)	do {			\
+#define lockdep_assert_held_write(l)	do {			\
 		WARN_ON(debug_locks && !lockdep_is_held_type(l, 0));	\
 	} while (0)

@@ -405,6 +414,10 @@ extern void lock_unpin_lock(struct lockdep_map *lock, struct pin_cookie);

 #else /* !CONFIG_LOCKDEP */

+static inline void lockdep_init_task(struct task_struct *task)
+{
+}
+
 static inline void lockdep_off(void)
 {
 }
@@ -466,7 +479,7 @@ struct lockdep_map { };
 #define lockdep_is_held_type(l, r)		(1)

 #define lockdep_assert_held(l)			do { (void)(l); } while (0)
-#define lockdep_assert_held_exclusive(l)	do { (void)(l); } while (0)
+#define lockdep_assert_held_write(l)	do { (void)(l); } while (0)
 #define lockdep_assert_held_read(l)		do { (void)(l); } while (0)
 #define lockdep_assert_held_once(l)		do { (void)(l); } while (0)

@@ -497,7 +510,6 @@ enum xhlock_context_t {
 	{ .name = (_name), .key = (void *)(_key), }

 static inline void lockdep_invariant_state(bool force) {}
-static inline void lockdep_init_task(struct task_struct *task) {}
 static inline void lockdep_free_task(struct task_struct *task) {}

 #ifdef CONFIG_LOCK_STAT