1/* CPU control.
2 * (C) 2001, 2002, 2003, 2004 Rusty Russell
3 *
4 * This code is licenced under the GPL.
5 */
6#include <linux/sched/mm.h>
7#include <linux/proc_fs.h>
8#include <linux/smp.h>
9#include <linux/init.h>
10#include <linux/notifier.h>
11#include <linux/sched/signal.h>
12#include <linux/sched/hotplug.h>
13#include <linux/sched/isolation.h>
14#include <linux/sched/task.h>
15#include <linux/sched/smt.h>
16#include <linux/unistd.h>
17#include <linux/cpu.h>
18#include <linux/oom.h>
19#include <linux/rcupdate.h>
20#include <linux/delay.h>
21#include <linux/export.h>
22#include <linux/bug.h>
23#include <linux/kthread.h>
24#include <linux/stop_machine.h>
25#include <linux/mutex.h>
26#include <linux/gfp.h>
27#include <linux/suspend.h>
28#include <linux/lockdep.h>
29#include <linux/tick.h>
30#include <linux/irq.h>
31#include <linux/nmi.h>
32#include <linux/smpboot.h>
33#include <linux/relay.h>
34#include <linux/slab.h>
35#include <linux/scs.h>
36#include <linux/percpu-rwsem.h>
37#include <linux/cpuset.h>
38#include <linux/random.h>
39#include <linux/cc_platform.h>
40
41#include <trace/events/power.h>
42#define CREATE_TRACE_POINTS
43#include <trace/events/cpuhp.h>
44
45#include "smpboot.h"
46
47/**
48 * struct cpuhp_cpu_state - Per cpu hotplug state storage
49 * @state:	The current cpu state
50 * @target:	The target state
51 * @fail:	Current CPU hotplug callback state
52 * @thread:	Pointer to the hotplug thread
53 * @should_run:	Thread should execute
54 * @rollback:	Perform a rollback
55 * @single:	Single callback invocation
56 * @bringup:	Single callback bringup or teardown selector
57 * @node:	Remote CPU node; for multi-instance, do a
58 *		single entry callback for install/remove
59 * @last:	For multi-instance rollback, remember how far we got
60 * @cb_state:	The state for a single callback (install/uninstall)
61 * @result:	Result of the operation
62 * @ap_sync_state:	State for AP synchronization
63 * @done_up:	Signal completion to the issuer of the task for cpu-up
64 * @done_down:	Signal completion to the issuer of the task for cpu-down
65 */
66struct cpuhp_cpu_state {
67	enum cpuhp_state	state;
68	enum cpuhp_state	target;
69	enum cpuhp_state	fail;
70#ifdef CONFIG_SMP
71	struct task_struct	*thread;
72	bool			should_run;
73	bool			rollback;
74	bool			single;
75	bool			bringup;
76	struct hlist_node	*node;
77	struct hlist_node	*last;
78	enum cpuhp_state	cb_state;
79	int			result;
80	atomic_t		ap_sync_state;
81	struct completion	done_up;
82	struct completion	done_down;
83#endif
84};
85
86static DEFINE_PER_CPU(struct cpuhp_cpu_state, cpuhp_state) = {
87	.fail = CPUHP_INVALID,
88};
89
90#ifdef CONFIG_SMP
91cpumask_t cpus_booted_once_mask;
92#endif
93
94#if defined(CONFIG_LOCKDEP) && defined(CONFIG_SMP)
95static struct lockdep_map cpuhp_state_up_map =
96	STATIC_LOCKDEP_MAP_INIT("cpuhp_state-up", &cpuhp_state_up_map);
97static struct lockdep_map cpuhp_state_down_map =
98	STATIC_LOCKDEP_MAP_INIT("cpuhp_state-down", &cpuhp_state_down_map);
99
101static inline void cpuhp_lock_acquire(bool bringup)
102{
103	lock_map_acquire(bringup ? &cpuhp_state_up_map : &cpuhp_state_down_map);
104}
105
106static inline void cpuhp_lock_release(bool bringup)
107{
108	lock_map_release(bringup ? &cpuhp_state_up_map : &cpuhp_state_down_map);
109}
110#else
111
112static inline void cpuhp_lock_acquire(bool bringup) { }
113static inline void cpuhp_lock_release(bool bringup) { }
114
115#endif
116
117/**
118 * struct cpuhp_step - Hotplug state machine step
119 * @name:	Name of the step
120 * @startup:	Startup function of the step
121 * @teardown:	Teardown function of the step
122 * @cant_stop:	Bringup/teardown can't be stopped at this step
123 * @multi_instance:	State has multiple instances which get added afterwards
124 */
125struct cpuhp_step {
126	const char		*name;
127	union {
128		int		(*single)(unsigned int cpu);
129		int		(*multi)(unsigned int cpu,
130					 struct hlist_node *node);
131	} startup;
132	union {
133		int		(*single)(unsigned int cpu);
134		int		(*multi)(unsigned int cpu,
135					 struct hlist_node *node);
136	} teardown;
137	/* private: */
138	struct hlist_head	list;
139	/* public: */
140	bool			cant_stop;
141	bool			multi_instance;
142};
143
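/*
 * Illustrative sketch (not part of this file's logic): subsystems normally
 * populate a cpuhp_step's startup/teardown callbacks indirectly through the
 * cpuhp_setup_state() family. The callbacks below are hypothetical:
 *
 *	static int foo_prepare_cpu(unsigned int cpu)
 *	{
 *		// allocate per-CPU resources for @cpu, 0 on success
 *		return 0;
 *	}
 *
 *	static int foo_dead_cpu(unsigned int cpu)
 *	{
 *		// free the per-CPU resources again
 *		return 0;
 *	}
 *
 *	// Registers a dynamically allocated PREPARE state and invokes the
 *	// startup callback on the CPUs which are already up.
 *	ret = cpuhp_setup_state(CPUHP_BP_PREPARE_DYN, "foo:prepare",
 *				foo_prepare_cpu, foo_dead_cpu);
 */
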
144static DEFINE_MUTEX(cpuhp_state_mutex);
145static struct cpuhp_step cpuhp_hp_states[];
146
147static struct cpuhp_step *cpuhp_get_step(enum cpuhp_state state)
148{
149	return cpuhp_hp_states + state;
150}
151
152static bool cpuhp_step_empty(bool bringup, struct cpuhp_step *step)
153{
154	return bringup ? !step->startup.single : !step->teardown.single;
155}
156
157/**
158 * cpuhp_invoke_callback - Invoke the callbacks for a given state
159 * @cpu:	The cpu for which the callback should be invoked
160 * @state:	The state to do callbacks for
161 * @bringup:	True if the bringup callback should be invoked
162 * @node:	For multi-instance, do a single entry callback for install/remove
163 * @lastp:	For multi-instance rollback, remember how far we got
164 *
165 * Called from cpu hotplug and from the state register machinery.
166 *
167 * Return: %0 on success or a negative errno code
168 */
169static int cpuhp_invoke_callback(unsigned int cpu, enum cpuhp_state state,
170				 bool bringup, struct hlist_node *node,
171				 struct hlist_node **lastp)
172{
173	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
174	struct cpuhp_step *step = cpuhp_get_step(state);
175	int (*cbm)(unsigned int cpu, struct hlist_node *node);
176	int (*cb)(unsigned int cpu);
177	int ret, cnt;
178
179	if (st->fail == state) {
180		st->fail = CPUHP_INVALID;
181		return -EAGAIN;
182	}
183
184	if (cpuhp_step_empty(bringup, step)) {
185		WARN_ON_ONCE(1);
186		return 0;
187	}
188
189	if (!step->multi_instance) {
190		WARN_ON_ONCE(lastp && *lastp);
191		cb = bringup ? step->startup.single : step->teardown.single;
192
193		trace_cpuhp_enter(cpu, st->target, state, cb);
194		ret = cb(cpu);
195		trace_cpuhp_exit(cpu, st->state, state, ret);
196		return ret;
197	}
198	cbm = bringup ? step->startup.multi : step->teardown.multi;
199
200	/* Single invocation for instance add/remove */
201	if (node) {
202		WARN_ON_ONCE(lastp && *lastp);
203		trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node);
204		ret = cbm(cpu, node);
205		trace_cpuhp_exit(cpu, st->state, state, ret);
206		return ret;
207	}
208
209	/* State transition. Invoke on all instances */
210	cnt = 0;
211	hlist_for_each(node, &step->list) {
212		if (lastp && node == *lastp)
213			break;
214
215		trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node);
216		ret = cbm(cpu, node);
217		trace_cpuhp_exit(cpu, st->state, state, ret);
218		if (ret) {
219			if (!lastp)
220				goto err;
221
222			*lastp = node;
223			return ret;
224		}
225		cnt++;
226	}
227	if (lastp)
228		*lastp = NULL;
229	return 0;
230err:
231	/* Rollback the instances if one failed */
232	cbm = !bringup ? step->startup.multi : step->teardown.multi;
233	if (!cbm)
234		return ret;
235
236	hlist_for_each(node, &step->list) {
237		if (!cnt--)
238			break;
239
240		trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node);
241		ret = cbm(cpu, node);
242		trace_cpuhp_exit(cpu, st->state, state, ret);
		/*
		 * Rollback must not fail!
		 */
246		WARN_ON_ONCE(ret);
247	}
248	return ret;
249}
250
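/*
 * Illustrative sketch (an assumption, not taken from this file): multi-instance
 * states embed a struct hlist_node in each driver instance and register every
 * instance separately, so the callbacks above run once per instance:
 *
 *	struct foo_instance {
 *		struct hlist_node node;
 *		// driver specific data
 *	};
 *
 *	static int foo_online(unsigned int cpu, struct hlist_node *node)
 *	{
 *		struct foo_instance *inst;
 *
 *		inst = hlist_entry(node, struct foo_instance, node);
 *		// bring @inst up for @cpu
 *		return 0;
 *	}
 *
 *	// Register the state once; @state is the dynamically allocated
 *	// state number returned by the setup call ...
 *	state = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "foo:online",
 *					foo_online, NULL);
 *	// ... and add each instance as it is created.
 *	ret = cpuhp_state_add_instance(state, &inst->node);
 */
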
251#ifdef CONFIG_SMP
252static bool cpuhp_is_ap_state(enum cpuhp_state state)
253{
254	/*
255	 * The extra check for CPUHP_TEARDOWN_CPU is only for documentation
256	 * purposes as that state is handled explicitly in cpu_down.
257	 */
258	return state > CPUHP_BRINGUP_CPU && state != CPUHP_TEARDOWN_CPU;
259}
260
261static inline void wait_for_ap_thread(struct cpuhp_cpu_state *st, bool bringup)
262{
263	struct completion *done = bringup ? &st->done_up : &st->done_down;
264	wait_for_completion(done);
265}
266
267static inline void complete_ap_thread(struct cpuhp_cpu_state *st, bool bringup)
268{
269	struct completion *done = bringup ? &st->done_up : &st->done_down;
270	complete(done);
271}
272
/*
 * The former STARTING/DYING states; they run with IRQs disabled and must
 * not fail.
 */
276static bool cpuhp_is_atomic_state(enum cpuhp_state state)
277{
278	return CPUHP_AP_IDLE_DEAD <= state && state < CPUHP_AP_ONLINE;
279}
280
281/* Synchronization state management */
282enum cpuhp_sync_state {
283	SYNC_STATE_DEAD,
284	SYNC_STATE_KICKED,
285	SYNC_STATE_SHOULD_DIE,
286	SYNC_STATE_ALIVE,
287	SYNC_STATE_SHOULD_ONLINE,
288	SYNC_STATE_ONLINE,
289};
290
291#ifdef CONFIG_HOTPLUG_CORE_SYNC
/**
 * cpuhp_ap_update_sync_state - Update synchronization state during bringup/teardown
 * @state:	The synchronization state to set
 *
 * No synchronization point. Just updates the synchronization state. The
 * atomic_xchg() implies a full barrier so that the AP's changes are visible
 * before the control CPU proceeds.
 */
299static inline void cpuhp_ap_update_sync_state(enum cpuhp_sync_state state)
300{
301	atomic_t *st = this_cpu_ptr(&cpuhp_state.ap_sync_state);
302
303	(void)atomic_xchg(st, state);
304}
305
306void __weak arch_cpuhp_sync_state_poll(void) { cpu_relax(); }
307
308static bool cpuhp_wait_for_sync_state(unsigned int cpu, enum cpuhp_sync_state state,
309				      enum cpuhp_sync_state next_state)
310{
311	atomic_t *st = per_cpu_ptr(&cpuhp_state.ap_sync_state, cpu);
312	ktime_t now, end, start = ktime_get();
313	int sync;
314
315	end = start + 10ULL * NSEC_PER_SEC;
316
317	sync = atomic_read(st);
318	while (1) {
319		if (sync == state) {
320			if (!atomic_try_cmpxchg(st, &sync, next_state))
321				continue;
322			return true;
323		}
324
325		now = ktime_get();
326		if (now > end) {
327			/* Timeout. Leave the state unchanged */
328			return false;
329		} else if (now - start < NSEC_PER_MSEC) {
330			/* Poll for one millisecond */
331			arch_cpuhp_sync_state_poll();
332		} else {
333			usleep_range_state(USEC_PER_MSEC, 2 * USEC_PER_MSEC, TASK_UNINTERRUPTIBLE);
334		}
335		sync = atomic_read(st);
336	}
337	return true;
338}
339#else  /* CONFIG_HOTPLUG_CORE_SYNC */
340static inline void cpuhp_ap_update_sync_state(enum cpuhp_sync_state state) { }
341#endif /* !CONFIG_HOTPLUG_CORE_SYNC */
342
343#ifdef CONFIG_HOTPLUG_CORE_SYNC_DEAD
344/**
345 * cpuhp_ap_report_dead - Update synchronization state to DEAD
346 *
347 * No synchronization point. Just update of the synchronization state.
348 */
349void cpuhp_ap_report_dead(void)
350{
351	cpuhp_ap_update_sync_state(SYNC_STATE_DEAD);
352}
353
354void __weak arch_cpuhp_cleanup_dead_cpu(unsigned int cpu) { }
355
356/*
357 * Late CPU shutdown synchronization point. Cannot use cpuhp_state::done_down
358 * because the AP cannot issue complete() at this stage.
359 */
360static void cpuhp_bp_sync_dead(unsigned int cpu)
361{
362	atomic_t *st = per_cpu_ptr(&cpuhp_state.ap_sync_state, cpu);
363	int sync = atomic_read(st);
364
365	do {
366		/* CPU can have reported dead already. Don't overwrite that! */
367		if (sync == SYNC_STATE_DEAD)
368			break;
369	} while (!atomic_try_cmpxchg(st, &sync, SYNC_STATE_SHOULD_DIE));
370
371	if (cpuhp_wait_for_sync_state(cpu, SYNC_STATE_DEAD, SYNC_STATE_DEAD)) {
372		/* CPU reached dead state. Invoke the cleanup function */
373		arch_cpuhp_cleanup_dead_cpu(cpu);
374		return;
375	}
376
377	/* No further action possible. Emit message and give up. */
378	pr_err("CPU%u failed to report dead state\n", cpu);
379}
380#else /* CONFIG_HOTPLUG_CORE_SYNC_DEAD */
381static inline void cpuhp_bp_sync_dead(unsigned int cpu) { }
382#endif /* !CONFIG_HOTPLUG_CORE_SYNC_DEAD */
383
384#ifdef CONFIG_HOTPLUG_CORE_SYNC_FULL
385/**
386 * cpuhp_ap_sync_alive - Synchronize AP with the control CPU once it is alive
387 *
388 * Updates the AP synchronization state to SYNC_STATE_ALIVE and waits
389 * for the BP to release it.
390 */
391void cpuhp_ap_sync_alive(void)
392{
393	atomic_t *st = this_cpu_ptr(&cpuhp_state.ap_sync_state);
394
395	cpuhp_ap_update_sync_state(SYNC_STATE_ALIVE);
396
397	/* Wait for the control CPU to release it. */
398	while (atomic_read(st) != SYNC_STATE_SHOULD_ONLINE)
399		cpu_relax();
400}
401
402static bool cpuhp_can_boot_ap(unsigned int cpu)
403{
404	atomic_t *st = per_cpu_ptr(&cpuhp_state.ap_sync_state, cpu);
405	int sync = atomic_read(st);
406
407again:
408	switch (sync) {
409	case SYNC_STATE_DEAD:
410		/* CPU is properly dead */
411		break;
412	case SYNC_STATE_KICKED:
413		/* CPU did not come up in previous attempt */
414		break;
415	case SYNC_STATE_ALIVE:
		/* CPU is stuck in cpuhp_ap_sync_alive(). */
417		break;
418	default:
419		/* CPU failed to report online or dead and is in limbo state. */
420		return false;
421	}
422
423	/* Prepare for booting */
424	if (!atomic_try_cmpxchg(st, &sync, SYNC_STATE_KICKED))
425		goto again;
426
427	return true;
428}
429
430void __weak arch_cpuhp_cleanup_kick_cpu(unsigned int cpu) { }
431
432/*
433 * Early CPU bringup synchronization point. Cannot use cpuhp_state::done_up
434 * because the AP cannot issue complete() so early in the bringup.
435 */
436static int cpuhp_bp_sync_alive(unsigned int cpu)
437{
438	int ret = 0;
439
440	if (!IS_ENABLED(CONFIG_HOTPLUG_CORE_SYNC_FULL))
441		return 0;
442
443	if (!cpuhp_wait_for_sync_state(cpu, SYNC_STATE_ALIVE, SYNC_STATE_SHOULD_ONLINE)) {
444		pr_err("CPU%u failed to report alive state\n", cpu);
445		ret = -EIO;
446	}
447
448	/* Let the architecture cleanup the kick alive mechanics. */
449	arch_cpuhp_cleanup_kick_cpu(cpu);
450	return ret;
451}
452#else /* CONFIG_HOTPLUG_CORE_SYNC_FULL */
453static inline int cpuhp_bp_sync_alive(unsigned int cpu) { return 0; }
454static inline bool cpuhp_can_boot_ap(unsigned int cpu) { return true; }
455#endif /* !CONFIG_HOTPLUG_CORE_SYNC_FULL */
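
/*
 * Sketch of the bringup handshake with CONFIG_HOTPLUG_CORE_SYNC_FULL, as
 * implemented by the helpers above (simplified):
 *
 *	1) BP: cpuhp_can_boot_ap() flips the state to SYNC_STATE_KICKED and
 *	   the architecture kicks the AP alive.
 *	2) AP: cpuhp_ap_sync_alive() sets SYNC_STATE_ALIVE and spins until
 *	   the BP releases it.
 *	3) BP: cpuhp_bp_sync_alive() waits for SYNC_STATE_ALIVE and advances
 *	   the state to SYNC_STATE_SHOULD_ONLINE.
 *	4) AP: completes the bringup and reports SYNC_STATE_ONLINE from
 *	   cpuhp_online_idle().
 *
 * Teardown mirrors this with SYNC_STATE_SHOULD_DIE/SYNC_STATE_DEAD via
 * cpuhp_bp_sync_dead() and cpuhp_ap_report_dead().
 */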
456
457/* Serializes the updates to cpu_online_mask, cpu_present_mask */
458static DEFINE_MUTEX(cpu_add_remove_lock);
459bool cpuhp_tasks_frozen;
460EXPORT_SYMBOL_GPL(cpuhp_tasks_frozen);
461
462/*
463 * The following two APIs (cpu_maps_update_begin/done) must be used when
464 * attempting to serialize the updates to cpu_online_mask & cpu_present_mask.
465 */
466void cpu_maps_update_begin(void)
467{
468	mutex_lock(&cpu_add_remove_lock);
469}
470
471void cpu_maps_update_done(void)
472{
473	mutex_unlock(&cpu_add_remove_lock);
474}
475
476/*
477 * If set, cpu_up and cpu_down will return -EBUSY and do nothing.
478 * Should always be manipulated under cpu_add_remove_lock
479 */
480static int cpu_hotplug_disabled;
481
482#ifdef CONFIG_HOTPLUG_CPU
483
484DEFINE_STATIC_PERCPU_RWSEM(cpu_hotplug_lock);
485
486void cpus_read_lock(void)
487{
488	percpu_down_read(&cpu_hotplug_lock);
489}
490EXPORT_SYMBOL_GPL(cpus_read_lock);
491
492int cpus_read_trylock(void)
493{
494	return percpu_down_read_trylock(&cpu_hotplug_lock);
495}
496EXPORT_SYMBOL_GPL(cpus_read_trylock);
497
498void cpus_read_unlock(void)
499{
500	percpu_up_read(&cpu_hotplug_lock);
501}
502EXPORT_SYMBOL_GPL(cpus_read_unlock);
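
/*
 * Typical usage sketch (hypothetical caller): hold the read side while
 * iterating online CPUs so none can come or go underneath:
 *
 *	unsigned int cpu;
 *
 *	cpus_read_lock();
 *	for_each_online_cpu(cpu)
 *		foo_configure_cpu(cpu);	// hypothetical per-CPU setup
 *	cpus_read_unlock();
 */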
503
504void cpus_write_lock(void)
505{
506	percpu_down_write(&cpu_hotplug_lock);
507}
508
509void cpus_write_unlock(void)
510{
511	percpu_up_write(&cpu_hotplug_lock);
512}
513
514void lockdep_assert_cpus_held(void)
515{
516	/*
517	 * We can't have hotplug operations before userspace starts running,
518	 * and some init codepaths will knowingly not take the hotplug lock.
519	 * This is all valid, so mute lockdep until it makes sense to report
520	 * unheld locks.
521	 */
522	if (system_state < SYSTEM_RUNNING)
523		return;
524
525	percpu_rwsem_assert_held(&cpu_hotplug_lock);
526}
527
528#ifdef CONFIG_LOCKDEP
529int lockdep_is_cpus_held(void)
530{
531	return percpu_rwsem_is_held(&cpu_hotplug_lock);
532}
533#endif
534
535static void lockdep_acquire_cpus_lock(void)
536{
537	rwsem_acquire(&cpu_hotplug_lock.dep_map, 0, 0, _THIS_IP_);
538}
539
540static void lockdep_release_cpus_lock(void)
541{
542	rwsem_release(&cpu_hotplug_lock.dep_map, _THIS_IP_);
543}
544
545/*
546 * Wait for currently running CPU hotplug operations to complete (if any) and
547 * disable future CPU hotplug (from sysfs). The 'cpu_add_remove_lock' protects
548 * the 'cpu_hotplug_disabled' flag. The same lock is also acquired by the
549 * hotplug path before performing hotplug operations. So acquiring that lock
550 * guarantees mutual exclusion from any currently running hotplug operations.
551 */
552void cpu_hotplug_disable(void)
553{
554	cpu_maps_update_begin();
555	cpu_hotplug_disabled++;
556	cpu_maps_update_done();
557}
558EXPORT_SYMBOL_GPL(cpu_hotplug_disable);
559
560static void __cpu_hotplug_enable(void)
561{
562	if (WARN_ONCE(!cpu_hotplug_disabled, "Unbalanced cpu hotplug enable\n"))
563		return;
564	cpu_hotplug_disabled--;
565}
566
567void cpu_hotplug_enable(void)
568{
569	cpu_maps_update_begin();
570	__cpu_hotplug_enable();
571	cpu_maps_update_done();
572}
573EXPORT_SYMBOL_GPL(cpu_hotplug_enable);
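
/*
 * Sketch of the intended pairing (hypothetical caller): every successful
 * cpu_hotplug_disable() must be matched by a cpu_hotplug_enable(), otherwise
 * hotplug stays disabled and later cpu_up()/cpu_down() attempts fail with
 * -EBUSY:
 *
 *	cpu_hotplug_disable();
 *	foo_do_something_requiring_stable_cpus();	// hypothetical
 *	cpu_hotplug_enable();
 */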
574
575#else
576
577static void lockdep_acquire_cpus_lock(void)
578{
579}
580
581static void lockdep_release_cpus_lock(void)
582{
583}
584
585#endif	/* CONFIG_HOTPLUG_CPU */
586
587/*
588 * Architectures that need SMT-specific errata handling during SMT hotplug
589 * should override this.
590 */
591void __weak arch_smt_update(void) { }
592
593#ifdef CONFIG_HOTPLUG_SMT
594
595enum cpuhp_smt_control cpu_smt_control __read_mostly = CPU_SMT_ENABLED;
596static unsigned int cpu_smt_max_threads __ro_after_init;
597unsigned int cpu_smt_num_threads __read_mostly = UINT_MAX;
598
599void __init cpu_smt_disable(bool force)
600{
601	if (!cpu_smt_possible())
602		return;
603
604	if (force) {
605		pr_info("SMT: Force disabled\n");
606		cpu_smt_control = CPU_SMT_FORCE_DISABLED;
607	} else {
608		pr_info("SMT: disabled\n");
609		cpu_smt_control = CPU_SMT_DISABLED;
610	}
611	cpu_smt_num_threads = 1;
612}
613
/*
 * The decision whether SMT is supported can only be made after the full
 * CPU identification. Called from architecture code.
 */
618void __init cpu_smt_set_num_threads(unsigned int num_threads,
619				    unsigned int max_threads)
620{
621	WARN_ON(!num_threads || (num_threads > max_threads));
622
623	if (max_threads == 1)
624		cpu_smt_control = CPU_SMT_NOT_SUPPORTED;
625
626	cpu_smt_max_threads = max_threads;
627
	/*
	 * If SMT has been disabled via the kernel command line or SMT is
	 * not supported, set cpu_smt_num_threads to 1 for consistency.
	 * If enabled, take into account the number of threads the
	 * architecture requested to bring up.
	 */
634	if (cpu_smt_control != CPU_SMT_ENABLED)
635		cpu_smt_num_threads = 1;
636	else if (num_threads < cpu_smt_num_threads)
637		cpu_smt_num_threads = num_threads;
638}
639
640static int __init smt_cmdline_disable(char *str)
641{
642	cpu_smt_disable(str && !strcmp(str, "force"));
643	return 0;
644}
645early_param("nosmt", smt_cmdline_disable);
646
/*
 * For architectures supporting partial SMT states, check whether the thread
 * is allowed. Otherwise this has already been checked through
 * cpu_smt_max_threads when setting the SMT level.
 */
652static inline bool cpu_smt_thread_allowed(unsigned int cpu)
653{
654#ifdef CONFIG_SMT_NUM_THREADS_DYNAMIC
655	return topology_smt_thread_allowed(cpu);
656#else
657	return true;
658#endif
659}
660
661static inline bool cpu_bootable(unsigned int cpu)
662{
663	if (cpu_smt_control == CPU_SMT_ENABLED && cpu_smt_thread_allowed(cpu))
664		return true;
665
666	/* All CPUs are bootable if controls are not configured */
667	if (cpu_smt_control == CPU_SMT_NOT_IMPLEMENTED)
668		return true;
669
670	/* All CPUs are bootable if CPU is not SMT capable */
671	if (cpu_smt_control == CPU_SMT_NOT_SUPPORTED)
672		return true;
673
674	if (topology_is_primary_thread(cpu))
675		return true;
676
	/*
	 * On x86 it's required to boot all logical CPUs at least once so
	 * that the init code can get a chance to set CR4.MCE on each
	 * CPU. Otherwise, a broadcast MCE observing CR4.MCE=0 on any
	 * core will shut down the machine.
	 */
683	return !cpumask_test_cpu(cpu, &cpus_booted_once_mask);
684}
685
686/* Returns true if SMT is supported and not forcefully (irreversibly) disabled */
687bool cpu_smt_possible(void)
688{
689	return cpu_smt_control != CPU_SMT_FORCE_DISABLED &&
690		cpu_smt_control != CPU_SMT_NOT_SUPPORTED;
691}
692EXPORT_SYMBOL_GPL(cpu_smt_possible);
693
694#else
695static inline bool cpu_bootable(unsigned int cpu) { return true; }
696#endif
697
698static inline enum cpuhp_state
699cpuhp_set_state(int cpu, struct cpuhp_cpu_state *st, enum cpuhp_state target)
700{
701	enum cpuhp_state prev_state = st->state;
702	bool bringup = st->state < target;
703
704	st->rollback = false;
705	st->last = NULL;
706
707	st->target = target;
708	st->single = false;
709	st->bringup = bringup;
710	if (cpu_dying(cpu) != !bringup)
711		set_cpu_dying(cpu, !bringup);
712
713	return prev_state;
714}
715
716static inline void
717cpuhp_reset_state(int cpu, struct cpuhp_cpu_state *st,
718		  enum cpuhp_state prev_state)
719{
720	bool bringup = !st->bringup;
721
722	st->target = prev_state;
723
	/*
	 * Already rolling back. No need to invert the bringup value or to
	 * change the current state.
	 */
728	if (st->rollback)
729		return;
730
731	st->rollback = true;
732
733	/*
734	 * If we have st->last we need to undo partial multi_instance of this
735	 * state first. Otherwise start undo at the previous state.
736	 */
737	if (!st->last) {
738		if (st->bringup)
739			st->state--;
740		else
741			st->state++;
742	}
743
744	st->bringup = bringup;
745	if (cpu_dying(cpu) != !bringup)
746		set_cpu_dying(cpu, !bringup);
747}
748
749/* Regular hotplug invocation of the AP hotplug thread */
750static void __cpuhp_kick_ap(struct cpuhp_cpu_state *st)
751{
752	if (!st->single && st->state == st->target)
753		return;
754
755	st->result = 0;
	/*
	 * Make sure the above stores are visible before should_run becomes
	 * true. Paired with the smp_mb() in cpuhp_thread_fun().
	 */
760	smp_mb();
761	st->should_run = true;
762	wake_up_process(st->thread);
763	wait_for_ap_thread(st, st->bringup);
764}
765
766static int cpuhp_kick_ap(int cpu, struct cpuhp_cpu_state *st,
767			 enum cpuhp_state target)
768{
769	enum cpuhp_state prev_state;
770	int ret;
771
772	prev_state = cpuhp_set_state(cpu, st, target);
773	__cpuhp_kick_ap(st);
774	if ((ret = st->result)) {
775		cpuhp_reset_state(cpu, st, prev_state);
776		__cpuhp_kick_ap(st);
777	}
778
779	return ret;
780}
781
782static int bringup_wait_for_ap_online(unsigned int cpu)
783{
784	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
785
786	/* Wait for the CPU to reach CPUHP_AP_ONLINE_IDLE */
787	wait_for_ap_thread(st, true);
788	if (WARN_ON_ONCE((!cpu_online(cpu))))
789		return -ECANCELED;
790
791	/* Unpark the hotplug thread of the target cpu */
792	kthread_unpark(st->thread);
793
	/*
	 * SMT soft disabling on x86 requires bringing the CPU out of the
	 * BIOS 'wait for SIPI' state in order to set the CR4.MCE bit.  The
	 * CPU marked itself as booted_once in notify_cpu_starting() so the
	 * cpu_bootable() check will now return false if this is not the
	 * primary sibling.
	 */
801	if (!cpu_bootable(cpu))
802		return -ECANCELED;
803	return 0;
804}
805
806#ifdef CONFIG_HOTPLUG_SPLIT_STARTUP
807static int cpuhp_kick_ap_alive(unsigned int cpu)
808{
809	if (!cpuhp_can_boot_ap(cpu))
810		return -EAGAIN;
811
812	return arch_cpuhp_kick_ap_alive(cpu, idle_thread_get(cpu));
813}
814
815static int cpuhp_bringup_ap(unsigned int cpu)
816{
817	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
818	int ret;
819
	/*
	 * Some architectures have to walk the irq descriptors to
	 * set up the vector space for the cpu which comes online.
	 * Prevent irq alloc/free across the bringup.
	 */
825	irq_lock_sparse();
826
827	ret = cpuhp_bp_sync_alive(cpu);
828	if (ret)
829		goto out_unlock;
830
831	ret = bringup_wait_for_ap_online(cpu);
832	if (ret)
833		goto out_unlock;
834
835	irq_unlock_sparse();
836
837	if (st->target <= CPUHP_AP_ONLINE_IDLE)
838		return 0;
839
840	return cpuhp_kick_ap(cpu, st, st->target);
841
842out_unlock:
843	irq_unlock_sparse();
844	return ret;
845}
846#else
847static int bringup_cpu(unsigned int cpu)
848{
849	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
850	struct task_struct *idle = idle_thread_get(cpu);
851	int ret;
852
853	if (!cpuhp_can_boot_ap(cpu))
854		return -EAGAIN;
855
	/*
	 * Some architectures have to walk the irq descriptors to
	 * set up the vector space for the cpu which comes online.
	 *
	 * Prevent irq alloc/free across the bringup by acquiring the
	 * sparse irq lock. Hold it until the upcoming CPU completes the
	 * startup in cpuhp_online_idle(), which avoids the need for
	 * intermediate synchronization points in the architecture code.
	 */
865	irq_lock_sparse();
866
867	ret = __cpu_up(cpu, idle);
868	if (ret)
869		goto out_unlock;
870
871	ret = cpuhp_bp_sync_alive(cpu);
872	if (ret)
873		goto out_unlock;
874
875	ret = bringup_wait_for_ap_online(cpu);
876	if (ret)
877		goto out_unlock;
878
879	irq_unlock_sparse();
880
881	if (st->target <= CPUHP_AP_ONLINE_IDLE)
882		return 0;
883
884	return cpuhp_kick_ap(cpu, st, st->target);
885
886out_unlock:
887	irq_unlock_sparse();
888	return ret;
889}
890#endif
891
892static int finish_cpu(unsigned int cpu)
893{
894	struct task_struct *idle = idle_thread_get(cpu);
895	struct mm_struct *mm = idle->active_mm;
896
897	/*
898	 * idle_task_exit() will have switched to &init_mm, now
899	 * clean up any remaining active_mm state.
900	 */
901	if (mm != &init_mm)
902		idle->active_mm = &init_mm;
903	mmdrop_lazy_tlb(mm);
904	return 0;
905}
906
907/*
908 * Hotplug state machine related functions
909 */
910
/*
 * Get the next state to run. Empty ones will be skipped. Returns true if a
 * state must be run.
 *
 * st->state is modified ahead of time, to match state_to_run, as if it
 * had already run.
 */
918static bool cpuhp_next_state(bool bringup,
919			     enum cpuhp_state *state_to_run,
920			     struct cpuhp_cpu_state *st,
921			     enum cpuhp_state target)
922{
923	do {
924		if (bringup) {
925			if (st->state >= target)
926				return false;
927
928			*state_to_run = ++st->state;
929		} else {
930			if (st->state <= target)
931				return false;
932
933			*state_to_run = st->state--;
934		}
935
936		if (!cpuhp_step_empty(bringup, cpuhp_get_step(*state_to_run)))
937			break;
938	} while (true);
939
940	return true;
941}
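
/*
 * Worked example (illustrative): with st->state == CPUHP_OFFLINE and
 * target == CPUHP_BRINGUP_CPU, successive calls hand back each non-empty
 * state in (CPUHP_OFFLINE, CPUHP_BRINGUP_CPU], advancing st->state as they
 * go, and finally return false once st->state has reached the target.
 */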
942
943static int __cpuhp_invoke_callback_range(bool bringup,
944					 unsigned int cpu,
945					 struct cpuhp_cpu_state *st,
946					 enum cpuhp_state target,
947					 bool nofail)
948{
949	enum cpuhp_state state;
950	int ret = 0;
951
952	while (cpuhp_next_state(bringup, &state, st, target)) {
953		int err;
954
955		err = cpuhp_invoke_callback(cpu, state, bringup, NULL, NULL);
956		if (!err)
957			continue;
958
959		if (nofail) {
960			pr_warn("CPU %u %s state %s (%d) failed (%d)\n",
961				cpu, bringup ? "UP" : "DOWN",
962				cpuhp_get_step(st->state)->name,
963				st->state, err);
964			ret = -1;
965		} else {
966			ret = err;
967			break;
968		}
969	}
970
971	return ret;
972}
973
974static inline int cpuhp_invoke_callback_range(bool bringup,
975					      unsigned int cpu,
976					      struct cpuhp_cpu_state *st,
977					      enum cpuhp_state target)
978{
979	return __cpuhp_invoke_callback_range(bringup, cpu, st, target, false);
980}
981
982static inline void cpuhp_invoke_callback_range_nofail(bool bringup,
983						      unsigned int cpu,
984						      struct cpuhp_cpu_state *st,
985						      enum cpuhp_state target)
986{
987	__cpuhp_invoke_callback_range(bringup, cpu, st, target, true);
988}
989
990static inline bool can_rollback_cpu(struct cpuhp_cpu_state *st)
991{
992	if (IS_ENABLED(CONFIG_HOTPLUG_CPU))
993		return true;
994	/*
995	 * When CPU hotplug is disabled, then taking the CPU down is not
996	 * possible because takedown_cpu() and the architecture and
997	 * subsystem specific mechanisms are not available. So the CPU
998	 * which would be completely unplugged again needs to stay around
999	 * in the current state.
1000	 */
1001	return st->state <= CPUHP_BRINGUP_CPU;
1002}
1003
1004static int cpuhp_up_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
1005			      enum cpuhp_state target)
1006{
1007	enum cpuhp_state prev_state = st->state;
1008	int ret = 0;
1009
1010	ret = cpuhp_invoke_callback_range(true, cpu, st, target);
1011	if (ret) {
1012		pr_debug("CPU UP failed (%d) CPU %u state %s (%d)\n",
1013			 ret, cpu, cpuhp_get_step(st->state)->name,
1014			 st->state);
1015
1016		cpuhp_reset_state(cpu, st, prev_state);
1017		if (can_rollback_cpu(st))
1018			WARN_ON(cpuhp_invoke_callback_range(false, cpu, st,
1019							    prev_state));
1020	}
1021	return ret;
1022}
1023
1024/*
1025 * The cpu hotplug threads manage the bringup and teardown of the cpus
1026 */
1027static int cpuhp_should_run(unsigned int cpu)
1028{
1029	struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
1030
1031	return st->should_run;
1032}
1033
1034/*
1035 * Execute teardown/startup callbacks on the plugged cpu. Also used to invoke
1036 * callbacks when a state gets [un]installed at runtime.
1037 *
1038 * Each invocation of this function by the smpboot thread does a single AP
1039 * state callback.
1040 *
1041 * It has 3 modes of operation:
1042 *  - single: runs st->cb_state
1043 *  - up:     runs ++st->state, while st->state < st->target
1044 *  - down:   runs st->state--, while st->state > st->target
1045 *
1046 * When complete or on error, should_run is cleared and the completion is fired.
1047 */
1048static void cpuhp_thread_fun(unsigned int cpu)
1049{
1050	struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
1051	bool bringup = st->bringup;
1052	enum cpuhp_state state;
1053
1054	if (WARN_ON_ONCE(!st->should_run))
1055		return;
1056
1057	/*
1058	 * ACQUIRE for the cpuhp_should_run() load of ->should_run. Ensures
1059	 * that if we see ->should_run we also see the rest of the state.
1060	 */
1061	smp_mb();
1062
	/*
	 * The BP holds the hotplug lock, but we're now running on the AP;
	 * ensure that anybody asserting that the lock is held will actually
	 * find it so.
	 */
1068	lockdep_acquire_cpus_lock();
1069	cpuhp_lock_acquire(bringup);
1070
1071	if (st->single) {
1072		state = st->cb_state;
1073		st->should_run = false;
1074	} else {
1075		st->should_run = cpuhp_next_state(bringup, &state, st, st->target);
1076		if (!st->should_run)
1077			goto end;
1078	}
1079
1080	WARN_ON_ONCE(!cpuhp_is_ap_state(state));
1081
1082	if (cpuhp_is_atomic_state(state)) {
1083		local_irq_disable();
1084		st->result = cpuhp_invoke_callback(cpu, state, bringup, st->node, &st->last);
1085		local_irq_enable();
1086
1087		/*
1088		 * STARTING/DYING must not fail!
1089		 */
1090		WARN_ON_ONCE(st->result);
1091	} else {
1092		st->result = cpuhp_invoke_callback(cpu, state, bringup, st->node, &st->last);
1093	}
1094
1095	if (st->result) {
		/*
		 * If we fail on a rollback, we're up a creek without a
		 * paddle, no way forward, no way back. We lose, thanks for
		 * playing.
		 */
1101		WARN_ON_ONCE(st->rollback);
1102		st->should_run = false;
1103	}
1104
1105end:
1106	cpuhp_lock_release(bringup);
1107	lockdep_release_cpus_lock();
1108
1109	if (!st->should_run)
1110		complete_ap_thread(st, bringup);
1111}
1112
1113/* Invoke a single callback on a remote cpu */
1114static int
1115cpuhp_invoke_ap_callback(int cpu, enum cpuhp_state state, bool bringup,
1116			 struct hlist_node *node)
1117{
1118	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1119	int ret;
1120
1121	if (!cpu_online(cpu))
1122		return 0;
1123
1124	cpuhp_lock_acquire(false);
1125	cpuhp_lock_release(false);
1126
1127	cpuhp_lock_acquire(true);
1128	cpuhp_lock_release(true);
1129
1130	/*
1131	 * If we are up and running, use the hotplug thread. For early calls
1132	 * we invoke the thread function directly.
1133	 */
1134	if (!st->thread)
1135		return cpuhp_invoke_callback(cpu, state, bringup, node, NULL);
1136
1137	st->rollback = false;
1138	st->last = NULL;
1139
1140	st->node = node;
1141	st->bringup = bringup;
1142	st->cb_state = state;
1143	st->single = true;
1144
1145	__cpuhp_kick_ap(st);
1146
1147	/*
1148	 * If we failed and did a partial, do a rollback.
1149	 */
1150	if ((ret = st->result) && st->last) {
1151		st->rollback = true;
1152		st->bringup = !bringup;
1153
1154		__cpuhp_kick_ap(st);
1155	}
1156
	/*
	 * Clean up the leftovers so the next hotplug operation won't use
	 * stale data.
	 */
1161	st->node = st->last = NULL;
1162	return ret;
1163}
1164
1165static int cpuhp_kick_ap_work(unsigned int cpu)
1166{
1167	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1168	enum cpuhp_state prev_state = st->state;
1169	int ret;
1170
1171	cpuhp_lock_acquire(false);
1172	cpuhp_lock_release(false);
1173
1174	cpuhp_lock_acquire(true);
1175	cpuhp_lock_release(true);
1176
1177	trace_cpuhp_enter(cpu, st->target, prev_state, cpuhp_kick_ap_work);
1178	ret = cpuhp_kick_ap(cpu, st, st->target);
1179	trace_cpuhp_exit(cpu, st->state, prev_state, ret);
1180
1181	return ret;
1182}
1183
1184static struct smp_hotplug_thread cpuhp_threads = {
1185	.store			= &cpuhp_state.thread,
1186	.thread_should_run	= cpuhp_should_run,
1187	.thread_fn		= cpuhp_thread_fun,
1188	.thread_comm		= "cpuhp/%u",
1189	.selfparking		= true,
1190};
1191
1192static __init void cpuhp_init_state(void)
1193{
1194	struct cpuhp_cpu_state *st;
1195	int cpu;
1196
1197	for_each_possible_cpu(cpu) {
1198		st = per_cpu_ptr(&cpuhp_state, cpu);
1199		init_completion(&st->done_up);
1200		init_completion(&st->done_down);
1201	}
1202}
1203
1204void __init cpuhp_threads_init(void)
1205{
1206	cpuhp_init_state();
1207	BUG_ON(smpboot_register_percpu_thread(&cpuhp_threads));
1208	kthread_unpark(this_cpu_read(cpuhp_state.thread));
1209}
1210
1211#ifdef CONFIG_HOTPLUG_CPU
1212#ifndef arch_clear_mm_cpumask_cpu
1213#define arch_clear_mm_cpumask_cpu(cpu, mm) cpumask_clear_cpu(cpu, mm_cpumask(mm))
1214#endif
1215
1216/**
1217 * clear_tasks_mm_cpumask - Safely clear tasks' mm_cpumask for a CPU
1218 * @cpu: a CPU id
1219 *
1220 * This function walks all processes, finds a valid mm struct for each one and
1221 * then clears a corresponding bit in mm's cpumask.  While this all sounds
1222 * trivial, there are various non-obvious corner cases, which this function
1223 * tries to solve in a safe manner.
1224 *
1225 * Also note that the function uses a somewhat relaxed locking scheme, so it may
1226 * be called only for an already offlined CPU.
1227 */
1228void clear_tasks_mm_cpumask(int cpu)
1229{
1230	struct task_struct *p;
1231
	/*
	 * This function is called after the cpu is taken down and marked
	 * offline, so it's not like new tasks will ever get this cpu set in
	 * their mm mask. -- Peter Zijlstra
	 * Thus, we may use rcu_read_lock() here, instead of grabbing
	 * full-fledged tasklist_lock.
	 */
1239	WARN_ON(cpu_online(cpu));
1240	rcu_read_lock();
1241	for_each_process(p) {
1242		struct task_struct *t;
1243
1244		/*
1245		 * Main thread might exit, but other threads may still have
1246		 * a valid mm. Find one.
1247		 */
1248		t = find_lock_task_mm(p);
1249		if (!t)
1250			continue;
1251		arch_clear_mm_cpumask_cpu(cpu, t->mm);
1252		task_unlock(t);
1253	}
1254	rcu_read_unlock();
1255}
1256
1257/* Take this CPU down. */
1258static int take_cpu_down(void *_param)
1259{
1260	struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
1261	enum cpuhp_state target = max((int)st->target, CPUHP_AP_OFFLINE);
1262	int err, cpu = smp_processor_id();
1263
1264	/* Ensure this CPU doesn't handle any more interrupts. */
1265	err = __cpu_disable();
1266	if (err < 0)
1267		return err;
1268
1269	/*
1270	 * Must be called from CPUHP_TEARDOWN_CPU, which means, as we are going
1271	 * down, that the current state is CPUHP_TEARDOWN_CPU - 1.
1272	 */
1273	WARN_ON(st->state != (CPUHP_TEARDOWN_CPU - 1));
1274
1275	/*
1276	 * Invoke the former CPU_DYING callbacks. DYING must not fail!
1277	 */
1278	cpuhp_invoke_callback_range_nofail(false, cpu, st, target);
1279
1280	/* Park the stopper thread */
1281	stop_machine_park(cpu);
1282	return 0;
1283}
1284
1285static int takedown_cpu(unsigned int cpu)
1286{
1287	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1288	int err;
1289
1290	/* Park the smpboot threads */
1291	kthread_park(st->thread);
1292
1293	/*
1294	 * Prevent irq alloc/free while the dying cpu reorganizes the
1295	 * interrupt affinities.
1296	 */
1297	irq_lock_sparse();
1298
1299	/*
1300	 * So now all preempt/rcu users must observe !cpu_active().
1301	 */
1302	err = stop_machine_cpuslocked(take_cpu_down, NULL, cpumask_of(cpu));
1303	if (err) {
1304		/* CPU refused to die */
1305		irq_unlock_sparse();
1306		/* Unpark the hotplug thread so we can rollback there */
1307		kthread_unpark(st->thread);
1308		return err;
1309	}
1310	BUG_ON(cpu_online(cpu));
1311
1312	/*
1313	 * The teardown callback for CPUHP_AP_SCHED_STARTING will have removed
1314	 * all runnable tasks from the CPU, there's only the idle task left now
1315	 * that the migration thread is done doing the stop_machine thing.
1316	 *
1317	 * Wait for the stop thread to go away.
1318	 */
1319	wait_for_ap_thread(st, false);
1320	BUG_ON(st->state != CPUHP_AP_IDLE_DEAD);
1321
1322	/* Interrupts are moved away from the dying cpu, reenable alloc/free */
1323	irq_unlock_sparse();
1324
1325	hotplug_cpu__broadcast_tick_pull(cpu);
1326	/* This actually kills the CPU. */
1327	__cpu_die(cpu);
1328
1329	cpuhp_bp_sync_dead(cpu);
1330
1331	tick_cleanup_dead_cpu(cpu);
1332
1333	/*
1334	 * Callbacks must be re-integrated right away to the RCU state machine.
1335	 * Otherwise an RCU callback could block a further teardown function
1336	 * waiting for its completion.
1337	 */
1338	rcutree_migrate_callbacks(cpu);
1339
1340	return 0;
1341}
1342
1343static void cpuhp_complete_idle_dead(void *arg)
1344{
1345	struct cpuhp_cpu_state *st = arg;
1346
1347	complete_ap_thread(st, false);
1348}
1349
1350void cpuhp_report_idle_dead(void)
1351{
1352	struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
1353
1354	BUG_ON(st->state != CPUHP_AP_OFFLINE);
1355	tick_assert_timekeeping_handover();
1356	rcutree_report_cpu_dead();
1357	st->state = CPUHP_AP_IDLE_DEAD;
1358	/*
1359	 * We cannot call complete after rcutree_report_cpu_dead() so we delegate it
1360	 * to an online cpu.
1361	 */
1362	smp_call_function_single(cpumask_first(cpu_online_mask),
1363				 cpuhp_complete_idle_dead, st, 0);
1364}
1365
1366static int cpuhp_down_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
1367				enum cpuhp_state target)
1368{
1369	enum cpuhp_state prev_state = st->state;
1370	int ret = 0;
1371
1372	ret = cpuhp_invoke_callback_range(false, cpu, st, target);
1373	if (ret) {
1374		pr_debug("CPU DOWN failed (%d) CPU %u state %s (%d)\n",
1375			 ret, cpu, cpuhp_get_step(st->state)->name,
1376			 st->state);
1377
1378		cpuhp_reset_state(cpu, st, prev_state);
1379
1380		if (st->state < prev_state)
1381			WARN_ON(cpuhp_invoke_callback_range(true, cpu, st,
1382							    prev_state));
1383	}
1384
1385	return ret;
1386}
1387
1388/* Requires cpu_add_remove_lock to be held */
1389static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
1390			   enum cpuhp_state target)
1391{
1392	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1393	int prev_state, ret = 0;
1394
1395	if (num_online_cpus() == 1)
1396		return -EBUSY;
1397
1398	if (!cpu_present(cpu))
1399		return -EINVAL;
1400
1401	cpus_write_lock();
1402
1403	cpuhp_tasks_frozen = tasks_frozen;
1404
1405	prev_state = cpuhp_set_state(cpu, st, target);
1406	/*
1407	 * If the current CPU state is in the range of the AP hotplug thread,
1408	 * then we need to kick the thread.
1409	 */
1410	if (st->state > CPUHP_TEARDOWN_CPU) {
1411		st->target = max((int)target, CPUHP_TEARDOWN_CPU);
1412		ret = cpuhp_kick_ap_work(cpu);
		/*
		 * The AP side has done the error rollback already. Just
		 * return the error code.
		 */
1417		if (ret)
1418			goto out;
1419
1420		/*
1421		 * We might have stopped still in the range of the AP hotplug
1422		 * thread. Nothing to do anymore.
1423		 */
1424		if (st->state > CPUHP_TEARDOWN_CPU)
1425			goto out;
1426
1427		st->target = target;
1428	}
1429	/*
1430	 * The AP brought itself down to CPUHP_TEARDOWN_CPU. So we need
1431	 * to do the further cleanups.
1432	 */
1433	ret = cpuhp_down_callbacks(cpu, st, target);
1434	if (ret && st->state < prev_state) {
1435		if (st->state == CPUHP_TEARDOWN_CPU) {
1436			cpuhp_reset_state(cpu, st, prev_state);
1437			__cpuhp_kick_ap(st);
1438		} else {
1439			WARN(1, "DEAD callback error for CPU%d", cpu);
1440		}
1441	}
1442
1443out:
1444	cpus_write_unlock();
1445	/*
1446	 * Do post unplug cleanup. This is still protected against
1447	 * concurrent CPU hotplug via cpu_add_remove_lock.
1448	 */
1449	lockup_detector_cleanup();
1450	arch_smt_update();
1451	return ret;
1452}
1453
1454struct cpu_down_work {
1455	unsigned int		cpu;
1456	enum cpuhp_state	target;
1457};
1458
1459static long __cpu_down_maps_locked(void *arg)
1460{
1461	struct cpu_down_work *work = arg;
1462
1463	return _cpu_down(work->cpu, 0, work->target);
1464}
1465
1466static int cpu_down_maps_locked(unsigned int cpu, enum cpuhp_state target)
1467{
1468	struct cpu_down_work work = { .cpu = cpu, .target = target, };
1469
1470	/*
1471	 * If the platform does not support hotplug, report it explicitly to
1472	 * differentiate it from a transient offlining failure.
1473	 */
1474	if (cc_platform_has(CC_ATTR_HOTPLUG_DISABLED))
1475		return -EOPNOTSUPP;
1476	if (cpu_hotplug_disabled)
1477		return -EBUSY;
1478
	/*
	 * Ensure that the control task does not run on the CPU to be
	 * offlined, to prevent a deadlock against cfs_b->period_timer.
	 * Also keep at least one housekeeping cpu online to avoid generating
	 * an empty sched_domain span.
	 */
1485	for_each_cpu_and(cpu, cpu_online_mask, housekeeping_cpumask(HK_TYPE_DOMAIN)) {
1486		if (cpu != work.cpu)
1487			return work_on_cpu(cpu, __cpu_down_maps_locked, &work);
1488	}
1489	return -EBUSY;
1490}
1491
1492static int cpu_down(unsigned int cpu, enum cpuhp_state target)
1493{
1494	int err;
1495
1496	cpu_maps_update_begin();
1497	err = cpu_down_maps_locked(cpu, target);
1498	cpu_maps_update_done();
1499	return err;
1500}
1501
1502/**
1503 * cpu_device_down - Bring down a cpu device
1504 * @dev: Pointer to the cpu device to offline
1505 *
1506 * This function is meant to be used by device core cpu subsystem only.
1507 *
1508 * Other subsystems should use remove_cpu() instead.
1509 *
1510 * Return: %0 on success or a negative errno code
1511 */
1512int cpu_device_down(struct device *dev)
1513{
1514	return cpu_down(dev->id, CPUHP_OFFLINE);
1515}
1516
1517int remove_cpu(unsigned int cpu)
1518{
1519	int ret;
1520
1521	lock_device_hotplug();
1522	ret = device_offline(get_cpu_device(cpu));
1523	unlock_device_hotplug();
1524
1525	return ret;
1526}
1527EXPORT_SYMBOL_GPL(remove_cpu);
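
/*
 * Usage sketch (hypothetical caller): subsystems which need to temporarily
 * take a CPU out of service pair remove_cpu() with add_cpu():
 *
 *	ret = remove_cpu(cpu);
 *	if (ret)
 *		return ret;
 *	foo_exclusive_use_of_cpu(cpu);	// hypothetical
 *	ret = add_cpu(cpu);
 */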
1528
1529void smp_shutdown_nonboot_cpus(unsigned int primary_cpu)
1530{
1531	unsigned int cpu;
1532	int error;
1533
1534	cpu_maps_update_begin();
1535
	/*
	 * Make certain the cpu I'm about to reboot on is online.
	 *
	 * This is in line with what migrate_to_reboot_cpu() already does.
	 */
1541	if (!cpu_online(primary_cpu))
1542		primary_cpu = cpumask_first(cpu_online_mask);
1543
1544	for_each_online_cpu(cpu) {
1545		if (cpu == primary_cpu)
1546			continue;
1547
1548		error = cpu_down_maps_locked(cpu, CPUHP_OFFLINE);
1549		if (error) {
1550			pr_err("Failed to offline CPU%d - error=%d",
1551				cpu, error);
1552			break;
1553		}
1554	}
1555
1556	/*
1557	 * Ensure all but the reboot CPU are offline.
1558	 */
1559	BUG_ON(num_online_cpus() > 1);
1560
1561	/*
1562	 * Make sure the CPUs won't be enabled by someone else after this
1563	 * point. Kexec will reboot to a new kernel shortly resetting
1564	 * everything along the way.
1565	 */
1566	cpu_hotplug_disabled++;
1567
1568	cpu_maps_update_done();
1569}
1570
1571#else
1572#define takedown_cpu		NULL
1573#endif /*CONFIG_HOTPLUG_CPU*/
1574
1575/**
1576 * notify_cpu_starting(cpu) - Invoke the callbacks on the starting CPU
1577 * @cpu: cpu that just started
1578 *
1579 * It must be called by the arch code on the new cpu, before the new cpu
1580 * enables interrupts and before the "boot" cpu returns from __cpu_up().
1581 */
1582void notify_cpu_starting(unsigned int cpu)
1583{
1584	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1585	enum cpuhp_state target = min((int)st->target, CPUHP_AP_ONLINE);
1586
1587	rcutree_report_cpu_starting(cpu);	/* Enables RCU usage on this CPU. */
1588	cpumask_set_cpu(cpu, &cpus_booted_once_mask);
1589
1590	/*
1591	 * STARTING must not fail!
1592	 */
1593	cpuhp_invoke_callback_range_nofail(true, cpu, st, target);
1594}
1595
1596/*
1597 * Called from the idle task. Wake up the controlling task which brings the
1598 * hotplug thread of the upcoming CPU up and then delegates the rest of the
1599 * online bringup to the hotplug thread.
1600 */
1601void cpuhp_online_idle(enum cpuhp_state state)
1602{
1603	struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
1604
1605	/* Happens for the boot cpu */
1606	if (state != CPUHP_AP_ONLINE_IDLE)
1607		return;
1608
1609	cpuhp_ap_update_sync_state(SYNC_STATE_ONLINE);
1610
1611	/*
1612	 * Unpark the stopper thread before we start the idle loop (and start
1613	 * scheduling); this ensures the stopper task is always available.
1614	 */
1615	stop_machine_unpark(smp_processor_id());
1616
1617	st->state = CPUHP_AP_ONLINE_IDLE;
1618	complete_ap_thread(st, true);
1619}
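
/*
 * Sketch of how architecture code typically reaches this point (simplified,
 * details vary per architecture): the freshly started AP runs something like
 *
 *	static void foo_start_secondary(void)	// hypothetical arch hook
 *	{
 *		// low level init, per-CPU setup ...
 *		notify_cpu_starting(smp_processor_id());
 *		set_cpu_online(smp_processor_id(), true);
 *		// ... and finally enters the idle loop, which invokes
 *		// cpuhp_online_idle(CPUHP_AP_ONLINE_IDLE) above:
 *		cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
 *	}
 */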
1620
1621/* Requires cpu_add_remove_lock to be held */
1622static int _cpu_up(unsigned int cpu, int tasks_frozen, enum cpuhp_state target)
1623{
1624	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1625	struct task_struct *idle;
1626	int ret = 0;
1627
1628	cpus_write_lock();
1629
1630	if (!cpu_present(cpu)) {
1631		ret = -EINVAL;
1632		goto out;
1633	}
1634
1635	/*
1636	 * The caller of cpu_up() might have raced with another
1637	 * caller. Nothing to do.
1638	 */
1639	if (st->state >= target)
1640		goto out;
1641
1642	if (st->state == CPUHP_OFFLINE) {
1643		/* Let it fail before we try to bring the cpu up */
1644		idle = idle_thread_get(cpu);
1645		if (IS_ERR(idle)) {
1646			ret = PTR_ERR(idle);
1647			goto out;
1648		}
1649
1650		/*
1651		 * Reset stale stack state from the last time this CPU was online.
1652		 */
1653		scs_task_reset(idle);
1654		kasan_unpoison_task_stack(idle);
1655	}
1656
1657	cpuhp_tasks_frozen = tasks_frozen;
1658
1659	cpuhp_set_state(cpu, st, target);
1660	/*
1661	 * If the current CPU state is in the range of the AP hotplug thread,
1662	 * then we need to kick the thread once more.
1663	 */
1664	if (st->state > CPUHP_BRINGUP_CPU) {
1665		ret = cpuhp_kick_ap_work(cpu);
		/*
		 * The AP side has done the error rollback already. Just
		 * return the error code.
		 */
1670		if (ret)
1671			goto out;
1672	}
1673
1674	/*
1675	 * Try to reach the target state. We max out on the BP at
1676	 * CPUHP_BRINGUP_CPU. After that the AP hotplug thread is
1677	 * responsible for bringing it up to the target state.
1678	 */
1679	target = min((int)target, CPUHP_BRINGUP_CPU);
1680	ret = cpuhp_up_callbacks(cpu, st, target);
1681out:
1682	cpus_write_unlock();
1683	arch_smt_update();
1684	return ret;
1685}
1686
1687static int cpu_up(unsigned int cpu, enum cpuhp_state target)
1688{
1689	int err = 0;
1690
1691	if (!cpu_possible(cpu)) {
1692		pr_err("can't online cpu %d because it is not configured as may-hotadd at boot time\n",
1693		       cpu);
1694		return -EINVAL;
1695	}
1696
1697	err = try_online_node(cpu_to_node(cpu));
1698	if (err)
1699		return err;
1700
1701	cpu_maps_update_begin();
1702
1703	if (cpu_hotplug_disabled) {
1704		err = -EBUSY;
1705		goto out;
1706	}
1707	if (!cpu_bootable(cpu)) {
1708		err = -EPERM;
1709		goto out;
1710	}
1711
1712	err = _cpu_up(cpu, 0, target);
1713out:
1714	cpu_maps_update_done();
1715	return err;
1716}
1717
1718/**
1719 * cpu_device_up - Bring up a cpu device
1720 * @dev: Pointer to the cpu device to online
1721 *
1722 * This function is meant to be used by device core cpu subsystem only.
1723 *
1724 * Other subsystems should use add_cpu() instead.
1725 *
1726 * Return: %0 on success or a negative errno code
1727 */
1728int cpu_device_up(struct device *dev)
1729{
1730	return cpu_up(dev->id, CPUHP_ONLINE);
1731}
1732
1733int add_cpu(unsigned int cpu)
1734{
1735	int ret;
1736
1737	lock_device_hotplug();
1738	ret = device_online(get_cpu_device(cpu));
1739	unlock_device_hotplug();
1740
1741	return ret;
1742}
1743EXPORT_SYMBOL_GPL(add_cpu);
1744
1745/**
1746 * bringup_hibernate_cpu - Bring up the CPU that we hibernated on
1747 * @sleep_cpu: The cpu we hibernated on and should be brought up.
1748 *
 * On some architectures like arm64, we can hibernate on any CPU, but on
 * wakeup the CPU we hibernated on might be offline, for example as a side
 * effect of using maxcpus=.
1752 *
1753 * Return: %0 on success or a negative errno code
1754 */
1755int bringup_hibernate_cpu(unsigned int sleep_cpu)
1756{
1757	int ret;
1758
1759	if (!cpu_online(sleep_cpu)) {
1760		pr_info("Hibernated on a CPU that is offline! Bringing CPU up.\n");
1761		ret = cpu_up(sleep_cpu, CPUHP_ONLINE);
1762		if (ret) {
1763			pr_err("Failed to bring hibernate-CPU up!\n");
1764			return ret;
1765		}
1766	}
1767	return 0;
1768}
1769
1770static void __init cpuhp_bringup_mask(const struct cpumask *mask, unsigned int ncpus,
1771				      enum cpuhp_state target)
1772{
1773	unsigned int cpu;
1774
1775	for_each_cpu(cpu, mask) {
1776		struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1777
1778		if (cpu_up(cpu, target) && can_rollback_cpu(st)) {
1779			/*
1780			 * If this failed then cpu_up() might have only
1781			 * rolled back to CPUHP_BP_KICK_AP for the final
1782			 * online. Clean it up. NOOP if already rolled back.
1783			 */
1784			WARN_ON(cpuhp_invoke_callback_range(false, cpu, st, CPUHP_OFFLINE));
1785		}
1786
1787		if (!--ncpus)
1788			break;
1789	}
1790}
1791
1792#ifdef CONFIG_HOTPLUG_PARALLEL
1793static bool __cpuhp_parallel_bringup __ro_after_init = true;
1794
1795static int __init parallel_bringup_parse_param(char *arg)
1796{
1797	return kstrtobool(arg, &__cpuhp_parallel_bringup);
1798}
1799early_param("cpuhp.parallel", parallel_bringup_parse_param);
1800
1801static inline bool cpuhp_smt_aware(void)
1802{
1803	return cpu_smt_max_threads > 1;
1804}
1805
1806static inline const struct cpumask *cpuhp_get_primary_thread_mask(void)
1807{
1808	return cpu_primary_thread_mask;
1809}
1810
/*
 * On architectures which have enabled parallel bringup, this invokes all BP
 * prepare states for each of the APs to be onlined first. The last state
 * sends the startup IPI to the APs. The APs proceed through the low level
 * bringup code in parallel and then wait for the control CPU to release
 * them one by one for the final onlining procedure.
 *
 * This avoids waiting for each AP to respond to the startup IPI in
 * CPUHP_BRINGUP_CPU.
 */
1821static bool __init cpuhp_bringup_cpus_parallel(unsigned int ncpus)
1822{
1823	const struct cpumask *mask = cpu_present_mask;
1824
1825	if (__cpuhp_parallel_bringup)
1826		__cpuhp_parallel_bringup = arch_cpuhp_init_parallel_bringup();
1827	if (!__cpuhp_parallel_bringup)
1828		return false;
1829
1830	if (cpuhp_smt_aware()) {
1831		const struct cpumask *pmask = cpuhp_get_primary_thread_mask();
1832		static struct cpumask tmp_mask __initdata;
1833
		/*
		 * For various reasons, x86 requires that SMT siblings are
		 * not brought up while the primary thread is doing a
		 * microcode update. Bring the primary threads up first.
		 */
1839		cpumask_and(&tmp_mask, mask, pmask);
1840		cpuhp_bringup_mask(&tmp_mask, ncpus, CPUHP_BP_KICK_AP);
1841		cpuhp_bringup_mask(&tmp_mask, ncpus, CPUHP_ONLINE);
1842		/* Account for the online CPUs */
1843		ncpus -= num_online_cpus();
1844		if (!ncpus)
1845			return true;
1846		/* Create the mask for secondary CPUs */
1847		cpumask_andnot(&tmp_mask, mask, pmask);
1848		mask = &tmp_mask;
1849	}
1850
1851	/* Bring the not-yet started CPUs up */
1852	cpuhp_bringup_mask(mask, ncpus, CPUHP_BP_KICK_AP);
1853	cpuhp_bringup_mask(mask, ncpus, CPUHP_ONLINE);
1854	return true;
1855}
1856#else
1857static inline bool cpuhp_bringup_cpus_parallel(unsigned int ncpus) { return false; }
1858#endif /* CONFIG_HOTPLUG_PARALLEL */
1859
1860void __init bringup_nonboot_cpus(unsigned int max_cpus)
1861{
1862	/* Try parallel bringup optimization if enabled */
1863	if (cpuhp_bringup_cpus_parallel(max_cpus))
1864		return;
1865
1866	/* Full per CPU serialized bringup */
1867	cpuhp_bringup_mask(cpu_present_mask, max_cpus, CPUHP_ONLINE);
1868}
1869
1870#ifdef CONFIG_PM_SLEEP_SMP
1871static cpumask_var_t frozen_cpus;
1872
1873int freeze_secondary_cpus(int primary)
1874{
1875	int cpu, error = 0;
1876
1877	cpu_maps_update_begin();
1878	if (primary == -1) {
1879		primary = cpumask_first(cpu_online_mask);
1880		if (!housekeeping_cpu(primary, HK_TYPE_TIMER))
1881			primary = housekeeping_any_cpu(HK_TYPE_TIMER);
1882	} else {
1883		if (!cpu_online(primary))
1884			primary = cpumask_first(cpu_online_mask);
1885	}
1886
	/*
	 * We take down all of the non-boot CPUs in one shot to avoid races
	 * with userspace trying to use CPU hotplug at the same time.
	 */
1891	cpumask_clear(frozen_cpus);
1892
1893	pr_info("Disabling non-boot CPUs ...\n");
1894	for_each_online_cpu(cpu) {
1895		if (cpu == primary)
1896			continue;
1897
1898		if (pm_wakeup_pending()) {
1899			pr_info("Wakeup pending. Abort CPU freeze\n");
1900			error = -EBUSY;
1901			break;
1902		}
1903
1904		trace_suspend_resume(TPS("CPU_OFF"), cpu, true);
1905		error = _cpu_down(cpu, 1, CPUHP_OFFLINE);
1906		trace_suspend_resume(TPS("CPU_OFF"), cpu, false);
1907		if (!error)
1908			cpumask_set_cpu(cpu, frozen_cpus);
1909		else {
1910			pr_err("Error taking CPU%d down: %d\n", cpu, error);
1911			break;
1912		}
1913	}
1914
1915	if (!error)
1916		BUG_ON(num_online_cpus() > 1);
1917	else
1918		pr_err("Non-boot CPUs are not disabled\n");
1919
1920	/*
1921	 * Make sure the CPUs won't be enabled by someone else. We need to do
1922	 * this even in case of failure as all freeze_secondary_cpus() users are
1923	 * supposed to do thaw_secondary_cpus() on the failure path.
1924	 */
1925	cpu_hotplug_disabled++;
1926
1927	cpu_maps_update_done();
1928	return error;
1929}
1930
1931void __weak arch_thaw_secondary_cpus_begin(void)
1932{
1933}
1934
1935void __weak arch_thaw_secondary_cpus_end(void)
1936{
1937}
1938
1939void thaw_secondary_cpus(void)
1940{
1941	int cpu, error;
1942
1943	/* Allow everyone to use the CPU hotplug again */
1944	cpu_maps_update_begin();
1945	__cpu_hotplug_enable();
1946	if (cpumask_empty(frozen_cpus))
1947		goto out;
1948
1949	pr_info("Enabling non-boot CPUs ...\n");
1950
1951	arch_thaw_secondary_cpus_begin();
1952
1953	for_each_cpu(cpu, frozen_cpus) {
1954		trace_suspend_resume(TPS("CPU_ON"), cpu, true);
1955		error = _cpu_up(cpu, 1, CPUHP_ONLINE);
1956		trace_suspend_resume(TPS("CPU_ON"), cpu, false);
1957		if (!error) {
1958			pr_info("CPU%d is up\n", cpu);
1959			continue;
1960		}
1961		pr_warn("Error taking CPU%d up: %d\n", cpu, error);
1962	}
1963
1964	arch_thaw_secondary_cpus_end();
1965
1966	cpumask_clear(frozen_cpus);
1967out:
1968	cpu_maps_update_done();
1969}
1970
1971static int __init alloc_frozen_cpus(void)
1972{
1973	if (!alloc_cpumask_var(&frozen_cpus, GFP_KERNEL|__GFP_ZERO))
1974		return -ENOMEM;
1975	return 0;
1976}
1977core_initcall(alloc_frozen_cpus);
1978
1979/*
1980 * When callbacks for CPU hotplug notifications are being executed, we must
1981 * ensure that the state of the system with respect to the tasks being frozen
1982 * or not, as reported by the notification, remains unchanged *throughout the
1983 * duration* of the execution of the callbacks.
1984 * Hence we need to prevent the freezer from racing with regular CPU hotplug.
1985 *
1986 * This synchronization is implemented by mutually excluding regular CPU
1987 * hotplug and Suspend/Hibernate call paths by hooking onto the Suspend/
1988 * Hibernate notifications.
1989 */
1990static int
1991cpu_hotplug_pm_callback(struct notifier_block *nb,
1992			unsigned long action, void *ptr)
1993{
1994	switch (action) {
1995
1996	case PM_SUSPEND_PREPARE:
1997	case PM_HIBERNATION_PREPARE:
1998		cpu_hotplug_disable();
1999		break;
2000
2001	case PM_POST_SUSPEND:
2002	case PM_POST_HIBERNATION:
2003		cpu_hotplug_enable();
2004		break;
2005
2006	default:
2007		return NOTIFY_DONE;
2008	}
2009
2010	return NOTIFY_OK;
2011}
2012
2013
2014static int __init cpu_hotplug_pm_sync_init(void)
2015{
	/*
	 * cpu_hotplug_pm_callback has higher priority than x86's
	 * bsp_pm_callback, which depends on cpu_hotplug_pm_callback
	 * disabling cpu hotplug to avoid a cpu hotplug race.
	 */
2021	pm_notifier(cpu_hotplug_pm_callback, 0);
2022	return 0;
2023}
2024core_initcall(cpu_hotplug_pm_sync_init);
2025
2026#endif /* CONFIG_PM_SLEEP_SMP */
2027
2028int __boot_cpu_id;
2029
2030#endif /* CONFIG_SMP */
2031
2032/* Boot processor state steps */
2033static struct cpuhp_step cpuhp_hp_states[] = {
2034	[CPUHP_OFFLINE] = {
2035		.name			= "offline",
2036		.startup.single		= NULL,
2037		.teardown.single	= NULL,
2038	},
2039#ifdef CONFIG_SMP
	[CPUHP_CREATE_THREADS] = {
2041		.name			= "threads:prepare",
2042		.startup.single		= smpboot_create_threads,
2043		.teardown.single	= NULL,
2044		.cant_stop		= true,
2045	},
2046	[CPUHP_PERF_PREPARE] = {
2047		.name			= "perf:prepare",
2048		.startup.single		= perf_event_init_cpu,
2049		.teardown.single	= perf_event_exit_cpu,
2050	},
2051	[CPUHP_RANDOM_PREPARE] = {
2052		.name			= "random:prepare",
2053		.startup.single		= random_prepare_cpu,
2054		.teardown.single	= NULL,
2055	},
2056	[CPUHP_WORKQUEUE_PREP] = {
2057		.name			= "workqueue:prepare",
2058		.startup.single		= workqueue_prepare_cpu,
2059		.teardown.single	= NULL,
2060	},
2061	[CPUHP_HRTIMERS_PREPARE] = {
2062		.name			= "hrtimers:prepare",
2063		.startup.single		= hrtimers_prepare_cpu,
2064		.teardown.single	= NULL,
2065	},
2066	[CPUHP_SMPCFD_PREPARE] = {
2067		.name			= "smpcfd:prepare",
2068		.startup.single		= smpcfd_prepare_cpu,
2069		.teardown.single	= smpcfd_dead_cpu,
2070	},
2071	[CPUHP_RELAY_PREPARE] = {
2072		.name			= "relay:prepare",
2073		.startup.single		= relay_prepare_cpu,
2074		.teardown.single	= NULL,
2075	},
2076	[CPUHP_RCUTREE_PREP] = {
2077		.name			= "RCU/tree:prepare",
2078		.startup.single		= rcutree_prepare_cpu,
2079		.teardown.single	= rcutree_dead_cpu,
2080	},
2081	/*
2082	 * On the tear-down path, timers_dead_cpu() must be invoked
2083	 * before blk_mq_queue_reinit_notify() from notify_dead(),
	 * otherwise an RCU stall occurs.
2085	 */
2086	[CPUHP_TIMERS_PREPARE] = {
2087		.name			= "timers:prepare",
2088		.startup.single		= timers_prepare_cpu,
2089		.teardown.single	= timers_dead_cpu,
2090	},
2091
2092#ifdef CONFIG_HOTPLUG_SPLIT_STARTUP
2093	/*
	 * Kicks the AP alive. The AP will wait in cpuhp_ap_sync_alive() until
	 * the next step releases it.
2096	 */
2097	[CPUHP_BP_KICK_AP] = {
2098		.name			= "cpu:kick_ap",
2099		.startup.single		= cpuhp_kick_ap_alive,
2100	},
2101
2102	/*
2103	 * Waits for the AP to reach cpuhp_ap_sync_alive() and then
2104	 * releases it for the complete bringup.
2105	 */
2106	[CPUHP_BRINGUP_CPU] = {
2107		.name			= "cpu:bringup",
2108		.startup.single		= cpuhp_bringup_ap,
2109		.teardown.single	= finish_cpu,
2110		.cant_stop		= true,
2111	},
2112#else
2113	/*
2114	 * All-in-one CPU bringup state which includes the kick alive.
2115	 */
2116	[CPUHP_BRINGUP_CPU] = {
2117		.name			= "cpu:bringup",
2118		.startup.single		= bringup_cpu,
2119		.teardown.single	= finish_cpu,
2120		.cant_stop		= true,
2121	},
2122#endif
2123	/* Final state before CPU kills itself */
2124	[CPUHP_AP_IDLE_DEAD] = {
2125		.name			= "idle:dead",
2126	},
	/*
	 * Last state before the CPU enters the idle loop to die. Transient
	 * state for synchronization.
	 */
2131	[CPUHP_AP_OFFLINE] = {
2132		.name			= "ap:offline",
2133		.cant_stop		= true,
2134	},
2135	/* First state is scheduler control. Interrupts are disabled */
2136	[CPUHP_AP_SCHED_STARTING] = {
2137		.name			= "sched:starting",
2138		.startup.single		= sched_cpu_starting,
2139		.teardown.single	= sched_cpu_dying,
2140	},
2141	[CPUHP_AP_RCUTREE_DYING] = {
2142		.name			= "RCU/tree:dying",
2143		.startup.single		= NULL,
2144		.teardown.single	= rcutree_dying_cpu,
2145	},
2146	[CPUHP_AP_SMPCFD_DYING] = {
2147		.name			= "smpcfd:dying",
2148		.startup.single		= NULL,
2149		.teardown.single	= smpcfd_dying_cpu,
2150	},
2151	[CPUHP_AP_HRTIMERS_DYING] = {
2152		.name			= "hrtimers:dying",
2153		.startup.single		= NULL,
2154		.teardown.single	= hrtimers_cpu_dying,
2155	},
2156	[CPUHP_AP_TICK_DYING] = {
2157		.name			= "tick:dying",
2158		.startup.single		= NULL,
2159		.teardown.single	= tick_cpu_dying,
2160	},
	/*
	 * Entry state on starting. Interrupts enabled from here on. Transient
	 * state for synchronization.
	 */
2163	[CPUHP_AP_ONLINE] = {
2164		.name			= "ap:online",
2165	},
2166	/*
2167	 * Handled on control processor until the plugged processor manages
2168	 * this itself.
2169	 */
2170	[CPUHP_TEARDOWN_CPU] = {
2171		.name			= "cpu:teardown",
2172		.startup.single		= NULL,
2173		.teardown.single	= takedown_cpu,
2174		.cant_stop		= true,
2175	},
2176
2177	[CPUHP_AP_SCHED_WAIT_EMPTY] = {
2178		.name			= "sched:waitempty",
2179		.startup.single		= NULL,
2180		.teardown.single	= sched_cpu_wait_empty,
2181	},
2182
2183	/* Handle smpboot threads park/unpark */
2184	[CPUHP_AP_SMPBOOT_THREADS] = {
2185		.name			= "smpboot/threads:online",
2186		.startup.single		= smpboot_unpark_threads,
2187		.teardown.single	= smpboot_park_threads,
2188	},
2189	[CPUHP_AP_IRQ_AFFINITY_ONLINE] = {
2190		.name			= "irq/affinity:online",
2191		.startup.single		= irq_affinity_online_cpu,
2192		.teardown.single	= NULL,
2193	},
2194	[CPUHP_AP_PERF_ONLINE] = {
2195		.name			= "perf:online",
2196		.startup.single		= perf_event_init_cpu,
2197		.teardown.single	= perf_event_exit_cpu,
2198	},
2199	[CPUHP_AP_WATCHDOG_ONLINE] = {
2200		.name			= "lockup_detector:online",
2201		.startup.single		= lockup_detector_online_cpu,
2202		.teardown.single	= lockup_detector_offline_cpu,
2203	},
2204	[CPUHP_AP_WORKQUEUE_ONLINE] = {
2205		.name			= "workqueue:online",
2206		.startup.single		= workqueue_online_cpu,
2207		.teardown.single	= workqueue_offline_cpu,
2208	},
2209	[CPUHP_AP_RANDOM_ONLINE] = {
2210		.name			= "random:online",
2211		.startup.single		= random_online_cpu,
2212		.teardown.single	= NULL,
2213	},
2214	[CPUHP_AP_RCUTREE_ONLINE] = {
2215		.name			= "RCU/tree:online",
2216		.startup.single		= rcutree_online_cpu,
2217		.teardown.single	= rcutree_offline_cpu,
2218	},
2219#endif
2220	/*
2221	 * The dynamically registered state space is here
2222	 */
2223
2224#ifdef CONFIG_SMP
2225	/* Last state is scheduler control setting the cpu active */
2226	[CPUHP_AP_ACTIVE] = {
2227		.name			= "sched:active",
2228		.startup.single		= sched_cpu_activate,
2229		.teardown.single	= sched_cpu_deactivate,
2230	},
2231#endif
2232
2233	/* CPU is fully up and running. */
2234	[CPUHP_ONLINE] = {
2235		.name			= "online",
2236		.startup.single		= NULL,
2237		.teardown.single	= NULL,
2238	},
2239};
2240
2241/* Sanity check for callbacks */
2242static int cpuhp_cb_check(enum cpuhp_state state)
2243{
2244	if (state <= CPUHP_OFFLINE || state >= CPUHP_ONLINE)
2245		return -EINVAL;
2246	return 0;
2247}
2248
/*
 * Return a free slot in the requested dynamic state range. The states are
 * protected by the cpuhp_state_mutex and an empty slot is identified by
 * having no name assigned.
 */
2254static int cpuhp_reserve_state(enum cpuhp_state state)
2255{
2256	enum cpuhp_state i, end;
2257	struct cpuhp_step *step;
2258
2259	switch (state) {
2260	case CPUHP_AP_ONLINE_DYN:
2261		step = cpuhp_hp_states + CPUHP_AP_ONLINE_DYN;
2262		end = CPUHP_AP_ONLINE_DYN_END;
2263		break;
2264	case CPUHP_BP_PREPARE_DYN:
2265		step = cpuhp_hp_states + CPUHP_BP_PREPARE_DYN;
2266		end = CPUHP_BP_PREPARE_DYN_END;
2267		break;
2268	default:
2269		return -EINVAL;
2270	}
2271
2272	for (i = state; i <= end; i++, step++) {
2273		if (!step->name)
2274			return i;
2275	}
2276	WARN(1, "No more dynamic states available for CPU hotplug\n");
2277	return -ENOSPC;
2278}
2279
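/*
 * Install (@name != NULL) or remove (@name == NULL) the callbacks of a state.
 * For the dynamic ranges a free slot is reserved first and its number is
 * returned; for all other states the return value is 0 on success or -EBUSY
 * if the slot is already occupied. Callers hold cpuhp_state_mutex.
 */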
2280static int cpuhp_store_callbacks(enum cpuhp_state state, const char *name,
2281				 int (*startup)(unsigned int cpu),
2282				 int (*teardown)(unsigned int cpu),
2283				 bool multi_instance)
2284{
2285	/* (Un)Install the callbacks for further cpu hotplug operations */
2286	struct cpuhp_step *sp;
2287	int ret = 0;
2288
2289	/*
2290	 * If name is NULL, then the state gets removed.
2291	 *
2292	 * CPUHP_AP_ONLINE_DYN and CPUHP_BP_PREPARE_DYN are handed out on
2293	 * the first allocation from these dynamic ranges, so the removal
2294	 * would trigger a new allocation and clear the wrong (already
	 * empty) state, leaving the callbacks of the to-be-cleared state
	 * dangling, which causes wreckage on the next hotplug operation.
2297	 */
2298	if (name && (state == CPUHP_AP_ONLINE_DYN ||
2299		     state == CPUHP_BP_PREPARE_DYN)) {
2300		ret = cpuhp_reserve_state(state);
2301		if (ret < 0)
2302			return ret;
2303		state = ret;
2304	}
2305	sp = cpuhp_get_step(state);
2306	if (name && sp->name)
2307		return -EBUSY;
2308
2309	sp->startup.single = startup;
2310	sp->teardown.single = teardown;
2311	sp->name = name;
2312	sp->multi_instance = multi_instance;
2313	INIT_HLIST_HEAD(&sp->list);
2314	return ret;
2315}
2316
2317static void *cpuhp_get_teardown_cb(enum cpuhp_state state)
2318{
2319	return cpuhp_get_step(state)->teardown.single;
2320}
2321
2322/*
2323 * Call the startup/teardown function for a step either on the AP or
2324 * on the current CPU.
2325 */
2326static int cpuhp_issue_call(int cpu, enum cpuhp_state state, bool bringup,
2327			    struct hlist_node *node)
2328{
2329	struct cpuhp_step *sp = cpuhp_get_step(state);
2330	int ret;
2331
	/*
	 * If there's nothing to do, we're done.
	 * Relies on the union for multi_instance.
	 */
2336	if (cpuhp_step_empty(bringup, sp))
2337		return 0;
	/*
	 * The non-AP-bound callbacks can fail on bringup. On teardown
	 * (e.g. module removal) we crash for now.
	 */
2342#ifdef CONFIG_SMP
2343	if (cpuhp_is_ap_state(state))
2344		ret = cpuhp_invoke_ap_callback(cpu, state, bringup, node);
2345	else
2346		ret = cpuhp_invoke_callback(cpu, state, bringup, node, NULL);
2347#else
2348	ret = cpuhp_invoke_callback(cpu, state, bringup, node, NULL);
2349#endif
2350	BUG_ON(ret && !bringup);
2351	return ret;
2352}
2353
/*
 * Called from __cpuhp_setup_state*() and __cpuhp_state_add_instance*() on a
 * recoverable failure.
 *
 * Note: The teardown callbacks for rollback are not allowed to fail!
 */
2359static void cpuhp_rollback_install(int failedcpu, enum cpuhp_state state,
2360				   struct hlist_node *node)
2361{
2362	int cpu;
2363
2364	/* Roll back the already executed steps on the other cpus */
2365	for_each_present_cpu(cpu) {
2366		struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
2367		int cpustate = st->state;
2368
2369		if (cpu >= failedcpu)
2370			break;
2371
2372		/* Did we invoke the startup call on that cpu ? */
2373		if (cpustate >= state)
2374			cpuhp_issue_call(cpu, state, false, node);
2375	}
2376}
2377
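/*
 * Add a new instance to a multi-instance state. If @invoke is true, the
 * startup callback is invoked for the new instance on all CPUs which have
 * already reached @state; a failure rolls the instance back off the CPUs
 * brought up so far and the node is not added.
 */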
2378int __cpuhp_state_add_instance_cpuslocked(enum cpuhp_state state,
2379					  struct hlist_node *node,
2380					  bool invoke)
2381{
2382	struct cpuhp_step *sp;
2383	int cpu;
2384	int ret;
2385
2386	lockdep_assert_cpus_held();
2387
2388	sp = cpuhp_get_step(state);
	if (!sp->multi_instance)
2390		return -EINVAL;
2391
2392	mutex_lock(&cpuhp_state_mutex);
2393
2394	if (!invoke || !sp->startup.multi)
2395		goto add_node;
2396
2397	/*
2398	 * Try to call the startup callback for each present cpu
2399	 * depending on the hotplug state of the cpu.
2400	 */
2401	for_each_present_cpu(cpu) {
2402		struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
2403		int cpustate = st->state;
2404
2405		if (cpustate < state)
2406			continue;
2407
2408		ret = cpuhp_issue_call(cpu, state, true, node);
2409		if (ret) {
2410			if (sp->teardown.multi)
2411				cpuhp_rollback_install(cpu, state, node);
2412			goto unlock;
2413		}
2414	}
2415add_node:
2416	ret = 0;
2417	hlist_add_head(node, &sp->list);
2418unlock:
2419	mutex_unlock(&cpuhp_state_mutex);
2420	return ret;
2421}
2422
2423int __cpuhp_state_add_instance(enum cpuhp_state state, struct hlist_node *node,
2424			       bool invoke)
2425{
2426	int ret;
2427
2428	cpus_read_lock();
2429	ret = __cpuhp_state_add_instance_cpuslocked(state, node, invoke);
2430	cpus_read_unlock();
2431	return ret;
2432}
2433EXPORT_SYMBOL_GPL(__cpuhp_state_add_instance);
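
/*
 * Illustrative multi-instance usage (sketch only; the foo_* names are made
 * up). A driver typically sets up the state once and then adds one instance
 * per device via the cpuhp_state_add_instance() wrapper:
 *
 *	static enum cpuhp_state foo_state;
 *
 *	foo_state = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "foo:online",
 *					    foo_cpu_online, foo_cpu_offline);
 *	cpuhp_state_add_instance(foo_state, &foo_dev->node);
 */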
2434
2435/**
 * __cpuhp_setup_state_cpuslocked - Set up the callbacks for a hotplug machine state
2437 * @state:		The state to setup
2438 * @name:		Name of the step
2439 * @invoke:		If true, the startup function is invoked for cpus where
2440 *			cpu state >= @state
2441 * @startup:		startup callback function
2442 * @teardown:		teardown callback function
2443 * @multi_instance:	State is set up for multiple instances which get
2444 *			added afterwards.
2445 *
2446 * The caller needs to hold cpus read locked while calling this function.
2447 * Return:
2448 *   On success:
2449 *      Positive state number if @state is CPUHP_AP_ONLINE_DYN;
2450 *      0 for all other states
2451 *   On failure: proper (negative) error code
2452 */
2453int __cpuhp_setup_state_cpuslocked(enum cpuhp_state state,
2454				   const char *name, bool invoke,
2455				   int (*startup)(unsigned int cpu),
2456				   int (*teardown)(unsigned int cpu),
2457				   bool multi_instance)
2458{
2459	int cpu, ret = 0;
2460	bool dynstate;
2461
2462	lockdep_assert_cpus_held();
2463
2464	if (cpuhp_cb_check(state) || !name)
2465		return -EINVAL;
2466
2467	mutex_lock(&cpuhp_state_mutex);
2468
2469	ret = cpuhp_store_callbacks(state, name, startup, teardown,
2470				    multi_instance);
2471
2472	dynstate = state == CPUHP_AP_ONLINE_DYN;
2473	if (ret > 0 && dynstate) {
2474		state = ret;
2475		ret = 0;
2476	}
2477
2478	if (ret || !invoke || !startup)
2479		goto out;
2480
2481	/*
2482	 * Try to call the startup callback for each present cpu
2483	 * depending on the hotplug state of the cpu.
2484	 */
2485	for_each_present_cpu(cpu) {
2486		struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
2487		int cpustate = st->state;
2488
2489		if (cpustate < state)
2490			continue;
2491
2492		ret = cpuhp_issue_call(cpu, state, true, NULL);
2493		if (ret) {
2494			if (teardown)
2495				cpuhp_rollback_install(cpu, state, NULL);
2496			cpuhp_store_callbacks(state, NULL, NULL, NULL, false);
2497			goto out;
2498		}
2499	}
2500out:
2501	mutex_unlock(&cpuhp_state_mutex);
2502	/*
2503	 * If the requested state is CPUHP_AP_ONLINE_DYN, return the
2504	 * dynamically allocated state in case of success.
2505	 */
2506	if (!ret && dynstate)
2507		return state;
2508	return ret;
2509}
2510EXPORT_SYMBOL(__cpuhp_setup_state_cpuslocked);
2511
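/*
 * Variant of __cpuhp_setup_state_cpuslocked() which takes the CPUs read
 * lock itself. The cpuhp_setup_state*() convenience wrappers end up here.
 *
 * Illustrative usage via the wrapper (sketch only; the subsys_* names are
 * made up):
 *
 *	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "subsys:online",
 *				subsys_cpu_online, subsys_cpu_offline);
 *	if (ret < 0)
 *		return ret;
 */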
2512int __cpuhp_setup_state(enum cpuhp_state state,
2513			const char *name, bool invoke,
2514			int (*startup)(unsigned int cpu),
2515			int (*teardown)(unsigned int cpu),
2516			bool multi_instance)
2517{
2518	int ret;
2519
2520	cpus_read_lock();
2521	ret = __cpuhp_setup_state_cpuslocked(state, name, invoke, startup,
2522					     teardown, multi_instance);
2523	cpus_read_unlock();
2524	return ret;
2525}
2526EXPORT_SYMBOL(__cpuhp_setup_state);
2527
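/*
 * Remove an instance from a multi-instance state. If @invoke is true, the
 * teardown callback is invoked for the instance on all CPUs which have
 * reached @state before the node is removed from the list.
 */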
2528int __cpuhp_state_remove_instance(enum cpuhp_state state,
2529				  struct hlist_node *node, bool invoke)
2530{
2531	struct cpuhp_step *sp = cpuhp_get_step(state);
2532	int cpu;
2533
2534	BUG_ON(cpuhp_cb_check(state));
2535
2536	if (!sp->multi_instance)
2537		return -EINVAL;
2538
2539	cpus_read_lock();
2540	mutex_lock(&cpuhp_state_mutex);
2541
2542	if (!invoke || !cpuhp_get_teardown_cb(state))
2543		goto remove;
2544	/*
2545	 * Call the teardown callback for each present cpu depending
2546	 * on the hotplug state of the cpu. This function is not
2547	 * allowed to fail currently!
2548	 */
2549	for_each_present_cpu(cpu) {
2550		struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
2551		int cpustate = st->state;
2552
2553		if (cpustate >= state)
2554			cpuhp_issue_call(cpu, state, false, node);
2555	}
2556
2557remove:
2558	hlist_del(node);
2559	mutex_unlock(&cpuhp_state_mutex);
2560	cpus_read_unlock();
2561
2562	return 0;
2563}
2564EXPORT_SYMBOL_GPL(__cpuhp_state_remove_instance);
2565
2566/**
 * __cpuhp_remove_state_cpuslocked - Remove the callbacks for a hotplug machine state
2568 * @state:	The state to remove
2569 * @invoke:	If true, the teardown function is invoked for cpus where
2570 *		cpu state >= @state
2571 *
2572 * The caller needs to hold cpus read locked while calling this function.
2573 * The teardown callback is currently not allowed to fail. Think
2574 * about module removal!
2575 */
2576void __cpuhp_remove_state_cpuslocked(enum cpuhp_state state, bool invoke)
2577{
2578	struct cpuhp_step *sp = cpuhp_get_step(state);
2579	int cpu;
2580
2581	BUG_ON(cpuhp_cb_check(state));
2582
2583	lockdep_assert_cpus_held();
2584
2585	mutex_lock(&cpuhp_state_mutex);
2586	if (sp->multi_instance) {
2587		WARN(!hlist_empty(&sp->list),
2588		     "Error: Removing state %d which has instances left.\n",
2589		     state);
2590		goto remove;
2591	}
2592
2593	if (!invoke || !cpuhp_get_teardown_cb(state))
2594		goto remove;
2595
2596	/*
2597	 * Call the teardown callback for each present cpu depending
2598	 * on the hotplug state of the cpu. This function is not
2599	 * allowed to fail currently!
2600	 */
2601	for_each_present_cpu(cpu) {
2602		struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
2603		int cpustate = st->state;
2604
2605		if (cpustate >= state)
2606			cpuhp_issue_call(cpu, state, false, NULL);
2607	}
2608remove:
2609	cpuhp_store_callbacks(state, NULL, NULL, NULL, false);
2610	mutex_unlock(&cpuhp_state_mutex);
2611}
2612EXPORT_SYMBOL(__cpuhp_remove_state_cpuslocked);
2613
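/*
 * Variant of __cpuhp_remove_state_cpuslocked() which takes the CPUs read
 * lock itself.
 */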
2614void __cpuhp_remove_state(enum cpuhp_state state, bool invoke)
2615{
2616	cpus_read_lock();
2617	__cpuhp_remove_state_cpuslocked(state, invoke);
2618	cpus_read_unlock();
2619}
2620EXPORT_SYMBOL(__cpuhp_remove_state);
2621
2622#ifdef CONFIG_HOTPLUG_SMT
2623static void cpuhp_offline_cpu_device(unsigned int cpu)
2624{
2625	struct device *dev = get_cpu_device(cpu);
2626
2627	dev->offline = true;
2628	/* Tell user space about the state change */
2629	kobject_uevent(&dev->kobj, KOBJ_OFFLINE);
2630}
2631
2632static void cpuhp_online_cpu_device(unsigned int cpu)
2633{
2634	struct device *dev = get_cpu_device(cpu);
2635
2636	dev->offline = false;
2637	/* Tell user space about the state change */
2638	kobject_uevent(&dev->kobj, KOBJ_ONLINE);
2639}
2640
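/*
 * Take all non-primary SMT threads offline according to @ctrlval and record
 * the new SMT control state. The CPU device offline state is updated by hand
 * because device_offline() cannot be used under the cpu maps lock; see the
 * comment in the loop below.
 */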
2641int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval)
2642{
2643	int cpu, ret = 0;
2644
2645	cpu_maps_update_begin();
2646	for_each_online_cpu(cpu) {
2647		if (topology_is_primary_thread(cpu))
2648			continue;
2649		/*
		 * Disable can be called with CPU_SMT_ENABLED when changing
		 * from a higher to a lower number of SMT threads per core.
2652		 */
2653		if (ctrlval == CPU_SMT_ENABLED && cpu_smt_thread_allowed(cpu))
2654			continue;
2655		ret = cpu_down_maps_locked(cpu, CPUHP_OFFLINE);
2656		if (ret)
2657			break;
		/*
		 * As this needs to hold the cpu maps lock, it's impossible
		 * to call device_offline() because that ends up calling
		 * cpu_down(), which takes the cpu maps lock. The cpu maps
		 * lock needs to be held as this might race against in-kernel
		 * abusers of the hotplug machinery (thermal management).
		 *
		 * So nothing would update the device:offline state. That
		 * would leave the sysfs entry stale and prevent onlining
		 * after SMT control has been changed to 'off' again. This
		 * is called under the sysfs hotplug lock, so it is properly
		 * serialized against the regular offline usage.
		 */
2671		cpuhp_offline_cpu_device(cpu);
2672	}
2673	if (!ret)
2674		cpu_smt_control = ctrlval;
2675	cpu_maps_update_done();
2676	return ret;
2677}
2678
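/*
 * Switch SMT control back to enabled and bring up all present siblings
 * which are currently offline and allowed by the new thread count.
 */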
2679int cpuhp_smt_enable(void)
2680{
2681	int cpu, ret = 0;
2682
2683	cpu_maps_update_begin();
2684	cpu_smt_control = CPU_SMT_ENABLED;
2685	for_each_present_cpu(cpu) {
2686		/* Skip online CPUs and CPUs on offline nodes */
2687		if (cpu_online(cpu) || !node_online(cpu_to_node(cpu)))
2688			continue;
2689		if (!cpu_smt_thread_allowed(cpu))
2690			continue;
2691		ret = _cpu_up(cpu, 0, CPUHP_ONLINE);
2692		if (ret)
2693			break;
2694		/* See comment in cpuhp_smt_disable() */
2695		cpuhp_online_cpu_device(cpu);
2696	}
2697	cpu_maps_update_done();
2698	return ret;
2699}
2700#endif
2701
2702#if defined(CONFIG_SYSFS) && defined(CONFIG_HOTPLUG_CPU)
2703static ssize_t state_show(struct device *dev,
2704			  struct device_attribute *attr, char *buf)
2705{
2706	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
2707
2708	return sprintf(buf, "%d\n", st->state);
2709}
2710static DEVICE_ATTR_RO(state);
2711
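/*
 * Writing a state number to the "target" attribute moves the CPU up or down
 * through the state machine until that state is reached. Without
 * CONFIG_CPU_HOTPLUG_STATE_CONTROL only CPUHP_OFFLINE and CPUHP_ONLINE are
 * accepted.
 */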
2712static ssize_t target_store(struct device *dev, struct device_attribute *attr,
2713			    const char *buf, size_t count)
2714{
2715	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
2716	struct cpuhp_step *sp;
2717	int target, ret;
2718
2719	ret = kstrtoint(buf, 10, &target);
2720	if (ret)
2721		return ret;
2722
2723#ifdef CONFIG_CPU_HOTPLUG_STATE_CONTROL
2724	if (target < CPUHP_OFFLINE || target > CPUHP_ONLINE)
2725		return -EINVAL;
2726#else
2727	if (target != CPUHP_OFFLINE && target != CPUHP_ONLINE)
2728		return -EINVAL;
2729#endif
2730
2731	ret = lock_device_hotplug_sysfs();
2732	if (ret)
2733		return ret;
2734
2735	mutex_lock(&cpuhp_state_mutex);
2736	sp = cpuhp_get_step(target);
2737	ret = !sp->name || sp->cant_stop ? -EINVAL : 0;
2738	mutex_unlock(&cpuhp_state_mutex);
2739	if (ret)
2740		goto out;
2741
2742	if (st->state < target)
2743		ret = cpu_up(dev->id, target);
2744	else if (st->state > target)
2745		ret = cpu_down(dev->id, target);
2746	else if (WARN_ON(st->target != target))
2747		st->target = target;
2748out:
2749	unlock_device_hotplug();
2750	return ret ? ret : count;
2751}
2752
2753static ssize_t target_show(struct device *dev,
2754			   struct device_attribute *attr, char *buf)
2755{
2756	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
2757
2758	return sprintf(buf, "%d\n", st->target);
2759}
2760static DEVICE_ATTR_RW(target);
2761
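/*
 * Writing a state number to the "fail" attribute makes the callback of that
 * state fail on the next hotplug operation, which allows the rollback
 * handling to be exercised. Writing CPUHP_INVALID clears the injection point
 * again.
 */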
2762static ssize_t fail_store(struct device *dev, struct device_attribute *attr,
2763			  const char *buf, size_t count)
2764{
2765	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
2766	struct cpuhp_step *sp;
2767	int fail, ret;
2768
2769	ret = kstrtoint(buf, 10, &fail);
2770	if (ret)
2771		return ret;
2772
2773	if (fail == CPUHP_INVALID) {
2774		st->fail = fail;
2775		return count;
2776	}
2777
2778	if (fail < CPUHP_OFFLINE || fail > CPUHP_ONLINE)
2779		return -EINVAL;
2780
2781	/*
2782	 * Cannot fail STARTING/DYING callbacks.
2783	 */
2784	if (cpuhp_is_atomic_state(fail))
2785		return -EINVAL;
2786
	/*
	 * DEAD callbacks cannot fail...
	 * ... neither can CPUHP_BRINGUP_CPU during hotunplug. The latter
	 * triggers the STARTING callbacks, so a failure in this state would
	 * hinder rollback.
	 */
2793	if (fail <= CPUHP_BRINGUP_CPU && st->state > CPUHP_BRINGUP_CPU)
2794		return -EINVAL;
2795
2796	/*
2797	 * Cannot fail anything that doesn't have callbacks.
2798	 */
2799	mutex_lock(&cpuhp_state_mutex);
2800	sp = cpuhp_get_step(fail);
2801	if (!sp->startup.single && !sp->teardown.single)
2802		ret = -EINVAL;
2803	mutex_unlock(&cpuhp_state_mutex);
2804	if (ret)
2805		return ret;
2806
2807	st->fail = fail;
2808
2809	return count;
2810}
2811
2812static ssize_t fail_show(struct device *dev,
2813			 struct device_attribute *attr, char *buf)
2814{
2815	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
2816
2817	return sprintf(buf, "%d\n", st->fail);
2818}
2819
2820static DEVICE_ATTR_RW(fail);
2821
2822static struct attribute *cpuhp_cpu_attrs[] = {
2823	&dev_attr_state.attr,
2824	&dev_attr_target.attr,
2825	&dev_attr_fail.attr,
2826	NULL
2827};
2828
static const struct attribute_group cpuhp_cpu_attr_group = {
	.attrs = cpuhp_cpu_attrs,
	.name = "hotplug",
};
2834
static ssize_t states_show(struct device *dev,
			   struct device_attribute *attr, char *buf)
2837{
2838	ssize_t cur, res = 0;
2839	int i;
2840
2841	mutex_lock(&cpuhp_state_mutex);
2842	for (i = CPUHP_OFFLINE; i <= CPUHP_ONLINE; i++) {
2843		struct cpuhp_step *sp = cpuhp_get_step(i);
2844
2845		if (sp->name) {
2846			cur = sprintf(buf, "%3d: %s\n", i, sp->name);
2847			buf += cur;
2848			res += cur;
2849		}
2850	}
2851	mutex_unlock(&cpuhp_state_mutex);
2852	return res;
2853}
2854static DEVICE_ATTR_RO(states);
2855
2856static struct attribute *cpuhp_cpu_root_attrs[] = {
2857	&dev_attr_states.attr,
2858	NULL
2859};
2860
static const struct attribute_group cpuhp_cpu_root_attr_group = {
	.attrs = cpuhp_cpu_root_attrs,
	.name = "hotplug",
};
2866
2867#ifdef CONFIG_HOTPLUG_SMT
2868
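/*
 * Check whether the requested number of SMT threads is a valid setting:
 * any value between 1 and cpu_smt_max_threads when the architecture supports
 * partial SMT states (CONFIG_SMT_NUM_THREADS_DYNAMIC), otherwise only fully
 * off (1) or fully on (cpu_smt_max_threads).
 */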
2869static bool cpu_smt_num_threads_valid(unsigned int threads)
2870{
2871	if (IS_ENABLED(CONFIG_SMT_NUM_THREADS_DYNAMIC))
2872		return threads >= 1 && threads <= cpu_smt_max_threads;
2873	return threads == 1 || threads == cpu_smt_max_threads;
2874}
2875
2876static ssize_t
2877__store_smt_control(struct device *dev, struct device_attribute *attr,
2878		    const char *buf, size_t count)
2879{
2880	int ctrlval, ret, num_threads, orig_threads;
2881	bool force_off;
2882
2883	if (cpu_smt_control == CPU_SMT_FORCE_DISABLED)
2884		return -EPERM;
2885
2886	if (cpu_smt_control == CPU_SMT_NOT_SUPPORTED)
2887		return -ENODEV;
2888
2889	if (sysfs_streq(buf, "on")) {
2890		ctrlval = CPU_SMT_ENABLED;
2891		num_threads = cpu_smt_max_threads;
2892	} else if (sysfs_streq(buf, "off")) {
2893		ctrlval = CPU_SMT_DISABLED;
2894		num_threads = 1;
2895	} else if (sysfs_streq(buf, "forceoff")) {
2896		ctrlval = CPU_SMT_FORCE_DISABLED;
2897		num_threads = 1;
2898	} else if (kstrtoint(buf, 10, &num_threads) == 0) {
2899		if (num_threads == 1)
2900			ctrlval = CPU_SMT_DISABLED;
2901		else if (cpu_smt_num_threads_valid(num_threads))
2902			ctrlval = CPU_SMT_ENABLED;
2903		else
2904			return -EINVAL;
2905	} else {
2906		return -EINVAL;
2907	}
2908
2909	ret = lock_device_hotplug_sysfs();
2910	if (ret)
2911		return ret;
2912
2913	orig_threads = cpu_smt_num_threads;
2914	cpu_smt_num_threads = num_threads;
2915
2916	force_off = ctrlval != cpu_smt_control && ctrlval == CPU_SMT_FORCE_DISABLED;
2917
2918	if (num_threads > orig_threads)
2919		ret = cpuhp_smt_enable();
2920	else if (num_threads < orig_threads || force_off)
2921		ret = cpuhp_smt_disable(ctrlval);
2922
2923	unlock_device_hotplug();
2924	return ret ? ret : count;
2925}
2926
2927#else /* !CONFIG_HOTPLUG_SMT */
2928static ssize_t
2929__store_smt_control(struct device *dev, struct device_attribute *attr,
2930		    const char *buf, size_t count)
2931{
2932	return -ENODEV;
2933}
2934#endif /* CONFIG_HOTPLUG_SMT */
2935
2936static const char *smt_states[] = {
2937	[CPU_SMT_ENABLED]		= "on",
2938	[CPU_SMT_DISABLED]		= "off",
2939	[CPU_SMT_FORCE_DISABLED]	= "forceoff",
2940	[CPU_SMT_NOT_SUPPORTED]		= "notsupported",
2941	[CPU_SMT_NOT_IMPLEMENTED]	= "notimplemented",
2942};
2943
2944static ssize_t control_show(struct device *dev,
2945			    struct device_attribute *attr, char *buf)
2946{
2947	const char *state = smt_states[cpu_smt_control];
2948
2949#ifdef CONFIG_HOTPLUG_SMT
2950	/*
2951	 * If SMT is enabled but not all threads are enabled then show the
2952	 * number of threads. If all threads are enabled show "on". Otherwise
2953	 * show the state name.
2954	 */
2955	if (cpu_smt_control == CPU_SMT_ENABLED &&
2956	    cpu_smt_num_threads != cpu_smt_max_threads)
2957		return sysfs_emit(buf, "%d\n", cpu_smt_num_threads);
2958#endif
2959
2960	return sysfs_emit(buf, "%s\n", state);
2961}
2962
2963static ssize_t control_store(struct device *dev, struct device_attribute *attr,
2964			     const char *buf, size_t count)
2965{
2966	return __store_smt_control(dev, attr, buf, count);
2967}
2968static DEVICE_ATTR_RW(control);
2969
2970static ssize_t active_show(struct device *dev,
2971			   struct device_attribute *attr, char *buf)
2972{
2973	return sysfs_emit(buf, "%d\n", sched_smt_active());
2974}
2975static DEVICE_ATTR_RO(active);
2976
2977static struct attribute *cpuhp_smt_attrs[] = {
2978	&dev_attr_control.attr,
2979	&dev_attr_active.attr,
2980	NULL
2981};
2982
static const struct attribute_group cpuhp_smt_attr_group = {
	.attrs = cpuhp_smt_attrs,
	.name = "smt",
};
2988
2989static int __init cpu_smt_sysfs_init(void)
2990{
2991	struct device *dev_root;
2992	int ret = -ENODEV;
2993
2994	dev_root = bus_get_dev_root(&cpu_subsys);
2995	if (dev_root) {
2996		ret = sysfs_create_group(&dev_root->kobj, &cpuhp_smt_attr_group);
2997		put_device(dev_root);
2998	}
2999	return ret;
3000}
3001
3002static int __init cpuhp_sysfs_init(void)
3003{
3004	struct device *dev_root;
3005	int cpu, ret;
3006
3007	ret = cpu_smt_sysfs_init();
3008	if (ret)
3009		return ret;
3010
3011	dev_root = bus_get_dev_root(&cpu_subsys);
3012	if (dev_root) {
3013		ret = sysfs_create_group(&dev_root->kobj, &cpuhp_cpu_root_attr_group);
3014		put_device(dev_root);
3015		if (ret)
3016			return ret;
3017	}
3018
3019	for_each_possible_cpu(cpu) {
3020		struct device *dev = get_cpu_device(cpu);
3021
3022		if (!dev)
3023			continue;
3024		ret = sysfs_create_group(&dev->kobj, &cpuhp_cpu_attr_group);
3025		if (ret)
3026			return ret;
3027	}
3028	return 0;
3029}
3030device_initcall(cpuhp_sysfs_init);
3031#endif /* CONFIG_SYSFS && CONFIG_HOTPLUG_CPU */
3032
3033/*
 * cpu_bit_bitmap[] is a special, "compressed" data structure that
 * represents the NR_CPUS-bit binary values of 1 << nr.
3036 *
3037 * It is used by cpumask_of() to get a constant address to a CPU
3038 * mask value that has a single bit set only.
3039 */
3040
3041/* cpu_bit_bitmap[0] is empty - so we can back into it */
3042#define MASK_DECLARE_1(x)	[x+1][0] = (1UL << (x))
3043#define MASK_DECLARE_2(x)	MASK_DECLARE_1(x), MASK_DECLARE_1(x+1)
3044#define MASK_DECLARE_4(x)	MASK_DECLARE_2(x), MASK_DECLARE_2(x+2)
3045#define MASK_DECLARE_8(x)	MASK_DECLARE_4(x), MASK_DECLARE_4(x+4)
3046
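/*
 * Worked example (assuming a 64-bit kernel, NR_CPUS > 64 and the
 * get_cpu_mask() implementation in <linux/cpumask.h>): for cpu 65,
 * cpumask_of() picks row 1 + 65 % 64 = 2, which has bit 1 set, and steps
 * the pointer back by 65 / 64 = 1 long. Bit 1 of the second word of the
 * resulting mask is CPU 65, so exactly that bit reads as set.
 */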
3047const unsigned long cpu_bit_bitmap[BITS_PER_LONG+1][BITS_TO_LONGS(NR_CPUS)] = {
3048
3049	MASK_DECLARE_8(0),	MASK_DECLARE_8(8),
3050	MASK_DECLARE_8(16),	MASK_DECLARE_8(24),
3051#if BITS_PER_LONG > 32
3052	MASK_DECLARE_8(32),	MASK_DECLARE_8(40),
3053	MASK_DECLARE_8(48),	MASK_DECLARE_8(56),
3054#endif
3055};
3056EXPORT_SYMBOL_GPL(cpu_bit_bitmap);
3057
3058const DECLARE_BITMAP(cpu_all_bits, NR_CPUS) = CPU_BITS_ALL;
3059EXPORT_SYMBOL(cpu_all_bits);
3060
3061#ifdef CONFIG_INIT_ALL_POSSIBLE
3062struct cpumask __cpu_possible_mask __ro_after_init
3063	= {CPU_BITS_ALL};
3064#else
3065struct cpumask __cpu_possible_mask __ro_after_init;
3066#endif
3067EXPORT_SYMBOL(__cpu_possible_mask);
3068
3069struct cpumask __cpu_online_mask __read_mostly;
3070EXPORT_SYMBOL(__cpu_online_mask);
3071
3072struct cpumask __cpu_present_mask __read_mostly;
3073EXPORT_SYMBOL(__cpu_present_mask);
3074
3075struct cpumask __cpu_active_mask __read_mostly;
3076EXPORT_SYMBOL(__cpu_active_mask);
3077
3078struct cpumask __cpu_dying_mask __read_mostly;
3079EXPORT_SYMBOL(__cpu_dying_mask);
3080
3081atomic_t __num_online_cpus __read_mostly;
3082EXPORT_SYMBOL(__num_online_cpus);
3083
3084void init_cpu_present(const struct cpumask *src)
3085{
3086	cpumask_copy(&__cpu_present_mask, src);
3087}
3088
3089void init_cpu_possible(const struct cpumask *src)
3090{
3091	cpumask_copy(&__cpu_possible_mask, src);
3092}
3093
3094void init_cpu_online(const struct cpumask *src)
3095{
3096	cpumask_copy(&__cpu_online_mask, src);
3097}
3098
3099void set_cpu_online(unsigned int cpu, bool online)
3100{
3101	/*
3102	 * atomic_inc/dec() is required to handle the horrid abuse of this
3103	 * function by the reboot and kexec code which invoke it from
3104	 * IPI/NMI broadcasts when shutting down CPUs. Invocation from
3105	 * regular CPU hotplug is properly serialized.
3106	 *
	 * Note that the fact that __num_online_cpus is of type atomic_t
3108	 * does not protect readers which are not serialized against
3109	 * concurrent hotplug operations.
3110	 */
3111	if (online) {
3112		if (!cpumask_test_and_set_cpu(cpu, &__cpu_online_mask))
3113			atomic_inc(&__num_online_cpus);
3114	} else {
3115		if (cpumask_test_and_clear_cpu(cpu, &__cpu_online_mask))
3116			atomic_dec(&__num_online_cpus);
3117	}
3118}
3119
3120/*
3121 * Activate the first processor.
3122 */
3123void __init boot_cpu_init(void)
3124{
3125	int cpu = smp_processor_id();
3126
	/* Mark the boot CPU "present", "online" etc. for the SMP and UP case */
3128	set_cpu_online(cpu, true);
3129	set_cpu_active(cpu, true);
3130	set_cpu_present(cpu, true);
3131	set_cpu_possible(cpu, true);
3132
3133#ifdef CONFIG_SMP
3134	__boot_cpu_id = cpu;
3135#endif
3136}
3137
3138/*
3139 * Must be called _AFTER_ setting up the per_cpu areas
3140 */
3141void __init boot_cpu_hotplug_init(void)
3142{
3143#ifdef CONFIG_SMP
3144	cpumask_set_cpu(smp_processor_id(), &cpus_booted_once_mask);
3145	atomic_set(this_cpu_ptr(&cpuhp_state.ap_sync_state), SYNC_STATE_ONLINE);
3146#endif
3147	this_cpu_write(cpuhp_state.state, CPUHP_ONLINE);
3148	this_cpu_write(cpuhp_state.target, CPUHP_ONLINE);
3149}
3150
3151#ifdef CONFIG_CPU_MITIGATIONS
3152/*
3153 * These are used for a global "mitigations=" cmdline option for toggling
3154 * optional CPU mitigations.
3155 */
3156enum cpu_mitigations {
3157	CPU_MITIGATIONS_OFF,
3158	CPU_MITIGATIONS_AUTO,
3159	CPU_MITIGATIONS_AUTO_NOSMT,
3160};
3161
3162static enum cpu_mitigations cpu_mitigations __ro_after_init = CPU_MITIGATIONS_AUTO;
3163
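/*
 * Parse the "mitigations=" command line option. Accepted values are "off",
 * "auto" (the default) and "auto,nosmt"; anything else is reported and
 * ignored.
 */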
3164static int __init mitigations_parse_cmdline(char *arg)
3165{
3166	if (!strcmp(arg, "off"))
3167		cpu_mitigations = CPU_MITIGATIONS_OFF;
3168	else if (!strcmp(arg, "auto"))
3169		cpu_mitigations = CPU_MITIGATIONS_AUTO;
3170	else if (!strcmp(arg, "auto,nosmt"))
3171		cpu_mitigations = CPU_MITIGATIONS_AUTO_NOSMT;
3172	else
3173		pr_crit("Unsupported mitigations=%s, system may still be vulnerable\n",
3174			arg);
3175
3176	return 0;
3177}
3178
3179/* mitigations=off */
3180bool cpu_mitigations_off(void)
3181{
3182	return cpu_mitigations == CPU_MITIGATIONS_OFF;
3183}
3184EXPORT_SYMBOL_GPL(cpu_mitigations_off);
3185
3186/* mitigations=auto,nosmt */
3187bool cpu_mitigations_auto_nosmt(void)
3188{
3189	return cpu_mitigations == CPU_MITIGATIONS_AUTO_NOSMT;
3190}
3191EXPORT_SYMBOL_GPL(cpu_mitigations_auto_nosmt);
3192#else
3193static int __init mitigations_parse_cmdline(char *arg)
3194{
3195	pr_crit("Kernel compiled without mitigations, ignoring 'mitigations'; system may still be vulnerable\n");
3196	return 0;
3197}
3198#endif
3199early_param("mitigations", mitigations_parse_cmdline);
3200