155099Skris// SPDX-License-Identifier: GPL-2.0-or-later 255099Skris/* 355099Skris * kernel/stop_machine.c 455099Skris * 555099Skris * Copyright (C) 2008, 2005 IBM Corporation. 655099Skris * Copyright (C) 2008, 2005 Rusty Russell rusty@rustcorp.com.au 755099Skris * Copyright (C) 2010 SUSE Linux Products GmbH 8280304Sjkim * Copyright (C) 2010 Tejun Heo <tj@kernel.org> 955099Skris */ 1055099Skris#include <linux/compiler.h> 1155099Skris#include <linux/completion.h> 1255099Skris#include <linux/cpu.h> 1355099Skris#include <linux/init.h> 1455099Skris#include <linux/kthread.h> 15280304Sjkim#include <linux/export.h> 1655099Skris#include <linux/percpu.h> 1755099Skris#include <linux/sched.h> 1855099Skris#include <linux/stop_machine.h> 1955099Skris#include <linux/interrupt.h> 2055099Skris#include <linux/kallsyms.h> 2155099Skris#include <linux/smpboot.h> 22280304Sjkim#include <linux/atomic.h> 2355099Skris#include <linux/nmi.h> 2455099Skris#include <linux/sched/wake_q.h> 2555099Skris 2655099Skris/* 2755099Skris * Structure to determine completion condition and record errors. May 2855099Skris * be shared by works on different cpus. 2955099Skris */ 3055099Skrisstruct cpu_stop_done { 3155099Skris atomic_t nr_todo; /* nr left to execute */ 3255099Skris int ret; /* collected return value */ 3355099Skris struct completion completion; /* fired if nr_todo reaches 0 */ 3455099Skris}; 3555099Skris 3655099Skris/* the actual stopper, one per every possible cpu, enabled on online cpus */ 37280304Sjkimstruct cpu_stopper { 3855099Skris struct task_struct *thread; 3955099Skris 40280304Sjkim raw_spinlock_t lock; 4155099Skris bool enabled; /* is this stopper enabled? */ 4255099Skris struct list_head works; /* list of pending works */ 4355099Skris 4455099Skris struct cpu_stop_work stop_work; /* for stop_cpus */ 4555099Skris unsigned long caller; 4655099Skris cpu_stop_fn_t fn; 4755099Skris}; 4855099Skris 4955099Skrisstatic DEFINE_PER_CPU(struct cpu_stopper, cpu_stopper); 5055099Skrisstatic bool stop_machine_initialized = false; 5155099Skris 52280304Sjkimvoid print_stop_info(const char *log_lvl, struct task_struct *task) 5355099Skris{ 5455099Skris /* 5555099Skris * If @task is a stopper task, it cannot migrate and task_cpu() is 5655099Skris * stable. 5755099Skris */ 58160817Ssimon struct cpu_stopper *stopper = per_cpu_ptr(&cpu_stopper, task_cpu(task)); 59162914Ssimon 60160817Ssimon if (task != stopper->thread) 61160817Ssimon return; 62160817Ssimon 63160817Ssimon printk("%sStopper: %pS <- %pS\n", log_lvl, stopper->fn, (void *)stopper->caller); 64160817Ssimon} 65160817Ssimon 66280304Sjkim/* static data for stop_cpus */ 67160817Ssimonstatic DEFINE_MUTEX(stop_cpus_mutex); 68160817Ssimonstatic bool stop_cpus_in_progress; 69160817Ssimon 70160817Ssimonstatic void cpu_stop_init_done(struct cpu_stop_done *done, unsigned int nr_todo) 71160817Ssimon{ 72160817Ssimon memset(done, 0, sizeof(*done)); 73160817Ssimon atomic_set(&done->nr_todo, nr_todo); 74160817Ssimon init_completion(&done->completion); 75160817Ssimon} 76160817Ssimon 77160817Ssimon/* signal completion unless @done is NULL */ 78160817Ssimonstatic void cpu_stop_signal_done(struct cpu_stop_done *done) 79160817Ssimon{ 80160817Ssimon if (atomic_dec_and_test(&done->nr_todo)) 81160817Ssimon complete(&done->completion); 82160817Ssimon} 83160817Ssimon 84160817Ssimonstatic void __cpu_stop_queue_work(struct cpu_stopper *stopper, 85160817Ssimon struct cpu_stop_work *work, 86160817Ssimon struct wake_q_head *wakeq) 87160817Ssimon{ 88160817Ssimon list_add_tail(&work->list, &stopper->works); 89160817Ssimon wake_q_add(wakeq, stopper->thread); 90160817Ssimon} 91160817Ssimon 92160817Ssimon/* queue @work to @stopper. if offline, @work is completed immediately */ 93160817Ssimonstatic bool cpu_stop_queue_work(unsigned int cpu, struct cpu_stop_work *work) 94160817Ssimon{ 95160817Ssimon struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu); 96160817Ssimon DEFINE_WAKE_Q(wakeq); 97160817Ssimon unsigned long flags; 98160817Ssimon bool enabled; 99160817Ssimon 100160817Ssimon preempt_disable(); 101160817Ssimon raw_spin_lock_irqsave(&stopper->lock, flags); 102160817Ssimon enabled = stopper->enabled; 103160817Ssimon if (enabled) 104160817Ssimon __cpu_stop_queue_work(stopper, work, &wakeq); 105160817Ssimon else if (work->done) 106160817Ssimon cpu_stop_signal_done(work->done); 107160817Ssimon raw_spin_unlock_irqrestore(&stopper->lock, flags); 108160817Ssimon 109160817Ssimon wake_up_q(&wakeq); 110160817Ssimon preempt_enable(); 11155099Skris 11255099Skris return enabled; 11355099Skris} 11455099Skris 11555099Skris/** 11655099Skris * stop_one_cpu - stop a cpu 11755099Skris * @cpu: cpu to stop 118238405Sjkim * @fn: function to execute 11968654Skris * @arg: argument to @fn 120110007Smarkm * 121280304Sjkim * Execute @fn(@arg) on @cpu. @fn is run in a process context with 122110007Smarkm * the highest priority preempting any task on the cpu and 123280304Sjkim * monopolizing it. This function returns after the execution is 124110007Smarkm * complete. 125280304Sjkim * 126110007Smarkm * This function doesn't guarantee @cpu stays online till @fn 127280304Sjkim * completes. If @cpu goes down in the middle, execution may happen 128280304Sjkim * partially or fully on different cpus. @fn should either be ready 129280304Sjkim * for that or the caller should ensure that @cpu stays online until 13055099Skris * this function completes. 13155099Skris * 132280304Sjkim * CONTEXT: 133280304Sjkim * Might sleep. 134280304Sjkim * 135280304Sjkim * RETURNS: 136280304Sjkim * -ENOENT if @fn(@arg) was not executed because @cpu was offline; 137280304Sjkim * otherwise, the return value of @fn. 138280304Sjkim */ 139280304Sjkimint stop_one_cpu(unsigned int cpu, cpu_stop_fn_t fn, void *arg) 140280304Sjkim{ 141280304Sjkim struct cpu_stop_done done; 142280304Sjkim struct cpu_stop_work work = { .fn = fn, .arg = arg, .done = &done, .caller = _RET_IP_ }; 143280304Sjkim 144280304Sjkim cpu_stop_init_done(&done, 1); 145280304Sjkim if (!cpu_stop_queue_work(cpu, &work)) 146280304Sjkim return -ENOENT; 147280304Sjkim /* 148280304Sjkim * In case @cpu == smp_proccessor_id() we can avoid a sleep+wakeup 14955099Skris * cycle by doing a preemption: 150110007Smarkm */ 151280304Sjkim cond_resched(); 152280304Sjkim wait_for_completion(&done.completion); 153280304Sjkim return done.ret; 15455099Skris} 155110007Smarkm 156280304Sjkim/* This controls the threads on each CPU. */ 157280304Sjkimenum multi_stop_state { 158280304Sjkim /* Dummy starting state for thread. */ 159280304Sjkim MULTI_STOP_NONE, 160280304Sjkim /* Awaiting everyone to be scheduled. */ 161280304Sjkim MULTI_STOP_PREPARE, 16255099Skris /* Disable interrupts. */ 163280304Sjkim MULTI_STOP_DISABLE_IRQ, 164280304Sjkim /* Run the function */ 165280304Sjkim MULTI_STOP_RUN, 166280304Sjkim /* Exit */ 167162914Ssimon MULTI_STOP_EXIT, 168280304Sjkim}; 169280304Sjkim 170280304Sjkimstruct multi_stop_data { 171280304Sjkim cpu_stop_fn_t fn; 172162914Ssimon void *data; 173280304Sjkim /* Like num_online_cpus(), but hotplug cpu uses us, so we need this. */ 174280304Sjkim unsigned int num_threads; 175280304Sjkim const struct cpumask *active_cpus; 176280304Sjkim 177280304Sjkim enum multi_stop_state state; 178280304Sjkim atomic_t thread_ack; 179280304Sjkim}; 18055099Skris 181280304Sjkimstatic void set_state(struct multi_stop_data *msdata, 182280304Sjkim enum multi_stop_state newstate) 183280304Sjkim{ 184280304Sjkim /* Reset ack counter. */ 185280304Sjkim atomic_set(&msdata->thread_ack, msdata->num_threads); 186280304Sjkim smp_wmb(); 187280304Sjkim WRITE_ONCE(msdata->state, newstate); 188280304Sjkim} 189280304Sjkim 190280304Sjkim/* Last one to ack a state moves to the next state. */ 191280304Sjkimstatic void ack_state(struct multi_stop_data *msdata) 19255099Skris{ 193280304Sjkim if (atomic_dec_and_test(&msdata->thread_ack)) 194280304Sjkim set_state(msdata, msdata->state + 1); 195280304Sjkim} 196280304Sjkim 197280304Sjkimnotrace void __weak stop_machine_yield(const struct cpumask *cpumask) 198280304Sjkim{ 199280304Sjkim cpu_relax(); 200280304Sjkim} 201280304Sjkim 202280304Sjkim/* This is the cpu_stop function which stops the CPU. */ 203280304Sjkimstatic int multi_cpu_stop(void *data) 204280304Sjkim{ 205280304Sjkim struct multi_stop_data *msdata = data; 206280304Sjkim enum multi_stop_state newstate, curstate = MULTI_STOP_NONE; 207280304Sjkim int cpu = smp_processor_id(), err = 0; 208280304Sjkim const struct cpumask *cpumask; 209280304Sjkim unsigned long flags; 210280304Sjkim bool is_active; 211280304Sjkim 212280304Sjkim /* 213280304Sjkim * When called from stop_machine_from_inactive_cpu(), irq might 21489840Skris * already be disabled. Save the state and restore it on exit. 215280304Sjkim */ 216280304Sjkim local_save_flags(flags); 217160817Ssimon 218280304Sjkim if (!msdata->active_cpus) { 219280304Sjkim cpumask = cpu_online_mask; 220280304Sjkim is_active = cpu == cpumask_first(cpumask); 221280304Sjkim } else { 222280304Sjkim cpumask = msdata->active_cpus; 223280304Sjkim is_active = cpumask_test_cpu(cpu, cpumask); 22455099Skris } 225280304Sjkim 226280304Sjkim /* Simple state machine */ 227280304Sjkim do { 228280304Sjkim /* Chill out and ensure we re-read multi_stop_state. */ 22955099Skris stop_machine_yield(cpumask); 230280304Sjkim newstate = READ_ONCE(msdata->state); 231280304Sjkim if (newstate != curstate) { 232280304Sjkim curstate = newstate; 23355099Skris switch (curstate) { 234280304Sjkim case MULTI_STOP_DISABLE_IRQ: 235280304Sjkim local_irq_disable(); 236280304Sjkim hard_irq_disable(); 237280304Sjkim break; 238280304Sjkim case MULTI_STOP_RUN: 239280304Sjkim if (is_active) 240280304Sjkim err = msdata->fn(msdata->data); 241280304Sjkim break; 242280304Sjkim default: 243280304Sjkim break; 244280304Sjkim } 245280304Sjkim ack_state(msdata); 246280304Sjkim } else if (curstate > MULTI_STOP_PREPARE) { 247280304Sjkim /* 248280304Sjkim * At this stage all other CPUs we depend on must spin 249280304Sjkim * in the same loop. Any reason for hard-lockup should 250280304Sjkim * be detected and reported on their side. 251280304Sjkim */ 252280304Sjkim touch_nmi_watchdog(); 253280304Sjkim } 254280304Sjkim rcu_momentary_dyntick_idle(); 255280304Sjkim } while (curstate != MULTI_STOP_EXIT); 256162914Ssimon 257160817Ssimon local_irq_restore(flags); 258280304Sjkim return err; 259280304Sjkim} 260280304Sjkim 261112446Sjedgarstatic int cpu_stop_queue_two_works(int cpu1, struct cpu_stop_work *work1, 262280304Sjkim int cpu2, struct cpu_stop_work *work2) 263162914Ssimon{ 264280304Sjkim struct cpu_stopper *stopper1 = per_cpu_ptr(&cpu_stopper, cpu1); 265280304Sjkim struct cpu_stopper *stopper2 = per_cpu_ptr(&cpu_stopper, cpu2); 266280304Sjkim DEFINE_WAKE_Q(wakeq); 267280304Sjkim int err; 268162914Ssimon 269280304Sjkimretry: 270280304Sjkim /* 271280304Sjkim * The waking up of stopper threads has to happen in the same 272112446Sjedgar * scheduling context as the queueing. Otherwise, there is a 273280304Sjkim * possibility of one of the above stoppers being woken up by another 274280304Sjkim * CPU, and preempting us. This will cause us to not wake up the other 275280304Sjkim * stopper forever. 276120635Snectar */ 277280304Sjkim preempt_disable(); 278280304Sjkim raw_spin_lock_irq(&stopper1->lock); 279280304Sjkim raw_spin_lock_nested(&stopper2->lock, SINGLE_DEPTH_NESTING); 280162914Ssimon 281280304Sjkim if (!stopper1->enabled || !stopper2->enabled) { 282280304Sjkim err = -ENOENT; 283280304Sjkim goto unlock; 284162914Ssimon } 285280304Sjkim 286280304Sjkim /* 287280304Sjkim * Ensure that if we race with __stop_cpus() the stoppers won't get 288280304Sjkim * queued up in reverse order leading to system deadlock. 289280304Sjkim * 290280304Sjkim * We can't miss stop_cpus_in_progress if queue_stop_cpus_work() has 291162914Ssimon * queued a work on cpu1 but not on cpu2, we hold both locks. 292280304Sjkim * 293280304Sjkim * It can be falsely true but it is safe to spin until it is cleared, 294280304Sjkim * queue_stop_cpus_work() does everything under preempt_disable(). 295280304Sjkim */ 296280304Sjkim if (unlikely(stop_cpus_in_progress)) { 297280304Sjkim err = -EDEADLK; 298162914Ssimon goto unlock; 299280304Sjkim } 300280304Sjkim 301280304Sjkim err = 0; 302280304Sjkim __cpu_stop_queue_work(stopper1, work1, &wakeq); 303280304Sjkim __cpu_stop_queue_work(stopper2, work2, &wakeq); 304280304Sjkim 305162914Ssimonunlock: 306280304Sjkim raw_spin_unlock(&stopper2->lock); 307280304Sjkim raw_spin_unlock_irq(&stopper1->lock); 308280304Sjkim 309280304Sjkim if (unlikely(err == -EDEADLK)) { 310280304Sjkim preempt_enable(); 311160817Ssimon 312120635Snectar while (stop_cpus_in_progress) 313237657Sjkim cpu_relax(); 314280304Sjkim 315280304Sjkim goto retry; 316280304Sjkim } 317280304Sjkim 318280304Sjkim wake_up_q(&wakeq); 319280304Sjkim preempt_enable(); 320280304Sjkim 321280304Sjkim return err; 322280304Sjkim} 323280304Sjkim/** 324280304Sjkim * stop_two_cpus - stops two cpus 325280304Sjkim * @cpu1: the cpu to stop 326280304Sjkim * @cpu2: the other cpu to stop 327280304Sjkim * @fn: function to execute 328280304Sjkim * @arg: argument to @fn 329280304Sjkim * 330280304Sjkim * Stops both the current and specified CPU and runs @fn on one of them. 331280304Sjkim * 332120635Snectar * returns when both are completed. 333237657Sjkim */ 334280304Sjkimint stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void *arg) 335280304Sjkim{ 336280304Sjkim struct cpu_stop_done done; 337280304Sjkim struct cpu_stop_work work1, work2; 338280304Sjkim struct multi_stop_data msdata; 339280304Sjkim 340280304Sjkim msdata = (struct multi_stop_data){ 341280304Sjkim .fn = fn, 342280304Sjkim .data = arg, 343280304Sjkim .num_threads = 2, 344280304Sjkim .active_cpus = cpumask_of(cpu1), 345280304Sjkim }; 346237657Sjkim 34789840Skris work1 = work2 = (struct cpu_stop_work){ 348110007Smarkm .fn = multi_cpu_stop, 349280304Sjkim .arg = &msdata, 350280304Sjkim .done = &done, 351280304Sjkim .caller = _RET_IP_, 352280304Sjkim }; 353280304Sjkim 354280304Sjkim cpu_stop_init_done(&done, 2); 355280304Sjkim set_state(&msdata, MULTI_STOP_PREPARE); 356280304Sjkim 357280304Sjkim if (cpu1 > cpu2) 358280304Sjkim swap(cpu1, cpu2); 359280304Sjkim if (cpu_stop_queue_two_works(cpu1, &work1, cpu2, &work2)) 360280304Sjkim return -ENOENT; 361280304Sjkim 362280304Sjkim wait_for_completion(&done.completion); 36355099Skris return done.ret; 364280304Sjkim} 365280304Sjkim 366280304Sjkim/** 367280304Sjkim * stop_one_cpu_nowait - stop a cpu but don't wait for completion 368280304Sjkim * @cpu: cpu to stop 369280304Sjkim * @fn: function to execute 370280304Sjkim * @arg: argument to @fn 371280304Sjkim * @work_buf: pointer to cpu_stop_work structure 372280304Sjkim * 373280304Sjkim * Similar to stop_one_cpu() but doesn't wait for completion. The 374280304Sjkim * caller is responsible for ensuring @work_buf is currently unused 37555099Skris * and will remain untouched until stopper starts executing @fn. 376280304Sjkim * 377280304Sjkim * CONTEXT: 378280304Sjkim * Don't care. 379280304Sjkim * 380280304Sjkim * RETURNS: 381280304Sjkim * true if cpu_stop_work was queued successfully and @fn will be called, 382280304Sjkim * false otherwise. 383280304Sjkim */ 384280304Sjkimbool stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg, 385280304Sjkim struct cpu_stop_work *work_buf) 386280304Sjkim{ 387280304Sjkim *work_buf = (struct cpu_stop_work){ .fn = fn, .arg = arg, .caller = _RET_IP_, }; 388280304Sjkim return cpu_stop_queue_work(cpu, work_buf); 389280304Sjkim} 390280304Sjkim 391280304Sjkimstatic bool queue_stop_cpus_work(const struct cpumask *cpumask, 392280304Sjkim cpu_stop_fn_t fn, void *arg, 39355099Skris struct cpu_stop_done *done) 394280304Sjkim{ 395280304Sjkim struct cpu_stop_work *work; 39655099Skris unsigned int cpu; 397280304Sjkim bool queued = false; 398280304Sjkim 399280304Sjkim /* 400280304Sjkim * Disable preemption while queueing to avoid getting 401280304Sjkim * preempted by a stopper which might wait for other stoppers 402280304Sjkim * to enter @fn which can lead to deadlock. 403112446Sjedgar */ 404280304Sjkim preempt_disable(); 405280304Sjkim stop_cpus_in_progress = true; 406280304Sjkim barrier(); 407280304Sjkim for_each_cpu(cpu, cpumask) { 408280304Sjkim work = &per_cpu(cpu_stopper.stop_work, cpu); 409280304Sjkim work->fn = fn; 410280304Sjkim work->arg = arg; 411160817Ssimon work->done = done; 412280304Sjkim work->caller = _RET_IP_; 413280304Sjkim if (cpu_stop_queue_work(cpu, work)) 414280304Sjkim queued = true; 415280304Sjkim } 416280304Sjkim barrier(); 417280304Sjkim stop_cpus_in_progress = false; 418280304Sjkim preempt_enable(); 419280304Sjkim 420160817Ssimon return queued; 421280304Sjkim} 422280304Sjkim 423280304Sjkimstatic int __stop_cpus(const struct cpumask *cpumask, 424280304Sjkim cpu_stop_fn_t fn, void *arg) 425280304Sjkim{ 426280304Sjkim struct cpu_stop_done done; 427280304Sjkim 428280304Sjkim cpu_stop_init_done(&done, cpumask_weight(cpumask)); 429280304Sjkim if (!queue_stop_cpus_work(cpumask, fn, arg, &done)) 43055099Skris return -ENOENT; 431280304Sjkim wait_for_completion(&done.completion); 432280304Sjkim return done.ret; 433280304Sjkim} 434280304Sjkim 435280304Sjkim/** 436280304Sjkim * stop_cpus - stop multiple cpus 43755099Skris * @cpumask: cpus to stop 438280304Sjkim * @fn: function to execute 439280304Sjkim * @arg: argument to @fn 440280304Sjkim * 441280304Sjkim * Execute @fn(@arg) on online cpus in @cpumask. On each target cpu, 442160817Ssimon * @fn is run in a process context with the highest priority 443280304Sjkim * preempting any task on the cpu and monopolizing it. This function 444280304Sjkim * returns after all executions are complete. 445280304Sjkim * 446280304Sjkim * This function doesn't guarantee the cpus in @cpumask stay online 44755099Skris * till @fn completes. If some cpus go down in the middle, execution 448280304Sjkim * on the cpu may happen partially or fully on different cpus. @fn 449280304Sjkim * should either be ready for that or the caller should ensure that 450280304Sjkim * the cpus stay online until this function completes. 45155099Skris * 452280304Sjkim * All stop_cpus() calls are serialized making it safe for @fn to wait 453280304Sjkim * for all cpus to start executing it. 454280304Sjkim * 455280304Sjkim * CONTEXT: 456280304Sjkim * Might sleep. 457280304Sjkim * 458280304Sjkim * RETURNS: 459280304Sjkim * -ENOENT if @fn(@arg) was not executed at all because all cpus in 460280304Sjkim * @cpumask were offline; otherwise, 0 if all executions of @fn 461280304Sjkim * returned 0, any non zero return value if any returned non zero. 462280304Sjkim */ 463280304Sjkimstatic int stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg) 464280304Sjkim{ 465280304Sjkim int ret; 466280304Sjkim 467280304Sjkim /* static works are used, process one request at a time */ 468280304Sjkim mutex_lock(&stop_cpus_mutex); 469280304Sjkim ret = __stop_cpus(cpumask, fn, arg); 470280304Sjkim mutex_unlock(&stop_cpus_mutex); 471280304Sjkim return ret; 472280304Sjkim} 473280304Sjkim 474280304Sjkimstatic int cpu_stop_should_run(unsigned int cpu) 475280304Sjkim{ 476280304Sjkim struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu); 477280304Sjkim unsigned long flags; 478280304Sjkim int run; 479280304Sjkim 480280304Sjkim raw_spin_lock_irqsave(&stopper->lock, flags); 481280304Sjkim run = !list_empty(&stopper->works); 482280304Sjkim raw_spin_unlock_irqrestore(&stopper->lock, flags); 483110007Smarkm return run; 484280304Sjkim} 485280304Sjkim 486280304Sjkimstatic void cpu_stopper_thread(unsigned int cpu) 487280304Sjkim{ 488280304Sjkim struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu); 489280304Sjkim struct cpu_stop_work *work; 490280304Sjkim 491280304Sjkimrepeat: 492280304Sjkim work = NULL; 493280304Sjkim raw_spin_lock_irq(&stopper->lock); 494280304Sjkim if (!list_empty(&stopper->works)) { 495280304Sjkim work = list_first_entry(&stopper->works, 496280304Sjkim struct cpu_stop_work, list); 497280304Sjkim list_del_init(&work->list); 498280304Sjkim } 49955099Skris raw_spin_unlock_irq(&stopper->lock); 500280304Sjkim 501280304Sjkim if (work) { 502280304Sjkim cpu_stop_fn_t fn = work->fn; 503280304Sjkim void *arg = work->arg; 504280304Sjkim struct cpu_stop_done *done = work->done; 505280304Sjkim int ret; 506280304Sjkim 507280304Sjkim /* cpu stop callbacks must not sleep, make in_atomic() == T */ 508280304Sjkim stopper->caller = work->caller; 509280304Sjkim stopper->fn = fn; 510280304Sjkim preempt_count_inc(); 51155099Skris ret = fn(arg); 512280304Sjkim if (done) { 513280304Sjkim if (ret) 514280304Sjkim done->ret = ret; 515280304Sjkim cpu_stop_signal_done(done); 516280304Sjkim } 517280304Sjkim preempt_count_dec(); 518280304Sjkim stopper->fn = NULL; 519280304Sjkim stopper->caller = 0; 520280304Sjkim WARN_ONCE(preempt_count(), 52155099Skris "cpu_stop: %ps(%p) leaked preempt count\n", fn, arg); 522280304Sjkim goto repeat; 523280304Sjkim } 524280304Sjkim} 52555099Skris 526280304Sjkimvoid stop_machine_park(int cpu) 527280304Sjkim{ 528280304Sjkim struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu); 529280304Sjkim /* 530280304Sjkim * Lockless. cpu_stopper_thread() will take stopper->lock and flush 53189840Skris * the pending works before it parks, until then it is fine to queue 532280304Sjkim * the new works. 533280304Sjkim */ 534280304Sjkim stopper->enabled = false; 535280304Sjkim kthread_park(stopper->thread); 536280304Sjkim} 537280304Sjkim 538280304Sjkimstatic void cpu_stop_create(unsigned int cpu) 539112446Sjedgar{ 540280304Sjkim sched_set_stop_task(cpu, per_cpu(cpu_stopper.thread, cpu)); 541280304Sjkim} 542280304Sjkim 543280304Sjkimstatic void cpu_stop_park(unsigned int cpu) 544280304Sjkim{ 545280304Sjkim struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu); 546280304Sjkim 547280304Sjkim WARN_ON(!list_empty(&stopper->works)); 548160817Ssimon} 549280304Sjkim 550280304Sjkimvoid stop_machine_unpark(int cpu) 551280304Sjkim{ 552280304Sjkim struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu); 553280304Sjkim 554280304Sjkim stopper->enabled = true; 555280304Sjkim kthread_unpark(stopper->thread); 556280304Sjkim} 557280304Sjkim 558280304Sjkimstatic struct smp_hotplug_thread cpu_stop_threads = { 55955099Skris .store = &cpu_stopper.thread, 560280304Sjkim .thread_should_run = cpu_stop_should_run, 561280304Sjkim .thread_fn = cpu_stopper_thread, 562280304Sjkim .thread_comm = "migration/%u", 563280304Sjkim .create = cpu_stop_create, 564280304Sjkim .park = cpu_stop_park, 56555099Skris .selfparking = true, 566280304Sjkim}; 567280304Sjkim 568280304Sjkimstatic int __init cpu_stop_init(void) 569280304Sjkim{ 570280304Sjkim unsigned int cpu; 571280304Sjkim 572280304Sjkim for_each_possible_cpu(cpu) { 573280304Sjkim struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu); 57455099Skris 575280304Sjkim raw_spin_lock_init(&stopper->lock); 576280304Sjkim INIT_LIST_HEAD(&stopper->works); 577280304Sjkim } 57855099Skris 579280304Sjkim BUG_ON(smpboot_register_percpu_thread(&cpu_stop_threads)); 580280304Sjkim stop_machine_unpark(raw_smp_processor_id()); 58155099Skris stop_machine_initialized = true; 582280304Sjkim return 0; 583280304Sjkim} 584280304Sjkimearly_initcall(cpu_stop_init); 585280304Sjkim 586280304Sjkimint stop_machine_cpuslocked(cpu_stop_fn_t fn, void *data, 587280304Sjkim const struct cpumask *cpus) 588280304Sjkim{ 589280304Sjkim struct multi_stop_data msdata = { 590280304Sjkim .fn = fn, 591280304Sjkim .data = data, 592280304Sjkim .num_threads = num_online_cpus(), 593280304Sjkim .active_cpus = cpus, 594280304Sjkim }; 595280304Sjkim 596280304Sjkim lockdep_assert_cpus_held(); 597280304Sjkim 598280304Sjkim if (!stop_machine_initialized) { 599280304Sjkim /* 600280304Sjkim * Handle the case where stop_machine() is called 601280304Sjkim * early in boot before stop_machine() has been 602280304Sjkim * initialized. 603280304Sjkim */ 604280304Sjkim unsigned long flags; 605280304Sjkim int ret; 606280304Sjkim 607280304Sjkim WARN_ON_ONCE(msdata.num_threads != 1); 608280304Sjkim 609280304Sjkim local_irq_save(flags); 610280304Sjkim hard_irq_disable(); 611280304Sjkim ret = (*fn)(data); 612280304Sjkim local_irq_restore(flags); 613280304Sjkim 614280304Sjkim return ret; 615280304Sjkim } 61689840Skris 617110007Smarkm /* Set the initial state and stop all online cpus. */ 618280304Sjkim set_state(&msdata, MULTI_STOP_PREPARE); 619280304Sjkim return stop_cpus(cpu_online_mask, multi_cpu_stop, &msdata); 620280304Sjkim} 621280304Sjkim 622280304Sjkimint stop_machine(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus) 623280304Sjkim{ 624280304Sjkim int ret; 62555099Skris 626280304Sjkim /* No CPUs can come up or down during this. */ 627280304Sjkim cpus_read_lock(); 628280304Sjkim ret = stop_machine_cpuslocked(fn, data, cpus); 629280304Sjkim cpus_read_unlock(); 630162914Ssimon return ret; 631280304Sjkim} 632280304SjkimEXPORT_SYMBOL_GPL(stop_machine); 633280304Sjkim 634280304Sjkim#ifdef CONFIG_SCHED_SMT 635162914Ssimonint stop_core_cpuslocked(unsigned int cpu, cpu_stop_fn_t fn, void *data) 636280304Sjkim{ 637280304Sjkim const struct cpumask *smt_mask = cpu_smt_mask(cpu); 638280304Sjkim 639280304Sjkim struct multi_stop_data msdata = { 640280304Sjkim .fn = fn, 641280304Sjkim .data = data, 642280304Sjkim .num_threads = cpumask_weight(smt_mask), 64355099Skris .active_cpus = smt_mask, 644280304Sjkim }; 645280304Sjkim 646280304Sjkim lockdep_assert_cpus_held(); 647280304Sjkim 648280304Sjkim /* Set the initial state and stop all online cpus. */ 649280304Sjkim set_state(&msdata, MULTI_STOP_PREPARE); 650280304Sjkim return stop_cpus(smt_mask, multi_cpu_stop, &msdata); 651280304Sjkim} 652280304SjkimEXPORT_SYMBOL_GPL(stop_core_cpuslocked); 653280304Sjkim#endif 654280304Sjkim 65555099Skris/** 656280304Sjkim * stop_machine_from_inactive_cpu - stop_machine() from inactive CPU 657280304Sjkim * @fn: the function to run 658280304Sjkim * @data: the data ptr for the @fn() 659280304Sjkim * @cpus: the cpus to run the @fn() on (NULL = any online cpu) 660280304Sjkim * 661280304Sjkim * This is identical to stop_machine() but can be called from a CPU which 662280304Sjkim * is not active. The local CPU is in the process of hotplug (so no other 663280304Sjkim * CPU hotplug can start) and not marked active and doesn't have enough 66489840Skris * context to sleep. 665280304Sjkim * 666280304Sjkim * This function provides stop_machine() functionality for such state by 66789840Skris * using busy-wait for synchronization and executing @fn directly for local 668280304Sjkim * CPU. 669280304Sjkim * 670280304Sjkim * CONTEXT: 671280304Sjkim * Local CPU is inactive. Temporarily stops all active CPUs. 672280304Sjkim * 673160817Ssimon * RETURNS: 674280304Sjkim * 0 if all executions of @fn returned 0, any non zero return value if any 675280304Sjkim * returned non zero. 676280304Sjkim */ 677280304Sjkimint stop_machine_from_inactive_cpu(cpu_stop_fn_t fn, void *data, 67855099Skris const struct cpumask *cpus) 679280304Sjkim{ 680280304Sjkim struct multi_stop_data msdata = { .fn = fn, .data = data, 681280304Sjkim .active_cpus = cpus }; 682160817Ssimon struct cpu_stop_done done; 683280304Sjkim int ret; 684280304Sjkim 685280304Sjkim /* Local CPU must be inactive and CPU hotplug in progress. */ 68655099Skris BUG_ON(cpu_active(raw_smp_processor_id())); 687280304Sjkim msdata.num_threads = num_active_cpus() + 1; /* +1 for local */ 688280304Sjkim 68955099Skris /* No proper task established and can't sleep - busy wait for lock. */ 690280304Sjkim while (!mutex_trylock(&stop_cpus_mutex)) 691280304Sjkim cpu_relax(); 692280304Sjkim 693280304Sjkim /* Schedule work on other CPUs and execute directly for local CPU */ 694280304Sjkim set_state(&msdata, MULTI_STOP_PREPARE); 695280304Sjkim cpu_stop_init_done(&done, num_active_cpus()); 696280304Sjkim queue_stop_cpus_work(cpu_active_mask, multi_cpu_stop, &msdata, 697280304Sjkim &done); 698280304Sjkim ret = multi_cpu_stop(&msdata); 699280304Sjkim 700280304Sjkim /* Busy wait for completion. */ 701280304Sjkim while (!completion_done(&done.completion)) 702280304Sjkim cpu_relax(); 703280304Sjkim 704280304Sjkim mutex_unlock(&stop_cpus_mutex); 705280304Sjkim return ret ?: done.ret; 70655099Skris} 707280304Sjkim