// SPDX-License-Identifier: GPL-2.0
#include <linux/cgroup.h>
#include <linux/sched.h>
#include <linux/sched/task.h>
#include <linux/sched/signal.h>

#include "cgroup-internal.h"

#include <trace/events/cgroup.h>

/*
 * Propagate the cgroup frozen state up the cgroup tree.
 */
static void cgroup_propagate_frozen(struct cgroup *cgrp, bool frozen)
{
	int desc = 1;

	/*
	 * If the new state is frozen, some freezing ancestor cgroups may change
	 * their state too, depending on whether all their descendants are frozen.
	 *
	 * Otherwise, all ancestor cgroups are forced into the non-frozen state.
	 */
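	/*
	 * desc counts how many cgroups have changed their state during this
	 * walk: the original cgroup plus every ancestor that flips its state
	 * on the way up. Ancestors account for descendants at any depth, so
	 * each nr_frozen_descendants is adjusted by the full amount.
	 */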
	while ((cgrp = cgroup_parent(cgrp))) {
		if (frozen) {
			cgrp->freezer.nr_frozen_descendants += desc;
			if (!test_bit(CGRP_FROZEN, &cgrp->flags) &&
			    test_bit(CGRP_FREEZE, &cgrp->flags) &&
			    cgrp->freezer.nr_frozen_descendants ==
			    cgrp->nr_descendants) {
				set_bit(CGRP_FROZEN, &cgrp->flags);
				cgroup_file_notify(&cgrp->events_file);
				TRACE_CGROUP_PATH(notify_frozen, cgrp, 1);
				desc++;
			}
		} else {
			cgrp->freezer.nr_frozen_descendants -= desc;
			if (test_bit(CGRP_FROZEN, &cgrp->flags)) {
				clear_bit(CGRP_FROZEN, &cgrp->flags);
				cgroup_file_notify(&cgrp->events_file);
				TRACE_CGROUP_PATH(notify_frozen, cgrp, 0);
				desc++;
			}
		}
	}
}

/*
 * Revisit the cgroup frozen state.
 * Check whether the cgroup is really frozen and perform all state transitions.
 */
void cgroup_update_frozen(struct cgroup *cgrp)
{
	bool frozen;

	lockdep_assert_held(&css_set_lock);

	/*
	 * If the cgroup has to be frozen (CGRP_FREEZE bit set),
	 * and all tasks are frozen and/or stopped, let's consider
	 * the cgroup frozen. Otherwise it's not frozen.
	 */
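	/*
	 * __cgroup_task_count() counts only tasks attached to this cgroup
	 * itself; the frozen state of child cgroups reaches their ancestors
	 * via cgroup_propagate_frozen() instead.
	 */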
	frozen = test_bit(CGRP_FREEZE, &cgrp->flags) &&
		cgrp->freezer.nr_frozen_tasks == __cgroup_task_count(cgrp);

	if (frozen) {
		/* Already there? */
		if (test_bit(CGRP_FROZEN, &cgrp->flags))
			return;

		set_bit(CGRP_FROZEN, &cgrp->flags);
	} else {
		/* Already there? */
		if (!test_bit(CGRP_FROZEN, &cgrp->flags))
			return;

		clear_bit(CGRP_FROZEN, &cgrp->flags);
	}
	cgroup_file_notify(&cgrp->events_file);
	TRACE_CGROUP_PATH(notify_frozen, cgrp, frozen);

	/* Update the state of ancestor cgroups. */
	cgroup_propagate_frozen(cgrp, frozen);
}

/*
 * Increment cgroup's nr_frozen_tasks.
 */
static void cgroup_inc_frozen_cnt(struct cgroup *cgrp)
{
	cgrp->freezer.nr_frozen_tasks++;
}

/*
 * Decrement cgroup's nr_frozen_tasks.
 */
static void cgroup_dec_frozen_cnt(struct cgroup *cgrp)
{
	cgrp->freezer.nr_frozen_tasks--;
	WARN_ON_ONCE(cgrp->freezer.nr_frozen_tasks < 0);
}

/*
 * Enter frozen/stopped state, if not yet there. Update cgroup's counters,
 * and revisit the state of the cgroup, if necessary.
 */
void cgroup_enter_frozen(void)
{
	struct cgroup *cgrp;

	if (current->frozen)
		return;

	spin_lock_irq(&css_set_lock);
	current->frozen = true;
	cgrp = task_dfl_cgroup(current);
	cgroup_inc_frozen_cnt(cgrp);
	cgroup_update_frozen(cgrp);
	spin_unlock_irq(&css_set_lock);
}

/*
 * Conditionally leave frozen/stopped state. Update cgroup's counters,
 * and revisit the state of the cgroup, if necessary.
 *
 * If always_leave is not set and the cgroup is freezing,
 * we're racing with the cgroup being frozen. In this case, we don't
 * drop the frozen counter to avoid a transient switch to
 * the unfrozen state.
 */
void cgroup_leave_frozen(bool always_leave)
{
	struct cgroup *cgrp;

	spin_lock_irq(&css_set_lock);
	cgrp = task_dfl_cgroup(current);
	if (always_leave || !test_bit(CGRP_FREEZE, &cgrp->flags)) {
		cgroup_dec_frozen_cnt(cgrp);
		cgroup_update_frozen(cgrp);
		WARN_ON_ONCE(!current->frozen);
		current->frozen = false;
	} else if (!(current->jobctl & JOBCTL_TRAP_FREEZE)) {
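		/*
		 * The cgroup is still freezing: keep the task accounted as
		 * frozen and make it re-enter the signal delivery path, where
		 * the JOBCTL_TRAP_FREEZE trap will put it back into the
		 * frozen state.
		 */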
		spin_lock(&current->sighand->siglock);
		current->jobctl |= JOBCTL_TRAP_FREEZE;
		set_thread_flag(TIF_SIGPENDING);
		spin_unlock(&current->sighand->siglock);
	}
	spin_unlock_irq(&css_set_lock);
}

/*
 * Freeze or unfreeze the task by setting or clearing the JOBCTL_TRAP_FREEZE
 * jobctl bit.
 */
static void cgroup_freeze_task(struct task_struct *task, bool freeze)
{
	unsigned long flags;

	/* If the task is about to die, don't bother with freezing it. */
	if (!lock_task_sighand(task, &flags))
		return;

	if (freeze) {
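		/*
		 * Kick the task into the signal delivery path, where it will
		 * notice JOBCTL_TRAP_FREEZE and stop in the freezer trap.
		 */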
		task->jobctl |= JOBCTL_TRAP_FREEZE;
		signal_wake_up(task, false);
	} else {
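		/*
		 * Clear the trap bit and wake the task, so that a task
		 * sleeping in the freezer trap can leave it.
		 */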
		task->jobctl &= ~JOBCTL_TRAP_FREEZE;
		wake_up_process(task);
	}

	unlock_task_sighand(task, &flags);
}

/*
 * Freeze or unfreeze all tasks in the given cgroup.
 */
static void cgroup_do_freeze(struct cgroup *cgrp, bool freeze)
{
	struct css_task_iter it;
	struct task_struct *task;

	lockdep_assert_held(&cgroup_mutex);

	spin_lock_irq(&css_set_lock);
	if (freeze)
		set_bit(CGRP_FREEZE, &cgrp->flags);
	else
		clear_bit(CGRP_FREEZE, &cgrp->flags);
	spin_unlock_irq(&css_set_lock);

	if (freeze)
		TRACE_CGROUP_PATH(freeze, cgrp);
	else
		TRACE_CGROUP_PATH(unfreeze, cgrp);

	css_task_iter_start(&cgrp->self, 0, &it);
	while ((task = css_task_iter_next(&it))) {
		/*
		 * Ignore kernel threads here. Freezing cgroups containing
		 * kthreads isn't supported.
		 */
		if (task->flags & PF_KTHREAD)
			continue;
		cgroup_freeze_task(task, freeze);
	}
	css_task_iter_end(&it);

	/*
	 * Cgroup state should be revisited here to cover empty leaf cgroups
	 * and cgroups whose descendants are already in the desired state.
	 */
	spin_lock_irq(&css_set_lock);
	if (cgrp->nr_descendants == cgrp->freezer.nr_frozen_descendants)
		cgroup_update_frozen(cgrp);
	spin_unlock_irq(&css_set_lock);
}

/*
 * Adjust the task state (freeze or unfreeze) and revisit the state of
 * source and destination cgroups.
 */
void cgroup_freezer_migrate_task(struct task_struct *task,
				 struct cgroup *src, struct cgroup *dst)
{
	lockdep_assert_held(&css_set_lock);

	/*
	 * Kernel threads are not supposed to be frozen at all.
	 */
	if (task->flags & PF_KTHREAD)
		return;

	/*
	 * There is nothing to do if neither the src nor the dst cgroup is
	 * freezing and the task is not frozen.
	 */
	if (!test_bit(CGRP_FREEZE, &src->flags) &&
	    !test_bit(CGRP_FREEZE, &dst->flags) &&
	    !task->frozen)
		return;

	/*
	 * Adjust counters of freezing and frozen tasks.
	 * Note that if the task is frozen, but the destination cgroup is not
	 * frozen, we bump both counters to keep them balanced.
	 */
	if (task->frozen) {
		cgroup_inc_frozen_cnt(dst);
		cgroup_dec_frozen_cnt(src);
	}
	cgroup_update_frozen(dst);
	cgroup_update_frozen(src);

	/*
	 * Force the task to the desired state.
	 */
	cgroup_freeze_task(task, test_bit(CGRP_FREEZE, &dst->flags));
}

void cgroup_freeze(struct cgroup *cgrp, bool freeze)
{
	struct cgroup_subsys_state *css;
	struct cgroup *dsct;
	bool applied = false;

	lockdep_assert_held(&cgroup_mutex);

	/*
	 * Nothing changed? Just exit.
	 */
	if (cgrp->freezer.freeze == freeze)
		return;

	cgrp->freezer.freeze = freeze;

	/*
	 * Propagate changes down the cgroup tree.
	 */
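	/*
	 * e_freeze is the effective freeze counter: it tracks how many
	 * self-or-ancestor cgroups request freezing. The actual state of a
	 * descendant is changed only on 0 <-> 1 transitions.
	 */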
	css_for_each_descendant_pre(css, &cgrp->self) {
		dsct = css->cgroup;

		if (cgroup_is_dead(dsct))
			continue;

		if (freeze) {
			dsct->freezer.e_freeze++;
			/*
			 * Already frozen because of an ancestor's settings?
			 */
			if (dsct->freezer.e_freeze > 1)
				continue;
		} else {
			dsct->freezer.e_freeze--;
			/*
			 * Still frozen because of an ancestor's settings?
			 */
			if (dsct->freezer.e_freeze > 0)
				continue;

			WARN_ON_ONCE(dsct->freezer.e_freeze < 0);
		}

		/*
		 * Do change the actual state: freeze or unfreeze.
		 */
		cgroup_do_freeze(dsct, freeze);
		applied = true;
	}

	/*
	 * Even if the actual state hasn't changed, let's notify the user.
	 * The state can be enforced by an ancestor cgroup: the cgroup
	 * can already be in the desired state or it can be locked in the
	 * opposite state, so that the transition will never happen.
	 * In both cases it's better to notify the user that there is
	 * nothing to wait for.
	 */
	if (!applied) {
		TRACE_CGROUP_PATH(notify_frozen, cgrp,
				  test_bit(CGRP_FROZEN, &cgrp->flags));
		cgroup_file_notify(&cgrp->events_file);
	}
}