multilist.c revision 288581
1/*
2 * CDDL HEADER START
3 *
4 * This file and its contents are supplied under the terms of the
5 * Common Development and Distribution License ("CDDL"), version 1.0.
6 * You may only use this file in accordance with the terms of version
7 * 1.0 of the CDDL.
8 *
9 * A full copy of the text of the CDDL should have accompanied this
10 * source.  A copy of the CDDL is also available via the Internet at
11 * http://www.illumos.org/license/CDDL.
12 *
13 * CDDL HEADER END
14 */
15/*
16 * Copyright (c) 2013, 2014 by Delphix. All rights reserved.
17 */
18
19#include <sys/zfs_context.h>
20#include <sys/multilist.h>
21
22/* needed for spa_get_random() */
23#include <sys/spa.h>
24
25/*
26 * Given the object contained on the list, return a pointer to the
27 * object's multilist_node_t structure it contains.
28 */
29static multilist_node_t *
30multilist_d2l(multilist_t *ml, void *obj)
31{
32	return ((multilist_node_t *)((char *)obj + ml->ml_offset));
33}
34
35/*
36 * Initialize a new mutlilist using the parameters specified.
37 *
38 *  - 'size' denotes the size of the structure containing the
39 *     multilist_node_t.
40 *  - 'offset' denotes the byte offset of the mutlilist_node_t within
41 *     the structure that contains it.
42 *  - 'num' specifies the number of internal sublists to create.
43 *  - 'index_func' is used to determine which sublist to insert into
44 *     when the multilist_insert() function is called; as well as which
45 *     sublist to remove from when multilist_remove() is called. The
46 *     requirements this function must meet, are the following:
47 *
48 *      - It must always return the same value when called on the same
49 *        object (to ensure the object is removed from the list it was
50 *        inserted into).
51 *
52 *      - It must return a value in the range [0, number of sublists).
53 *        The multilist_get_num_sublists() function may be used to
54 *        determine the number of sublists in the multilist.
55 *
56 *     Also, in order to reduce internal contention between the sublists
57 *     during insertion and removal, this function should choose evenly
58 *     between all available sublists when inserting. This isn't a hard
59 *     requirement, but a general rule of thumb in order to garner the
60 *     best multi-threaded performance out of the data structure.
61 */
62void
63multilist_create(multilist_t *ml, size_t size, size_t offset, unsigned int num,
64    multilist_sublist_index_func_t *index_func)
65{
66	ASSERT3P(ml, !=, NULL);
67	ASSERT3U(size, >, 0);
68	ASSERT3U(size, >=, offset + sizeof (multilist_node_t));
69	ASSERT3U(num, >, 0);
70	ASSERT3P(index_func, !=, NULL);
71
72	ml->ml_offset = offset;
73	ml->ml_num_sublists = num;
74	ml->ml_index_func = index_func;
75
76	ml->ml_sublists = kmem_zalloc(sizeof (multilist_sublist_t) *
77	    ml->ml_num_sublists, KM_SLEEP);
78
79	ASSERT3P(ml->ml_sublists, !=, NULL);
80
81	for (int i = 0; i < ml->ml_num_sublists; i++) {
82		multilist_sublist_t *mls = &ml->ml_sublists[i];
83		mutex_init(&mls->mls_lock, NULL, MUTEX_DEFAULT, NULL);
84		list_create(&mls->mls_list, size, offset);
85	}
86}
87
88/*
89 * Destroy the given multilist object, and free up any memory it holds.
90 */
91void
92multilist_destroy(multilist_t *ml)
93{
94	ASSERT(multilist_is_empty(ml));
95
96	for (int i = 0; i < ml->ml_num_sublists; i++) {
97		multilist_sublist_t *mls = &ml->ml_sublists[i];
98
99		ASSERT(list_is_empty(&mls->mls_list));
100
101		list_destroy(&mls->mls_list);
102		mutex_destroy(&mls->mls_lock);
103	}
104
105	ASSERT3P(ml->ml_sublists, !=, NULL);
106	kmem_free(ml->ml_sublists,
107	    sizeof (multilist_sublist_t) * ml->ml_num_sublists);
108
109	ml->ml_num_sublists = 0;
110	ml->ml_offset = 0;
111}
112
113/*
114 * Insert the given object into the multilist.
115 *
116 * This function will insert the object specified into the sublist
117 * determined using the function given at multilist creation time.
118 *
119 * The sublist locks are automatically acquired if not already held, to
120 * ensure consistency when inserting and removing from multiple threads.
121 */
122void
123multilist_insert(multilist_t *ml, void *obj)
124{
125	unsigned int sublist_idx = ml->ml_index_func(ml, obj);
126	multilist_sublist_t *mls;
127	boolean_t need_lock;
128
129	DTRACE_PROBE3(multilist__insert, multilist_t *, ml,
130	    unsigned int, sublist_idx, void *, obj);
131
132	ASSERT3U(sublist_idx, <, ml->ml_num_sublists);
133
134	mls = &ml->ml_sublists[sublist_idx];
135
136	/*
137	 * Note: Callers may already hold the sublist lock by calling
138	 * multilist_sublist_lock().  Here we rely on MUTEX_HELD()
139	 * returning TRUE if and only if the current thread holds the
140	 * lock.  While it's a little ugly to make the lock recursive in
141	 * this way, it works and allows the calling code to be much
142	 * simpler -- otherwise it would have to pass around a flag
143	 * indicating that it already has the lock.
144	 */
145	need_lock = !MUTEX_HELD(&mls->mls_lock);
146
147	if (need_lock)
148		mutex_enter(&mls->mls_lock);
149
150	ASSERT(!multilist_link_active(multilist_d2l(ml, obj)));
151
152	multilist_sublist_insert_head(mls, obj);
153
154	if (need_lock)
155		mutex_exit(&mls->mls_lock);
156}
157
158/*
159 * Remove the given object from the multilist.
160 *
161 * This function will remove the object specified from the sublist
162 * determined using the function given at multilist creation time.
163 *
164 * The necessary sublist locks are automatically acquired, to ensure
165 * consistency when inserting and removing from multiple threads.
166 */
167void
168multilist_remove(multilist_t *ml, void *obj)
169{
170	unsigned int sublist_idx = ml->ml_index_func(ml, obj);
171	multilist_sublist_t *mls;
172	boolean_t need_lock;
173
174	DTRACE_PROBE3(multilist__remove, multilist_t *, ml,
175	    unsigned int, sublist_idx, void *, obj);
176
177	ASSERT3U(sublist_idx, <, ml->ml_num_sublists);
178
179	mls = &ml->ml_sublists[sublist_idx];
180	/* See comment in multilist_insert(). */
181	need_lock = !MUTEX_HELD(&mls->mls_lock);
182
183	if (need_lock)
184		mutex_enter(&mls->mls_lock);
185
186	ASSERT(multilist_link_active(multilist_d2l(ml, obj)));
187
188	multilist_sublist_remove(mls, obj);
189
190	if (need_lock)
191		mutex_exit(&mls->mls_lock);
192}
193
194/*
195 * Check to see if this multilist object is empty.
196 *
197 * This will return TRUE if it finds all of the sublists of this
198 * multilist to be empty, and FALSE otherwise. Each sublist lock will be
199 * automatically acquired as necessary.
200 *
201 * If concurrent insertions and removals are occurring, the semantics
202 * of this function become a little fuzzy. Instead of locking all
203 * sublists for the entire call time of the function, each sublist is
204 * only locked as it is individually checked for emptiness. Thus, it's
205 * possible for this function to return TRUE with non-empty sublists at
206 * the time the function returns. This would be due to another thread
207 * inserting into a given sublist, after that specific sublist was check
208 * and deemed empty, but before all sublists have been checked.
209 */
210int
211multilist_is_empty(multilist_t *ml)
212{
213	for (int i = 0; i < ml->ml_num_sublists; i++) {
214		multilist_sublist_t *mls = &ml->ml_sublists[i];
215		/* See comment in multilist_insert(). */
216		boolean_t need_lock = !MUTEX_HELD(&mls->mls_lock);
217
218		if (need_lock)
219			mutex_enter(&mls->mls_lock);
220
221		if (!list_is_empty(&mls->mls_list)) {
222			if (need_lock)
223				mutex_exit(&mls->mls_lock);
224
225			return (FALSE);
226		}
227
228		if (need_lock)
229			mutex_exit(&mls->mls_lock);
230	}
231
232	return (TRUE);
233}
234
235/* Return the number of sublists composing this multilist */
236unsigned int
237multilist_get_num_sublists(multilist_t *ml)
238{
239	return (ml->ml_num_sublists);
240}
241
242/* Return a randomly selected, valid sublist index for this multilist */
243unsigned int
244multilist_get_random_index(multilist_t *ml)
245{
246	return (spa_get_random(ml->ml_num_sublists));
247}
248
249/* Lock and return the sublist specified at the given index */
250multilist_sublist_t *
251multilist_sublist_lock(multilist_t *ml, unsigned int sublist_idx)
252{
253	multilist_sublist_t *mls;
254
255	ASSERT3U(sublist_idx, <, ml->ml_num_sublists);
256	mls = &ml->ml_sublists[sublist_idx];
257	mutex_enter(&mls->mls_lock);
258
259	return (mls);
260}
261
262void
263multilist_sublist_unlock(multilist_sublist_t *mls)
264{
265	mutex_exit(&mls->mls_lock);
266}
267
268/*
269 * We're allowing any object to be inserted into this specific sublist,
270 * but this can lead to trouble if multilist_remove() is called to
271 * remove this object. Specifically, if calling ml_index_func on this
272 * object returns an index for sublist different than what is passed as
273 * a parameter here, any call to multilist_remove() with this newly
274 * inserted object is undefined! (the call to multilist_remove() will
275 * remove the object from a list that it isn't contained in)
276 */
277void
278multilist_sublist_insert_head(multilist_sublist_t *mls, void *obj)
279{
280	ASSERT(MUTEX_HELD(&mls->mls_lock));
281	list_insert_head(&mls->mls_list, obj);
282}
283
284/* please see comment above multilist_sublist_insert_head */
285void
286multilist_sublist_insert_tail(multilist_sublist_t *mls, void *obj)
287{
288	ASSERT(MUTEX_HELD(&mls->mls_lock));
289	list_insert_tail(&mls->mls_list, obj);
290}
291
292/*
293 * Move the object one element forward in the list.
294 *
295 * This function will move the given object forward in the list (towards
296 * the head) by one object. So, in essence, it will swap its position in
297 * the list with its "prev" pointer. If the given object is already at the
298 * head of the list, it cannot be moved forward any more than it already
299 * is, so no action is taken.
300 *
301 * NOTE: This function **must not** remove any object from the list other
302 *       than the object given as the parameter. This is relied upon in
303 *       arc_evict_state_impl().
304 */
305void
306multilist_sublist_move_forward(multilist_sublist_t *mls, void *obj)
307{
308	void *prev = list_prev(&mls->mls_list, obj);
309
310	ASSERT(MUTEX_HELD(&mls->mls_lock));
311	ASSERT(!list_is_empty(&mls->mls_list));
312
313	/* 'obj' must be at the head of the list, nothing to do */
314	if (prev == NULL)
315		return;
316
317	list_remove(&mls->mls_list, obj);
318	list_insert_before(&mls->mls_list, prev, obj);
319}
320
321void
322multilist_sublist_remove(multilist_sublist_t *mls, void *obj)
323{
324	ASSERT(MUTEX_HELD(&mls->mls_lock));
325	list_remove(&mls->mls_list, obj);
326}
327
328void *
329multilist_sublist_head(multilist_sublist_t *mls)
330{
331	ASSERT(MUTEX_HELD(&mls->mls_lock));
332	return (list_head(&mls->mls_list));
333}
334
335void *
336multilist_sublist_tail(multilist_sublist_t *mls)
337{
338	ASSERT(MUTEX_HELD(&mls->mls_lock));
339	return (list_tail(&mls->mls_list));
340}
341
342void *
343multilist_sublist_next(multilist_sublist_t *mls, void *obj)
344{
345	ASSERT(MUTEX_HELD(&mls->mls_lock));
346	return (list_next(&mls->mls_list, obj));
347}
348
349void *
350multilist_sublist_prev(multilist_sublist_t *mls, void *obj)
351{
352	ASSERT(MUTEX_HELD(&mls->mls_lock));
353	return (list_prev(&mls->mls_list, obj));
354}
355
356void
357multilist_link_init(multilist_node_t *link)
358{
359	list_link_init(link);
360}
361
362int
363multilist_link_active(multilist_node_t *link)
364{
365	return (list_link_active(link));
366}
367