1/*-
2 * SPDX-License-Identifier: Beerware
3 *
4 * ----------------------------------------------------------------------------
5 * "THE BEER-WARE LICENSE" (Revision 42):
6 * <phk@FreeBSD.ORG> wrote this file.  As long as you retain this notice you
7 * can do whatever you want with this stuff. If we meet some day, and you think
8 * this stuff is worth it, you can buy me a beer in return.   Poul-Henning Kamp
9 * ----------------------------------------------------------------------------
10 *
11 * The bioq_disksort() (and the specification of the bioq API)
12 * have been written by Luigi Rizzo and Fabio Checconi under the same
13 * license as above.
14 */
15
16#include <sys/cdefs.h>
17#include "opt_geom.h"
18
19#include <sys/param.h>
20#include <sys/systm.h>
21#include <sys/bio.h>
22#include <sys/conf.h>
23#include <sys/disk.h>
24#include <sys/sysctl.h>
25#include <geom/geom_disk.h>
26
/*
 * Batch limit consulted by bioq_disksort().  When this value is positive
 * and a queue's 'batched' counter exceeds it, bioq_disksort() stops
 * sorting and appends the request at the tail instead.  A value of zero
 * or less disables the limit.  Registered read-write (CTLFLAG_RW) under
 * the _debug sysctl node.
 */
static int bioq_batchsize = 128;
SYSCTL_INT(_debug, OID_AUTO, bioq_batchsize, CTLFLAG_RW,
    &bioq_batchsize, 0, "BIOQ batch size");
30
31/*-
32 * Disk error is the preface to plaintive error messages
33 * about failing disk transfers.  It prints messages of the form
34 * 	"hp0g: BLABLABLA cmd=read fsbn 12345 of 12344-12347"
35 * blkdone should be -1 if the position of the error is unknown.
36 * The message is printed with printf.
37 */
38void
39disk_err(struct bio *bp, const char *what, int blkdone, int nl)
40{
41	daddr_t sn;
42
43	if (bp->bio_dev != NULL)
44		printf("%s: %s ", devtoname(bp->bio_dev), what);
45	else if (bp->bio_disk != NULL)
46		printf("%s%d: %s ",
47		    bp->bio_disk->d_name, bp->bio_disk->d_unit, what);
48	else
49		printf("disk??: %s ", what);
50	switch(bp->bio_cmd) {
51	case BIO_READ:		printf("cmd=read "); break;
52	case BIO_WRITE:		printf("cmd=write "); break;
53	case BIO_DELETE:	printf("cmd=delete "); break;
54	case BIO_GETATTR:	printf("cmd=getattr "); break;
55	case BIO_FLUSH:		printf("cmd=flush "); break;
56	default:		printf("cmd=%x ", bp->bio_cmd); break;
57	}
58	sn = bp->bio_pblkno;
59	if (bp->bio_bcount <= DEV_BSIZE) {
60		printf("fsbn %jd%s", (intmax_t)sn, nl ? "\n" : "");
61		return;
62	}
63	if (blkdone >= 0) {
64		sn += blkdone;
65		printf("fsbn %jd of ", (intmax_t)sn);
66	}
67	printf("%jd-%jd", (intmax_t)bp->bio_pblkno,
68	    (intmax_t)(bp->bio_pblkno + (bp->bio_bcount - 1) / DEV_BSIZE));
69	if (nl)
70		printf("\n");
71}
72
73/*
74 * BIO queue implementation
75 *
76 * Please read carefully the description below before making any change
77 * to the code, or you might change the behaviour of the data structure
78 * in undesirable ways.
79 *
 * A bioq stores disk I/O requests (bios), normally sorted according to
81 * the distance of the requested position (bio->bio_offset) from the
82 * current head position (bioq->last_offset) in the scan direction, i.e.
83 *
84 * 	(uoff_t)(bio_offset - last_offset)
85 *
 * Note that the cast to unsigned (uoff_t) is fundamental to ensure
 * that the distance is computed in the scan direction.
88 *
89 * The main methods for manipulating the bioq are:
90 *
91 *   bioq_disksort()	performs an ordered insertion;
92 *
93 *   bioq_first()	return the head of the queue, without removing;
94 *
95 *   bioq_takefirst()	return and remove the head of the queue,
96 *		updating the 'current head position' as
97 *		bioq->last_offset = bio->bio_offset + bio->bio_length;
98 *
99 * When updating the 'current head position', we assume that the result of
100 * bioq_takefirst() is dispatched to the device, so bioq->last_offset
101 * represents the head position once the request is complete.
102 *
103 * If the bioq is manipulated using only the above calls, it starts
104 * with a sorted sequence of requests with bio_offset >= last_offset,
105 * possibly followed by another sorted sequence of requests with
106 * 0 <= bio_offset < bioq->last_offset
107 *
108 * NOTE: historical behaviour was to ignore bio->bio_length in the
109 *	update, but its use tracks the head position in a better way.
110 *	Historical behaviour was also to update the head position when
111 *	the request under service is complete, rather than when the
112 *	request is extracted from the queue. However, the current API
113 *	has no method to update the head position; secondly, once
114 *	a request has been submitted to the disk, we have no idea of
115 *	the actual head position, so the final one is our best guess.
116 *
117 * --- Direct queue manipulation ---
118 *
119 * A bioq uses an underlying TAILQ to store requests, so we also
120 * export methods to manipulate the TAILQ, in particular:
121 *
122 * bioq_insert_tail()	insert an entry at the end.
123 *		It also creates a 'barrier' so all subsequent
124 *		insertions through bioq_disksort() will end up
125 *		after this entry;
126 *
127 * bioq_insert_head()	insert an entry at the head, update
128 *		bioq->last_offset = bio->bio_offset so that
129 *		all subsequent insertions through bioq_disksort()
130 *		will end up after this entry;
131 *
132 * bioq_remove()	remove a generic element from the queue, act as
133 *		bioq_takefirst() if invoked on the head of the queue.
134 *
 * The semantics of these methods are the same as the operations
136 * on the underlying TAILQ, but with additional guarantees on
137 * subsequent bioq_disksort() calls. E.g. bioq_insert_tail()
138 * can be useful for making sure that all previous ops are flushed
139 * to disk before continuing.
140 *
141 * Updating bioq->last_offset on a bioq_insert_head() guarantees
142 * that the bio inserted with the last bioq_insert_head() will stay
143 * at the head of the queue even after subsequent bioq_disksort().
144 *
145 * Note that when the direct queue manipulation functions are used,
146 * the queue may contain multiple inversion points (i.e. more than
147 * two sorted sequences of requests).
148 *
149 */
150
151void
152bioq_init(struct bio_queue_head *head)
153{
154
155	TAILQ_INIT(&head->queue);
156	head->last_offset = 0;
157	head->insert_point = NULL;
158	head->total = 0;
159	head->batched = 0;
160}
161
162void
163bioq_remove(struct bio_queue_head *head, struct bio *bp)
164{
165
166	if (head->insert_point == NULL) {
167		if (bp == TAILQ_FIRST(&head->queue))
168			head->last_offset = bp->bio_offset + bp->bio_length;
169	} else if (bp == head->insert_point)
170		head->insert_point = NULL;
171
172	TAILQ_REMOVE(&head->queue, bp, bio_queue);
173	if (TAILQ_EMPTY(&head->queue))
174		head->batched = 0;
175	head->total--;
176}
177
/*
 * Drain the queue: every request is taken off the head in turn and
 * completed through biofinish() with the given devstat and error.
 */
void
bioq_flush(struct bio_queue_head *head, struct devstat *stp, int error)
{
	struct bio *req;

	for (;;) {
		req = bioq_takefirst(head);
		if (req == NULL) {
			break;
		}
		biofinish(req, stp, error);
	}
}
186
187void
188bioq_insert_head(struct bio_queue_head *head, struct bio *bp)
189{
190
191	if (head->insert_point == NULL)
192		head->last_offset = bp->bio_offset;
193	TAILQ_INSERT_HEAD(&head->queue, bp, bio_queue);
194	head->total++;
195	head->batched = 0;
196}
197
198void
199bioq_insert_tail(struct bio_queue_head *head, struct bio *bp)
200{
201
202	TAILQ_INSERT_TAIL(&head->queue, bp, bio_queue);
203	head->total++;
204	head->batched = 0;
205	head->insert_point = bp;
206	head->last_offset = bp->bio_offset;
207}
208
209struct bio *
210bioq_first(struct bio_queue_head *head)
211{
212
213	return (TAILQ_FIRST(&head->queue));
214}
215
216struct bio *
217bioq_takefirst(struct bio_queue_head *head)
218{
219	struct bio *bp;
220
221	bp = TAILQ_FIRST(&head->queue);
222	if (bp != NULL)
223		bioq_remove(head, bp);
224	return (bp);
225}
226
227/*
228 * Compute the sorting key. The cast to unsigned is
229 * fundamental for correctness, see the description
230 * near the beginning of the file.
231 */
232static inline uoff_t
233bioq_bio_key(struct bio_queue_head *head, struct bio *bp)
234{
235
236	return ((uoff_t)(bp->bio_offset - head->last_offset));
237}
238
239/*
240 * Seek sort for disks.
241 *
242 * Sort all requests in a single queue while keeping
243 * track of the current position of the disk with last_offset.
244 * See above for details.
245 */
246void
247bioq_disksort(struct bio_queue_head *head, struct bio *bp)
248{
249	struct bio *cur, *prev;
250	uoff_t key;
251
252	if ((bp->bio_flags & BIO_ORDERED) != 0) {
253		/*
254		 * Ordered transactions can only be dispatched
255		 * after any currently queued transactions.  They
256		 * also have barrier semantics - no transactions
257		 * queued in the future can pass them.
258		 */
259		bioq_insert_tail(head, bp);
260		return;
261	}
262
263	/*
264	 * We should only sort requests of types that have concept of offset.
265	 * Other types, such as BIO_FLUSH or BIO_ZONE, may imply some degree
266	 * of ordering even if strict ordering is not requested explicitly.
267	 */
268	if (bp->bio_cmd != BIO_READ && bp->bio_cmd != BIO_WRITE &&
269	    bp->bio_cmd != BIO_DELETE) {
270		bioq_insert_tail(head, bp);
271		return;
272	}
273
274	if (bioq_batchsize > 0 && head->batched > bioq_batchsize) {
275		bioq_insert_tail(head, bp);
276		return;
277	}
278
279	prev = NULL;
280	key = bioq_bio_key(head, bp);
281	cur = TAILQ_FIRST(&head->queue);
282
283	if (head->insert_point) {
284		prev = head->insert_point;
285		cur = TAILQ_NEXT(head->insert_point, bio_queue);
286	}
287
288	while (cur != NULL && key >= bioq_bio_key(head, cur)) {
289		prev = cur;
290		cur = TAILQ_NEXT(cur, bio_queue);
291	}
292
293	if (prev == NULL)
294		TAILQ_INSERT_HEAD(&head->queue, bp, bio_queue);
295	else
296		TAILQ_INSERT_AFTER(&head->queue, prev, bp, bio_queue);
297	head->total++;
298	head->batched++;
299}
300