/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 2008 Yahoo!, Inc.
 * All rights reserved.
 * Written by: John Baldwin <jhb@FreeBSD.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the author nor the names of any co-contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/bio.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/proc.h>
#include <sys/sglist.h>
#include <sys/uio.h>

#include <vm/vm.h>
#include <vm/vm_page.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>

#include <sys/ktr.h>

static MALLOC_DEFINE(M_SGLIST, "sglist", "scatter/gather lists");

/*
 * Convenience macros to save the state of an sglist so it can be restored
 * if an append attempt fails.  Since sglists only grow, we only need to
 * save the current count of segments and the length of the ending segment.
 * Earlier segments will not be changed by an append, and the only change
 * that can occur to the ending segment is that it can be extended.
 */
struct sgsave {
	u_short sg_nseg;
	size_t ss_len;
};

#define	SGLIST_SAVE(sg, sgsave) do {					\
	(sgsave).sg_nseg = (sg)->sg_nseg;				\
	if ((sgsave).sg_nseg > 0)					\
		(sgsave).ss_len = (sg)->sg_segs[(sgsave).sg_nseg - 1].ss_len; \
	else								\
		(sgsave).ss_len = 0;					\
} while (0)

#define	SGLIST_RESTORE(sg, sgsave) do {					\
	(sg)->sg_nseg = (sgsave).sg_nseg;				\
	if ((sgsave).sg_nseg > 0)					\
		(sg)->sg_segs[(sgsave).sg_nseg - 1].ss_len = (sgsave).ss_len; \
} while (0)

/*
 * Append a single (paddr, len) to a sglist.  sg is the list and ss is
 * the current segment in the list.  If we run out of segments then
 * EFBIG will be returned.
 */
static __inline int
_sglist_append_range(struct sglist *sg, struct sglist_seg **ssp,
    vm_paddr_t paddr, size_t len)
{
	struct sglist_seg *ss;

	ss = *ssp;
	if (ss->ss_paddr + ss->ss_len == paddr)
		ss->ss_len += len;
	else {
		if (sg->sg_nseg == sg->sg_maxseg)
			return (EFBIG);
		ss++;
		ss->ss_paddr = paddr;
		ss->ss_len = len;
		sg->sg_nseg++;
		*ssp = ss;
	}
	return (0);
}
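
/*
 * Illustration (not part of the original file): because of the
 * coalescing check above, physically contiguous appends do not consume
 * extra segments.  For example, two sglist_append_phys() calls with
 * the ranges (0x1000, 0x1000) and (0x2000, 0x800) on an empty list
 * produce the single segment { ss_paddr = 0x1000, ss_len = 0x1800 }
 * rather than two segments.
 */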

/*
 * Worker routine to append a virtual address range (either kernel or
 * user) to a scatter/gather list.
 */
static __inline int
_sglist_append_buf(struct sglist *sg, void *buf, size_t len, pmap_t pmap,
    size_t *donep)
{
	struct sglist_seg *ss;
	vm_offset_t vaddr, offset;
	vm_paddr_t paddr;
	size_t seglen;
	int error;

	if (donep)
		*donep = 0;
	if (len == 0)
		return (0);

	/* Do the first page.  It may have an offset. */
	vaddr = (vm_offset_t)buf;
	offset = vaddr & PAGE_MASK;
	if (pmap != NULL)
		paddr = pmap_extract(pmap, vaddr);
	else
		paddr = pmap_kextract(vaddr);
	seglen = MIN(len, PAGE_SIZE - offset);
	if (sg->sg_nseg == 0) {
		ss = sg->sg_segs;
		ss->ss_paddr = paddr;
		ss->ss_len = seglen;
		sg->sg_nseg = 1;
	} else {
		ss = &sg->sg_segs[sg->sg_nseg - 1];
		error = _sglist_append_range(sg, &ss, paddr, seglen);
		if (error)
			return (error);
	}
	vaddr += seglen;
	len -= seglen;
	if (donep)
		*donep += seglen;

	while (len > 0) {
		seglen = MIN(len, PAGE_SIZE);
		if (pmap != NULL)
			paddr = pmap_extract(pmap, vaddr);
		else
			paddr = pmap_kextract(vaddr);
		error = _sglist_append_range(sg, &ss, paddr, seglen);
		if (error)
			return (error);
		vaddr += seglen;
		len -= seglen;
		if (donep)
			*donep += seglen;
	}

	return (0);
}

/*
 * Determine the number of scatter/gather list elements needed to
 * describe a kernel virtual address range.
 */
int
sglist_count(void *buf, size_t len)
{
	vm_offset_t vaddr, vendaddr;
	vm_paddr_t lastaddr, paddr;
	int nsegs;

	if (len == 0)
		return (0);

	vaddr = trunc_page((vm_offset_t)buf);
	vendaddr = (vm_offset_t)buf + len;
	nsegs = 1;
	lastaddr = pmap_kextract(vaddr);
	vaddr += PAGE_SIZE;
	while (vaddr < vendaddr) {
		paddr = pmap_kextract(vaddr);
		if (lastaddr + PAGE_SIZE != paddr)
			nsegs++;
		lastaddr = paddr;
		vaddr += PAGE_SIZE;
	}
	return (nsegs);
}
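
/*
 * Usage sketch (not part of the original file): sglist_count() lets a
 * caller size a list exactly before allocating it, so that a later
 * sglist_append() of the same buffer cannot fail with EFBIG:
 *
 *	struct sglist *sg;
 *
 *	sg = sglist_alloc(sglist_count(buf, len), M_WAITOK);
 *	(void)sglist_append(sg, buf, len);
 *	...
 *	sglist_free(sg);
 */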

/*
 * Determine the number of scatter/gather list elements needed to
 * describe a buffer backed by an array of VM pages.
 */
int
sglist_count_vmpages(vm_page_t *m, size_t pgoff, size_t len)
{
	vm_paddr_t lastaddr, paddr;
	int i, nsegs;

	if (len == 0)
		return (0);

	len += pgoff;
	nsegs = 1;
	lastaddr = VM_PAGE_TO_PHYS(m[0]);
	for (i = 1; len > PAGE_SIZE; len -= PAGE_SIZE, i++) {
		paddr = VM_PAGE_TO_PHYS(m[i]);
		if (lastaddr + PAGE_SIZE != paddr)
			nsegs++;
		lastaddr = paddr;
	}
	return (nsegs);
}

/*
 * Determine the number of scatter/gather list elements needed to
 * describe an M_EXTPG mbuf.
 */
int
sglist_count_mbuf_epg(struct mbuf *m, size_t off, size_t len)
{
	vm_paddr_t nextaddr, paddr;
	size_t seglen, segoff;
	int i, nsegs, pglen, pgoff;

	if (len == 0)
		return (0);

	nsegs = 0;
	if (m->m_epg_hdrlen != 0) {
		if (off >= m->m_epg_hdrlen) {
			off -= m->m_epg_hdrlen;
		} else {
			seglen = m->m_epg_hdrlen - off;
			segoff = off;
			seglen = MIN(seglen, len);
			off = 0;
			len -= seglen;
			nsegs += sglist_count(&m->m_epg_hdr[segoff],
			    seglen);
		}
	}
	nextaddr = 0;
	pgoff = m->m_epg_1st_off;
	for (i = 0; i < m->m_epg_npgs && len > 0; i++) {
		pglen = m_epg_pagelen(m, i, pgoff);
		if (off >= pglen) {
			off -= pglen;
			pgoff = 0;
			continue;
		}
		seglen = pglen - off;
		segoff = pgoff + off;
		off = 0;
		seglen = MIN(seglen, len);
		len -= seglen;
		paddr = m->m_epg_pa[i] + segoff;
		if (paddr != nextaddr)
			nsegs++;
		nextaddr = paddr + seglen;
		pgoff = 0;
	}
	if (len != 0) {
		seglen = MIN(len, m->m_epg_trllen - off);
		len -= seglen;
		nsegs += sglist_count(&m->m_epg_trail[off], seglen);
	}
	KASSERT(len == 0, ("len != 0"));
	return (nsegs);
}

/*
 * Allocate a scatter/gather list along with 'nsegs' segments.  The
 * 'mflags' parameter is the same as that passed to malloc(9).  The
 * caller should use sglist_free() to free this list.
 */
struct sglist *
sglist_alloc(int nsegs, int mflags)
{
	struct sglist *sg;

	sg = malloc(sizeof(struct sglist) + nsegs * sizeof(struct sglist_seg),
	    M_SGLIST, mflags);
	if (sg == NULL)
		return (NULL);
	sglist_init(sg, nsegs, (struct sglist_seg *)(sg + 1));
	return (sg);
}

/*
 * Free a scatter/gather list allocated via sglist_alloc().
 */
void
sglist_free(struct sglist *sg)
{

	if (sg == NULL)
		return;

	if (refcount_release(&sg->sg_refs))
		free(sg, M_SGLIST);
}

/*
 * Append the segments to describe a single kernel virtual address
 * range to a scatter/gather list.  If there are insufficient
 * segments, then this fails with EFBIG.
 */
int
sglist_append(struct sglist *sg, void *buf, size_t len)
{
	struct sgsave save;
	int error;

	if (sg->sg_maxseg == 0)
		return (EINVAL);
	SGLIST_SAVE(sg, save);
	error = _sglist_append_buf(sg, buf, len, NULL, NULL);
	if (error)
		SGLIST_RESTORE(sg, save);
	return (error);
}
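
/*
 * Usage sketch (not part of the original file): a caller with a known,
 * small segment limit can build a list on the stack with sglist_init()
 * and recover cleanly on EFBIG, since a failed append restores the
 * list to its prior state:
 *
 *	struct sglist_seg segs[4];
 *	struct sglist sg;
 *	int error;
 *
 *	sglist_init(&sg, nitems(segs), segs);
 *	error = sglist_append(&sg, buf, len);
 *	if (error == EFBIG)
 *		... fall back to a bounce buffer or a larger list ...
 */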

/*
 * Append the segments to describe a bio's data to a scatter/gather list.
 * If there are insufficient segments, then this fails with EFBIG.
 *
 * NOTE: This function expects bio_bcount to be initialized.
 */
int
sglist_append_bio(struct sglist *sg, struct bio *bp)
{
	int error;

	if ((bp->bio_flags & BIO_UNMAPPED) == 0)
		error = sglist_append(sg, bp->bio_data, bp->bio_bcount);
	else
		error = sglist_append_vmpages(sg, bp->bio_ma,
		    bp->bio_ma_offset, bp->bio_bcount);
	return (error);
}

/*
 * Append a single physical address range to a scatter/gather list.
 * If there are insufficient segments, then this fails with EFBIG.
 */
int
sglist_append_phys(struct sglist *sg, vm_paddr_t paddr, size_t len)
{
	struct sglist_seg *ss;
	struct sgsave save;
	int error;

	if (sg->sg_maxseg == 0)
		return (EINVAL);
	if (len == 0)
		return (0);

	if (sg->sg_nseg == 0) {
		sg->sg_segs[0].ss_paddr = paddr;
		sg->sg_segs[0].ss_len = len;
		sg->sg_nseg = 1;
		return (0);
	}
	ss = &sg->sg_segs[sg->sg_nseg - 1];
	SGLIST_SAVE(sg, save);
	error = _sglist_append_range(sg, &ss, paddr, len);
	if (error)
		SGLIST_RESTORE(sg, save);
	return (error);
}

/*
 * Append the segments of a single multi-page mbuf.
 * If there are insufficient segments, then this fails with EFBIG.
 */
int
sglist_append_mbuf_epg(struct sglist *sg, struct mbuf *m, size_t off,
    size_t len)
{
	size_t seglen, segoff;
	vm_paddr_t paddr;
	int error, i, pglen, pgoff;

	M_ASSERTEXTPG(m);

	error = 0;
	if (m->m_epg_hdrlen != 0) {
		if (off >= m->m_epg_hdrlen) {
			off -= m->m_epg_hdrlen;
		} else {
			seglen = m->m_epg_hdrlen - off;
			segoff = off;
			seglen = MIN(seglen, len);
			off = 0;
			len -= seglen;
			error = sglist_append(sg,
			    &m->m_epg_hdr[segoff], seglen);
		}
	}
	pgoff = m->m_epg_1st_off;
	for (i = 0; i < m->m_epg_npgs && error == 0 && len > 0; i++) {
		pglen = m_epg_pagelen(m, i, pgoff);
		if (off >= pglen) {
			off -= pglen;
			pgoff = 0;
			continue;
		}
		seglen = pglen - off;
		segoff = pgoff + off;
		off = 0;
		seglen = MIN(seglen, len);
		len -= seglen;
		paddr = m->m_epg_pa[i] + segoff;
		error = sglist_append_phys(sg, paddr, seglen);
		pgoff = 0;
	}
	if (error == 0 && len > 0) {
		seglen = MIN(len, m->m_epg_trllen - off);
		len -= seglen;
		error = sglist_append(sg,
		    &m->m_epg_trail[off], seglen);
	}
	if (error == 0)
		KASSERT(len == 0, ("len != 0"));
	return (error);
}

/*
 * Append the segments that describe a single mbuf chain to a
 * scatter/gather list.  If there are insufficient segments, then this
 * fails with EFBIG.
 */
int
sglist_append_mbuf(struct sglist *sg, struct mbuf *m0)
{
	struct sgsave save;
	struct mbuf *m;
	int error;

	if (sg->sg_maxseg == 0)
		return (EINVAL);

	error = 0;
	SGLIST_SAVE(sg, save);
	for (m = m0; m != NULL; m = m->m_next) {
		if (m->m_len > 0) {
			if ((m->m_flags & M_EXTPG) != 0)
				error = sglist_append_mbuf_epg(sg, m,
				    mtod(m, vm_offset_t), m->m_len);
			else
				error = sglist_append(sg, m->m_data,
				    m->m_len);
			if (error) {
				SGLIST_RESTORE(sg, save);
				return (error);
			}
		}
	}
	return (0);
}
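
/*
 * Usage sketch (not part of the original file): a network driver with
 * a fixed transmit segment budget might try the chain as-is and, on
 * EFBIG, compact it before retrying.  The retry relies on the restore
 * above leaving 'sg' untouched after a failed append:
 *
 *	error = sglist_append_mbuf(sg, m);
 *	if (error == EFBIG) {
 *		m = m_collapse(m, M_NOWAIT, sg->sg_maxseg);
 *		if (m != NULL)
 *			error = sglist_append_mbuf(sg, m);
 *	}
 */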

/*
 * Append the segments that describe a buffer spanning an array of VM
 * pages.  The buffer begins at an offset of 'pgoff' in the first
 * page.
 */
int
sglist_append_vmpages(struct sglist *sg, vm_page_t *m, size_t pgoff,
    size_t len)
{
	struct sgsave save;
	struct sglist_seg *ss;
	vm_paddr_t paddr;
	size_t seglen;
	int error, i;

	if (sg->sg_maxseg == 0)
		return (EINVAL);
	if (len == 0)
		return (0);

	SGLIST_SAVE(sg, save);
	i = 0;
	if (sg->sg_nseg == 0) {
		seglen = min(PAGE_SIZE - pgoff, len);
		sg->sg_segs[0].ss_paddr = VM_PAGE_TO_PHYS(m[0]) + pgoff;
		sg->sg_segs[0].ss_len = seglen;
		sg->sg_nseg = 1;
		pgoff = 0;
		len -= seglen;
		i++;
	}
	ss = &sg->sg_segs[sg->sg_nseg - 1];
	for (; len > 0; i++, len -= seglen) {
		seglen = min(PAGE_SIZE - pgoff, len);
		paddr = VM_PAGE_TO_PHYS(m[i]) + pgoff;
		error = _sglist_append_range(sg, &ss, paddr, seglen);
		if (error) {
			SGLIST_RESTORE(sg, save);
			return (error);
		}
		pgoff = 0;
	}
	return (0);
}

/*
 * Append the segments that describe a single user address range to a
 * scatter/gather list.  If there are insufficient segments, then this
 * fails with EFBIG.
 */
int
sglist_append_user(struct sglist *sg, void *buf, size_t len, struct thread *td)
{
	struct sgsave save;
	int error;

	if (sg->sg_maxseg == 0)
		return (EINVAL);
	SGLIST_SAVE(sg, save);
	error = _sglist_append_buf(sg, buf, len,
	    vmspace_pmap(td->td_proc->p_vmspace), NULL);
	if (error)
		SGLIST_RESTORE(sg, save);
	return (error);
}

/*
 * Append a subset of an existing scatter/gather list 'source' to the
 * scatter/gather list 'sg'.  If there are insufficient segments, then
 * this fails with EFBIG.
 */
int
sglist_append_sglist(struct sglist *sg, struct sglist *source, size_t offset,
    size_t length)
{
	struct sgsave save;
	struct sglist_seg *ss;
	size_t seglen;
	int error, i;

	if (sg->sg_maxseg == 0 || length == 0)
		return (EINVAL);
	SGLIST_SAVE(sg, save);
	error = EINVAL;
	ss = &sg->sg_segs[sg->sg_nseg - 1];
	for (i = 0; i < source->sg_nseg; i++) {
		if (offset >= source->sg_segs[i].ss_len) {
			offset -= source->sg_segs[i].ss_len;
			continue;
		}
		seglen = source->sg_segs[i].ss_len - offset;
		if (seglen > length)
			seglen = length;
		error = _sglist_append_range(sg, &ss,
		    source->sg_segs[i].ss_paddr + offset, seglen);
		if (error)
			break;
		offset = 0;
		length -= seglen;
		if (length == 0)
			break;
	}
	if (length != 0)
		error = EINVAL;
	if (error)
		SGLIST_RESTORE(sg, save);
	return (error);
}

/*
 * Append the segments that describe a single uio to a scatter/gather
 * list.  If there are insufficient segments, then this fails with
 * EFBIG.
 */
int
sglist_append_uio(struct sglist *sg, struct uio *uio)
{
	struct iovec *iov;
	struct sgsave save;
	size_t resid, minlen;
	pmap_t pmap;
	int error, i;

	if (sg->sg_maxseg == 0)
		return (EINVAL);

	resid = uio->uio_resid;
	iov = uio->uio_iov;

	if (uio->uio_segflg == UIO_USERSPACE) {
		KASSERT(uio->uio_td != NULL,
		    ("sglist_append_uio: USERSPACE but no thread"));
		pmap = vmspace_pmap(uio->uio_td->td_proc->p_vmspace);
	} else
		pmap = NULL;

	error = 0;
	SGLIST_SAVE(sg, save);
	for (i = 0; i < uio->uio_iovcnt && resid != 0; i++) {
		/*
		 * Now at the first iovec to load.  Load each iovec
		 * until we have exhausted the residual count.
		 */
		minlen = MIN(resid, iov[i].iov_len);
		if (minlen > 0) {
			error = _sglist_append_buf(sg, iov[i].iov_base, minlen,
			    pmap, NULL);
			if (error) {
				SGLIST_RESTORE(sg, save);
				return (error);
			}
			resid -= minlen;
		}
	}
	return (0);
}
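
/*
 * Usage sketch (not part of the original file): a character device
 * read/write routine can translate the caller's uio into physical
 * ranges for DMA.  Note that sglist_append_uio() does not advance the
 * uio; the caller still consumes it separately (contrast with
 * sglist_consume_uio() below):
 *
 *	error = sglist_append_uio(sg, uio);
 *	if (error != 0)
 *		return (error);
 *	... program the DMA engine from sg->sg_segs ...
 */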

/*
 * Append the segments that describe at most 'resid' bytes from a
 * single uio to a scatter/gather list.  If there are insufficient
 * segments, then only the amount that fits is appended.
 */
int
sglist_consume_uio(struct sglist *sg, struct uio *uio, size_t resid)
{
	struct iovec *iov;
	size_t done;
	pmap_t pmap;
	int error, len;

	if (sg->sg_maxseg == 0)
		return (EINVAL);

	if (uio->uio_segflg == UIO_USERSPACE) {
		KASSERT(uio->uio_td != NULL,
		    ("sglist_consume_uio: USERSPACE but no thread"));
		pmap = vmspace_pmap(uio->uio_td->td_proc->p_vmspace);
	} else
		pmap = NULL;

	error = 0;
	while (resid > 0 && uio->uio_resid) {
		iov = uio->uio_iov;
		len = iov->iov_len;
		if (len == 0) {
			uio->uio_iov++;
			uio->uio_iovcnt--;
			continue;
		}
		if (len > resid)
			len = resid;

		/*
		 * Try to append this iovec.  If we run out of room,
		 * then break out of the loop.
		 */
		error = _sglist_append_buf(sg, iov->iov_base, len, pmap, &done);
		iov->iov_base = (char *)iov->iov_base + done;
		iov->iov_len -= done;
		uio->uio_resid -= done;
		uio->uio_offset += done;
		resid -= done;
		if (error)
			break;
	}
	return (0);
}

/*
 * Allocate and populate a scatter/gather list to describe a single
 * kernel virtual address range.
 */
struct sglist *
sglist_build(void *buf, size_t len, int mflags)
{
	struct sglist *sg;
	int nsegs;

	if (len == 0)
		return (NULL);

	nsegs = sglist_count(buf, len);
	sg = sglist_alloc(nsegs, mflags);
	if (sg == NULL)
		return (NULL);
	if (sglist_append(sg, buf, len) != 0) {
		sglist_free(sg);
		return (NULL);
	}
	return (sg);
}
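
/*
 * Usage sketch (not part of the original file): sglist_build() is the
 * one-shot convenience wrapper around sglist_count(), sglist_alloc(),
 * and sglist_append():
 *
 *	struct sglist *sg;
 *
 *	sg = sglist_build(buf, len, M_NOWAIT);
 *	if (sg == NULL)
 *		return (ENOMEM);
 *	... use sg->sg_segs ...
 *	sglist_free(sg);
 */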

/*
 * Clone a new copy of a scatter/gather list.
 */
struct sglist *
sglist_clone(struct sglist *sg, int mflags)
{
	struct sglist *new;

	if (sg == NULL)
		return (NULL);
	new = sglist_alloc(sg->sg_maxseg, mflags);
	if (new == NULL)
		return (NULL);
	new->sg_nseg = sg->sg_nseg;
	bcopy(sg->sg_segs, new->sg_segs, sizeof(struct sglist_seg) *
	    sg->sg_nseg);
	return (new);
}

/*
 * Calculate the total length of the segments described in a
 * scatter/gather list.
 */
size_t
sglist_length(struct sglist *sg)
{
	size_t space;
	int i;

	space = 0;
	for (i = 0; i < sg->sg_nseg; i++)
		space += sg->sg_segs[i].ss_len;
	return (space);
}

/*
 * Split a scatter/gather list into two lists.  The scatter/gather
 * entries for the first 'length' bytes of the 'original' list are
 * stored in the '*head' list and are removed from 'original'.
 *
 * If '*head' is NULL, then a new list will be allocated using
 * 'mflags'.  If M_NOWAIT is specified and the allocation fails,
 * ENOMEM will be returned.
 *
 * If '*head' is not NULL, it should point to an empty sglist.  If it
 * does not have enough room for the remaining space, then EFBIG will
 * be returned.  If '*head' is not empty, then EINVAL will be
 * returned.
 *
 * If 'original' is shared (refcount > 1), then EDOOFUS will be
 * returned.
 */
int
sglist_split(struct sglist *original, struct sglist **head, size_t length,
    int mflags)
{
	struct sglist *sg;
	size_t space, split;
	int count, i;

	if (original->sg_refs > 1)
		return (EDOOFUS);

	/* Figure out how big of a sglist '*head' has to hold. */
	count = 0;
	space = 0;
	split = 0;
	for (i = 0; i < original->sg_nseg; i++) {
		space += original->sg_segs[i].ss_len;
		count++;
		if (space >= length) {
			/*
			 * If 'length' falls in the middle of a
			 * scatter/gather list entry, then 'split'
			 * holds how much of that entry will remain in
			 * 'original'.
			 */
			split = space - length;
			break;
		}
	}

	/* Nothing to do, so leave head empty. */
	if (count == 0)
		return (0);

	if (*head == NULL) {
		sg = sglist_alloc(count, mflags);
		if (sg == NULL)
			return (ENOMEM);
		*head = sg;
	} else {
		sg = *head;
		if (sg->sg_maxseg < count)
			return (EFBIG);
		if (sg->sg_nseg != 0)
			return (EINVAL);
	}

	/* Copy 'count' entries to 'sg' from 'original'. */
	bcopy(original->sg_segs, sg->sg_segs, count *
	    sizeof(struct sglist_seg));
	sg->sg_nseg = count;

	/*
	 * If we had to split a list entry, fixup the last entry in
	 * 'sg' and the new first entry in 'original'.  We also
	 * decrement 'count' by 1 since we will only be removing
	 * 'count - 1' segments from 'original' now.
	 */
	if (split != 0) {
		count--;
		sg->sg_segs[count].ss_len -= split;
		original->sg_segs[count].ss_paddr =
		    sg->sg_segs[count].ss_paddr + sg->sg_segs[count].ss_len;
		original->sg_segs[count].ss_len = split;
	}

	/* Trim 'count' entries from the front of 'original'. */
	original->sg_nseg -= count;
	bcopy(original->sg_segs + count, original->sg_segs,
	    original->sg_nseg * sizeof(struct sglist_seg));
	return (0);
}
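
/*
 * Worked example (not part of the original file): if 'original' holds
 * the single segment { 0x1000, 0x1800 } and sglist_split() is called
 * with length 0x1000, then '*head' receives { 0x1000, 0x1000 } and
 * 'original' is left with { 0x2000, 0x800 }, the split entry's tail.
 */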

/*
 * Append the scatter/gather list elements in 'second' to the
 * scatter/gather list 'first'.  If there is not enough space in
 * 'first', EFBIG is returned.
 */
int
sglist_join(struct sglist *first, struct sglist *second)
{
	struct sglist_seg *flast, *sfirst;
	int append;

	/* If 'second' is empty, there is nothing to do. */
	if (second->sg_nseg == 0)
		return (0);

	/*
	 * If the first entry in 'second' can be appended to the last entry
	 * in 'first' then set append to '1'.
	 */
	append = 0;
	flast = &first->sg_segs[first->sg_nseg - 1];
	sfirst = &second->sg_segs[0];
	if (first->sg_nseg != 0 &&
	    flast->ss_paddr + flast->ss_len == sfirst->ss_paddr)
		append = 1;

	/* Make sure 'first' has enough room. */
	if (first->sg_nseg + second->sg_nseg - append > first->sg_maxseg)
		return (EFBIG);

	/* Merge last in 'first' and first in 'second' if needed. */
	if (append)
		flast->ss_len += sfirst->ss_len;

	/* Append new segments from 'second' to 'first'. */
	bcopy(second->sg_segs + append, first->sg_segs + first->sg_nseg,
	    (second->sg_nseg - append) * sizeof(struct sglist_seg));
	first->sg_nseg += second->sg_nseg - append;
	sglist_reset(second);
	return (0);
}
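
/*
 * Illustration (not part of the original file): joining first =
 * { 0x1000, 0x1000 } with second = { 0x2000, 0x800 } coalesces the
 * abutting ranges, so 'first' ends up with the single segment
 * { 0x1000, 0x1800 } and 'second' is reset to empty.
 */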

/*
 * Generate a new scatter/gather list from a range of an existing
 * scatter/gather list.  The 'offset' and 'length' parameters specify
 * the logical range of the 'original' list to extract.  If that range
 * is not a subset of the length of 'original', then EINVAL is
 * returned.  The new scatter/gather list is stored in '*slice'.
 *
 * If '*slice' is NULL, then a new list will be allocated using
 * 'mflags'.  If M_NOWAIT is specified and the allocation fails,
 * ENOMEM will be returned.
 *
 * If '*slice' is not NULL, it should point to an empty sglist.  If it
 * does not have enough room for the remaining space, then EFBIG will
 * be returned.  If '*slice' is not empty, then EINVAL will be
 * returned.
 */
int
sglist_slice(struct sglist *original, struct sglist **slice, size_t offset,
    size_t length, int mflags)
{
	struct sglist *sg;
	size_t space, end, foffs, loffs;
	int count, i, fseg;

	/* Nothing to do. */
	if (length == 0)
		return (0);

	/* Figure out how many segments '*slice' needs to have. */
	end = offset + length;
	space = 0;
	count = 0;
	fseg = 0;
	foffs = loffs = 0;
	for (i = 0; i < original->sg_nseg; i++) {
		space += original->sg_segs[i].ss_len;
		if (space > offset) {
			/*
			 * When we hit the first segment, store its index
			 * in 'fseg' and the offset into the first segment
			 * of 'offset' in 'foffs'.
			 */
			if (count == 0) {
				fseg = i;
				foffs = offset - (space -
				    original->sg_segs[i].ss_len);
				CTR1(KTR_DEV, "sglist_slice: foffs = %08lx",
				    foffs);
			}
			count++;

			/*
			 * When we hit the last segment, break out of
			 * the loop.  Store the amount of extra space
			 * at the end of this segment in 'loffs'.
			 */
			if (space >= end) {
				loffs = space - end;
				CTR1(KTR_DEV, "sglist_slice: loffs = %08lx",
				    loffs);
				break;
			}
		}
	}

	/* If we never hit 'end', then 'length' ran off the end, so fail. */
	if (space < end)
		return (EINVAL);

	if (*slice == NULL) {
		sg = sglist_alloc(count, mflags);
		if (sg == NULL)
			return (ENOMEM);
		*slice = sg;
	} else {
		sg = *slice;
		if (sg->sg_maxseg < count)
			return (EFBIG);
		if (sg->sg_nseg != 0)
			return (EINVAL);
	}

	/*
	 * Copy over 'count' segments from 'original' starting at
	 * 'fseg' to 'sg'.
	 */
	bcopy(original->sg_segs + fseg, sg->sg_segs,
	    count * sizeof(struct sglist_seg));
	sg->sg_nseg = count;

	/* Fixup first and last segments if needed. */
	if (foffs != 0) {
		sg->sg_segs[0].ss_paddr += foffs;
		sg->sg_segs[0].ss_len -= foffs;
		CTR2(KTR_DEV, "sglist_slice seg[0]: %08lx:%08lx",
		    (long)sg->sg_segs[0].ss_paddr, sg->sg_segs[0].ss_len);
	}
	if (loffs != 0) {
		sg->sg_segs[count - 1].ss_len -= loffs;
		CTR2(KTR_DEV, "sglist_slice seg[%d]: len %08x", count - 1,
		    sg->sg_segs[count - 1].ss_len);
	}
	return (0);
}
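
/*
 * Worked example (not part of the original file): slicing offset 0x800,
 * length 0x1000 out of an 'original' holding the single segment
 * { 0x1000, 0x3000 } yields a '*slice' with one segment
 * { 0x1800, 0x1000 }: the first fixup advances ss_paddr by 'foffs'
 * (0x800) and the second trims 'loffs' (0x1800) from the tail.
 */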