1/*	$OpenBSD: buf.h,v 1.114 2024/02/03 18:51:58 beck Exp $	*/
2/*	$NetBSD: buf.h,v 1.25 1997/04/09 21:12:17 mycroft Exp $	*/
3
4/*
5 * Copyright (c) 1982, 1986, 1989, 1993
6 *	The Regents of the University of California.  All rights reserved.
7 * (c) UNIX System Laboratories, Inc.
8 * All or some portions of this file are derived from material licensed
9 * to the University of California by American Telephone and Telegraph
10 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
11 * the permission of UNIX System Laboratories, Inc.
12 *
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
15 * are met:
16 * 1. Redistributions of source code must retain the above copyright
17 *    notice, this list of conditions and the following disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 *    notice, this list of conditions and the following disclaimer in the
20 *    documentation and/or other materials provided with the distribution.
21 * 3. Neither the name of the University nor the names of its contributors
22 *    may be used to endorse or promote products derived from this software
23 *    without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * SUCH DAMAGE.
36 *
37 *	@(#)buf.h	8.7 (Berkeley) 1/21/94
38 */
39
40#ifndef _SYS_BUF_H_
41#define	_SYS_BUF_H_
42#include <sys/queue.h>
43#include <sys/tree.h>
44#include <sys/mutex.h>
45#include <uvm/uvm_extern.h>
46
/*
 * Bogus non-NULL pointer value.
 * NOTE(review): apparently a poison/terminator sentinel for driver
 * buffer chains (predates NULL-terminated lists) -- confirm users.
 */
#define NOLIST ((struct buf *)0x87654321)

struct buf;
struct vnode;

LIST_HEAD(bufhead, buf);
53
54/*
55 * Buffer queues
56 */
57#define BUFQ_NSCAN_N	128
58#define BUFQ_FIFO	0
59#define BUFQ_NSCAN	1
60#define BUFQ_DEFAULT	BUFQ_NSCAN
61#define BUFQ_HOWMANY	2
62
63/*
64 * Write limits for bufq - defines high and low water marks for how
65 * many kva slots are allowed to be consumed to parallelize writes from
66 * the buffer cache from any individual bufq.
67 */
68#define BUFQ_HI		128
69#define BUFQ_LOW	64
70
71struct bufq_impl;
72
/*
 * A device I/O queue.  The queueing discipline (selected by bufq_type,
 * one of the BUFQ_* values above) lives behind bufq_impl/bufq_data.
 * bufq_hi/bufq_low are the write water marks described above
 * (BUFQ_HI/BUFQ_LOW).
 * NOTE(review): remaining field roles inferred from names; confirm in
 * kern/kern_bufq.c.
 */
struct bufq {
	SLIST_ENTRY(bufq)	 bufq_entries;	/* link in list of all bufqs */
	struct mutex	 	 bufq_mtx;	/* presumably guards this bufq */
	void			*bufq_data;	/* discipline-private state */
	u_int			 bufq_outstanding;
	u_int			 bufq_hi;	/* write high water mark */
	u_int			 bufq_low;	/* write low water mark */
	int			 bufq_waiting;
	int			 bufq_stop;
	int			 bufq_type;	/* BUFQ_FIFO or BUFQ_NSCAN */
	const struct bufq_impl	*bufq_impl;	/* discipline method table */
};
85
/* Lifecycle: the int argument is a BUFQ_* discipline type. */
int		 bufq_init(struct bufq *, int);
int		 bufq_switch(struct bufq *, int);
void		 bufq_destroy(struct bufq *);

/* Enqueue/dequeue bufs according to the queue's discipline. */
void		 bufq_queue(struct bufq *, struct buf *);
struct buf	*bufq_dequeue(struct bufq *);
void		 bufq_requeue(struct bufq *, struct buf *);
int		 bufq_peek(struct bufq *);
void		 bufq_drain(struct bufq *);

/*
 * Completion and flow control.
 * NOTE(review): bufq_quiesce()/bufq_restart() appear to stop and resume
 * I/O issue globally (cf. bufq_stop field) -- confirm in kern/kern_bufq.c.
 */
void		 bufq_wait(struct bufq *);
void		 bufq_done(struct bufq *, struct buf *);
void		 bufq_quiesce(void);
void		 bufq_restart(void);
100
/* fifo: per-buf linkage for simple FIFO ordering. */
SIMPLEQ_HEAD(bufq_fifo_head, buf);
struct bufq_fifo {
	SIMPLEQ_ENTRY(buf)	bqf_entries;
};

/*
 * nscan: same single linkage field, with the same name (bqf_entries)
 * as the fifo variant.
 * NOTE(review): identical layout suggests shared list-handling code
 * between the two disciplines -- confirm in kern/kern_bufq.c.
 */
SIMPLEQ_HEAD(bufq_nscan_head, buf);
struct bufq_nscan {
	SIMPLEQ_ENTRY(buf)	bqf_entries;
};

/* bufq link in struct buf; one member per queueing discipline. */
union bufq_data {
	struct bufq_fifo	bufq_data_fifo;
	struct bufq_nscan	bufq_data_nscan;
};
118
/*
 * The buffer header describes an I/O operation in the kernel.
 * A buf can be linked on the global list of all buffers (b_list), its
 * vnode's buffer tree and list (b_rbbufs/b_vnbufs), a free list when
 * inactive (b_freelist), and a bufq while queued (b_bufq/b_bq).
 */
struct buf {
	RBT_ENTRY(buf) b_rbbufs;	/* vnode "hash" tree */
	LIST_ENTRY(buf) b_list;		/* All allocated buffers. */
	LIST_ENTRY(buf) b_vnbufs;	/* Buffer's associated vnode. */
	TAILQ_ENTRY(buf) b_freelist;	/* Free list position if not active. */
	int cache;			/* which cache are we in */
	struct  proc *b_proc;		/* Associated proc; NULL if kernel. */
	volatile long	b_flags;	/* B_* flags. */
	long	b_bufsize;		/* Allocated buffer size. */
	long	b_bcount;		/* Valid bytes in buffer. */
	size_t	b_resid;		/* Remaining I/O. */
	int	b_error;		/* Errno value. */
	dev_t	b_dev;			/* Device associated with buffer. */
	caddr_t	b_data;			/* associated data */
	void	*b_saveaddr;		/* Original b_data for physio. */

	TAILQ_ENTRY(buf) b_valist;	/* LRU of va to reuse. */

	union	bufq_data b_bufq;	/* Discipline-specific bufq linkage. */
	struct	bufq	  *b_bq;	/* What bufq this buf is on */

	struct uvm_object *b_pobj;	/* Object owning this buf's pages;
					 * NOTE(review): relationship to
					 * b_uobj below -- confirm. */
	struct uvm_object b_uobj;	/* Object containing the pages */
	off_t	b_poffs;		/* Offset within object */

	daddr_t	b_lblkno;		/* Logical block number. */
	daddr_t	b_blkno;		/* Underlying physical block number. */
					/* Function to call upon completion.
					 * Will be called at splbio(). */
	void	(*b_iodone)(struct buf *);
	struct	vnode *b_vp;		/* Device vnode. */
	int	b_dirtyoff;		/* Offset in buffer of dirty region. */
	int	b_dirtyend;		/* Offset of end of dirty region. */
	int	b_validoff;		/* Offset in buffer of valid region. */
	int	b_validend;		/* Offset of end of valid region. */
};
156
TAILQ_HEAD(bufqueue, buf);

/*
 * One buffer cache: page accounting plus the hot/cold/warm queues that
 * buffers migrate between (cf. the B_WARM/B_COLD flags below).
 * NOTE(review): queue semantics inferred from names; confirm against
 * bufcache_* functions in kern/vfs_bio.c.
 */
struct bufcache {
	int64_t hotbufpages;	/* pages held by bufs on hotqueue */
	int64_t warmbufpages;	/* pages held by bufs on warmqueue */
	int64_t cachepages;	/* total pages accounted to this cache */
	struct bufqueue hotqueue;
	struct bufqueue coldqueue;
	struct bufqueue warmqueue;
};
167
/* Device driver compatibility definitions. */
#define	b_active b_bcount		/* Driver queue head: drive active. */

/*
 * These flags are kept in b_flags.
 */
#define	B_WRITE		0x00000000	/* Write buffer (pseudo flag). */
#define	B_AGE		0x00000001	/* Move to age queue when I/O done. */
#define	B_NEEDCOMMIT	0x00000002	/* Needs committing to stable storage */
#define	B_ASYNC		0x00000004	/* Start I/O, do not wait. */
#define	B_BAD		0x00000008	/* Bad block revectoring in progress. */
#define	B_BUSY		0x00000010	/* I/O in progress. */
#define	B_CACHE		0x00000020	/* Bread found us in the cache. */
#define	B_CALL		0x00000040	/* Call b_iodone from biodone. */
#define	B_DELWRI	0x00000080	/* Delay I/O until buffer reused. */
#define	B_DONE		0x00000100	/* I/O completed. */
#define	B_EINTR		0x00000200	/* I/O was interrupted */
#define	B_ERROR		0x00000400	/* I/O error occurred. */
#define	B_INVAL		0x00000800	/* Does not contain valid info. */
#define	B_NOCACHE	0x00001000	/* Do not cache block after use. */
#define	B_PHYS		0x00002000	/* I/O to user memory. */
#define	B_RAW		0x00004000	/* Set by physio for raw transfers. */
#define	B_READ		0x00008000	/* Read buffer. */
#define	B_WANTED	0x00010000	/* Process wants this buffer. */
#define	B_WRITEINPROG	0x00020000	/* Write in progress. */
#define	B_XXX		0x00040000	/* Debugging flag. */
#define	B_DEFERRED	0x00080000	/* Skipped over for cleaning */
#define	B_SCANNED	0x00100000	/* Block already pushed during sync */
#define	B_PDAEMON	0x00200000	/* I/O started by pagedaemon */
#define	B_RELEASED	0x00400000	/* free this buffer after its kvm */
#define	B_WARM		0x00800000	/* buffer is or has been on the warm queue */
#define	B_COLD		0x01000000	/* buffer is on the cold queue */
#define	B_BC		0x02000000	/* buffer is managed by the cache */
#define	B_DMA		0x04000000	/* buffer is DMA reachable */

/*
 * Bit-name table for the kernel printf "%b" format: "\20" is the output
 * base (020 = 16), then each "\NNN" octal escape is the 1-based bit
 * number of the name that follows.  Keep in sync with the B_* defines
 * above.  NOTE(review): bit \023 prints "XXX(FORMAT)" for B_XXX and
 * \026 prints "DAEMON" for B_PDAEMON -- intentional label divergence,
 * do not "fix" without checking consumers of B_BITS.
 */
#define	B_BITS	"\20\001AGE\002NEEDCOMMIT\003ASYNC\004BAD\005BUSY" \
    "\006CACHE\007CALL\010DELWRI\011DONE\012EINTR\013ERROR" \
    "\014INVAL\015NOCACHE\016PHYS\017RAW\020READ" \
    "\021WANTED\022WRITEINPROG\023XXX(FORMAT)\024DEFERRED" \
    "\025SCANNED\026DAEMON\027RELEASED\030WARM\031COLD\032BC\033DMA"
208
209/*
210 * Zero out the buffer's data area.
211 */
212#define	clrbuf(bp) {							\
213	bzero((bp)->b_data, (bp)->b_bcount);				\
214	(bp)->b_resid = 0;						\
215}
216
217
218/* Flags to low-level allocation routines. */
219#define B_CLRBUF	0x01	/* Request allocated buffer be cleared. */
220#define B_SYNC		0x02	/* Do all allocations synchronously. */
221
/*
 * Read-ahead / write-clustering state, tracked per file.
 * NOTE(review): presumably embedded by filesystems that use the
 * cluster read/write code -- confirm against callers.
 */
struct cluster_info {
	daddr_t	ci_lastr;	/* last read (read-ahead) */
	daddr_t	ci_lastw;	/* last write (write cluster) */
	daddr_t	ci_cstart;	/* start block of cluster */
	daddr_t	ci_lasta;	/* last allocation */
	int	ci_clen; 	/* length of current cluster */
	int	ci_ralen;	/* Read-ahead length */
	daddr_t	ci_maxra;	/* last readahead block */
};
231
#ifdef _KERNEL
__BEGIN_DECLS
/* Kva slots (of size MAXPHYS) reserved for syncer and cleaner. */
#define RESERVE_SLOTS 4
/* Buffer cache pages reserved for syncer and cleaner. */
#define RESERVE_PAGES (RESERVE_SLOTS * MAXPHYS / PAGE_SIZE)
/* Minimum size of the buffer cache, in pages. */
#define BCACHE_MIN (RESERVE_PAGES * 2)
/* Cached pages that are not clean, i.e. still need writing back. */
#define UNCLEAN_PAGES (bcstats.numbufpages - bcstats.numcleanpages)

extern struct proc *cleanerproc;	/* NOTE(review): presumably the
					 * buffer cleaner's proc -- confirm */
extern long bufpages;		/* Max number of pages for buffers' data */
extern struct pool bufpool;	/* NOTE(review): presumably backs struct
				 * buf allocations -- confirm */
extern struct bufhead bufhead;	/* all allocated buffers (via b_list) */
246
247void	bawrite(struct buf *);
248void	bdwrite(struct buf *);
249void	biodone(struct buf *);
250int	biowait(struct buf *);
251int bread(struct vnode *, daddr_t, int, struct buf **);
252int breadn(struct vnode *, daddr_t, int, daddr_t *, int *, int,
253    struct buf **);
254void	brelse(struct buf *);
255#define bremfree bufcache_take
256void	bufinit(void);
257void	buf_dirty(struct buf *);
258void    buf_undirty(struct buf *);
259void	buf_adjcnt(struct buf *, long);
260int	bwrite(struct buf *);
261struct buf *getblk(struct vnode *, daddr_t, int, int, uint64_t);
262struct buf *geteblk(size_t);
263struct buf *incore(struct vnode *, daddr_t);
264
265/*
266 * bufcache functions
267 */
268void bufcache_take(struct buf *);
269void bufcache_release(struct buf *);
270
271int buf_flip_high(struct buf *);
272void buf_flip_dma(struct buf *);
273struct buf *bufcache_getcleanbuf(int, int);
274struct buf *bufcache_getdirtybuf(void);
275
276/*
277 * buf_kvm_init initializes the kvm handling for buffers.
278 * buf_acquire sets the B_BUSY flag and ensures that the buffer is
279 * mapped in the kvm.
280 * buf_release clears the B_BUSY flag and allows the buffer to become
281 * unmapped.
282 * buf_unmap is for internal use only. Unmaps the buffer from kvm.
283 */
284void	buf_mem_init(vsize_t);
285void	buf_acquire(struct buf *);
286void	buf_acquire_nomap(struct buf *);
287void	buf_map(struct buf *);
288void	buf_release(struct buf *);
289int	buf_dealloc_mem(struct buf *);
290void	buf_fix_mapping(struct buf *, vsize_t);
291void	buf_alloc_pages(struct buf *, vsize_t);
292void	buf_free_pages(struct buf *);
293
294void	minphys(struct buf *bp);
295int	physio(void (*strategy)(struct buf *), dev_t dev, int flags,
296	    void (*minphys)(struct buf *), struct uio *uio);
297void  brelvp(struct buf *);
298void  reassignbuf(struct buf *);
299void  bgetvp(struct vnode *, struct buf *);
300
301void  buf_replacevnode(struct buf *, struct vnode *);
302void  buf_daemon(void *);
303void  buf_replacevnode(struct buf *, struct vnode *);
304int bread_cluster(struct vnode *, daddr_t, int, struct buf **);
305
306__END_DECLS
307#endif /* _KERNEL */
308#endif /* !_SYS_BUF_H_ */
309