1/*
2 * Copyright (c) 2020 iXsystems, Inc.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 *
26 */
27
28#include <sys/types.h>
29#include <sys/param.h>
30#include <sys/dmu.h>
31#include <sys/dmu_impl.h>
32#include <sys/dmu_tx.h>
33#include <sys/dbuf.h>
34#include <sys/dnode.h>
35#include <sys/zfs_context.h>
36#include <sys/dmu_objset.h>
37#include <sys/dmu_traverse.h>
38#include <sys/dsl_dataset.h>
39#include <sys/dsl_dir.h>
40#include <sys/dsl_pool.h>
41#include <sys/dsl_synctask.h>
42#include <sys/dsl_prop.h>
43#include <sys/dmu_zfetch.h>
44#include <sys/zfs_ioctl.h>
45#include <sys/zap.h>
46#include <sys/zio_checksum.h>
47#include <sys/zio_compress.h>
48#include <sys/sa.h>
49#include <sys/zfeature.h>
50#include <sys/abd.h>
51#include <sys/zfs_rlock.h>
52#include <sys/racct.h>
53#include <sys/vm.h>
54#include <sys/zfs_znode.h>
55#include <sys/zfs_vnops.h>
56
57#include <sys/ccompat.h>
58
/*
 * Convert a page index into a byte offset by shifting it left by
 * PAGE_SHIFT.  Only defined here when no earlier header provided it.
 */
#ifndef IDX_TO_OFF
#define	IDX_TO_OFF(idx) (((vm_ooffset_t)(idx)) << PAGE_SHIFT)
#endif

/*
 * Busy-state allocation flags, selected by FreeBSD version.  The
 * multi-token expansion is parenthesized so that the macro always acts
 * as a single operand, whatever operators surround it at the use site.
 */
#if  __FreeBSD_version < 1300051
#define	VM_ALLOC_BUSY_FLAGS	VM_ALLOC_NOBUSY
#else
#define	VM_ALLOC_BUSY_FLAGS	(VM_ALLOC_SBUSY | VM_ALLOC_IGN_SBUSY)
#endif

/*
 * Page lock wrappers: before FreeBSD version 1300072 they call
 * vm_page_lock()/vm_page_unlock(); from that version on they expand to
 * nothing.
 */
#if __FreeBSD_version < 1300072
#define	dmu_page_lock(m)	vm_page_lock(m)
#define	dmu_page_unlock(m)	vm_page_unlock(m)
#else
#define	dmu_page_lock(m)
#define	dmu_page_unlock(m)
#endif
77
78int
79dmu_write_pages(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
80    vm_page_t *ma, dmu_tx_t *tx)
81{
82	dmu_buf_t **dbp;
83	struct sf_buf *sf;
84	int numbufs, i;
85	int err;
86
87	if (size == 0)
88		return (0);
89
90	err = dmu_buf_hold_array(os, object, offset, size,
91	    FALSE, FTAG, &numbufs, &dbp);
92	if (err)
93		return (err);
94
95	for (i = 0; i < numbufs; i++) {
96		int tocpy, copied, thiscpy;
97		int bufoff;
98		dmu_buf_t *db = dbp[i];
99		caddr_t va;
100
101		ASSERT3U(size, >, 0);
102		ASSERT3U(db->db_size, >=, PAGESIZE);
103
104		bufoff = offset - db->db_offset;
105		tocpy = (int)MIN(db->db_size - bufoff, size);
106
107		ASSERT(i == 0 || i == numbufs-1 || tocpy == db->db_size);
108
109		if (tocpy == db->db_size)
110			dmu_buf_will_fill(db, tx, B_FALSE);
111		else
112			dmu_buf_will_dirty(db, tx);
113
114		for (copied = 0; copied < tocpy; copied += PAGESIZE) {
115			ASSERT3U(ptoa((*ma)->pindex), ==,
116			    db->db_offset + bufoff);
117			thiscpy = MIN(PAGESIZE, tocpy - copied);
118			va = zfs_map_page(*ma, &sf);
119			memcpy((char *)db->db_data + bufoff, va, thiscpy);
120			zfs_unmap_page(sf);
121			ma += 1;
122			bufoff += PAGESIZE;
123		}
124
125		if (tocpy == db->db_size)
126			dmu_buf_fill_done(db, tx, B_FALSE);
127
128		offset += tocpy;
129		size -= tocpy;
130	}
131	dmu_buf_rele_array(dbp, numbufs, FTAG);
132	return (err);
133}
134
/*
 * Fill the `count` pages of `ma` with data from the DMU buffers of
 * `object`, and opportunistically fill extra pages around them.
 *
 * The pages in `ma` must have consecutive page indices (asserted below).
 * The held range starts at the byte offset of ma[0] and is
 * IDX_TO_OFF(count - 1) + last_size bytes long, so `last_size` is the
 * number of bytes wanted from the final page and must not exceed
 * PAGE_SIZE.  Entries equal to bogus_page are walked over without being
 * mapped, written or marked valid.
 *
 * On entry *rbehind and *rahead are the maximum number of pages to fill
 * before ma[0] and after ma[count - 1]; on return they hold the number
 * of such pages that were actually filled.  Those extra pages are copied
 * out of the buffers already held for the main request.
 *
 * Returns 0 on success, or the error from dmu_buf_hold_array(), in which
 * case no page has been touched and *rbehind / *rahead are unchanged.
 */
int
dmu_read_pages(objset_t *os, uint64_t object, vm_page_t *ma, int count,
    int *rbehind, int *rahead, int last_size)
{
	struct sf_buf *sf;
	vm_object_t vmobj;
	vm_page_t m;
	dmu_buf_t **dbp;
	dmu_buf_t *db;
	caddr_t va;
	int numbufs, i;
	int bufoff, pgoff, tocpy;
	int mi, di;
	int err;

	ASSERT3U(ma[0]->pindex + count - 1, ==, ma[count - 1]->pindex);
	ASSERT3S(last_size, <=, PAGE_SIZE);

	err = dmu_buf_hold_array(os, object, IDX_TO_OFF(ma[0]->pindex),
	    IDX_TO_OFF(count - 1) + last_size, TRUE, FTAG, &numbufs, &dbp);
	if (err != 0)
		return (err);

#ifdef ZFS_DEBUG
	/*
	 * A short last page rules out read-ahead.  Unless the request is
	 * served by a single buffer at offset zero, every buffer must have
	 * the same power-of-two size and be aligned to it.
	 */
	IMPLY(last_size < PAGE_SIZE, *rahead == 0);
	if (dbp[0]->db_offset != 0 || numbufs > 1) {
		for (i = 0; i < numbufs; i++) {
			ASSERT(ISP2(dbp[i]->db_size));
			ASSERT3U((dbp[i]->db_offset % dbp[i]->db_size), ==, 0);
			ASSERT3U(dbp[i]->db_size, ==, dbp[0]->db_size);
		}
	}
#endif

	vmobj = ma[0]->object;
	zfs_vmobject_wlock_12(vmobj);

	/*
	 * Read-behind: walk backwards from the page just before ma[0],
	 * copying out of the first held buffer.  Stop at the first page
	 * that cannot be grabbed without waiting or that already has valid
	 * contents.
	 */
	db = dbp[0];
	for (i = 0; i < *rbehind; i++) {
		m = vm_page_grab_unlocked(vmobj, ma[0]->pindex - 1 - i,
		    VM_ALLOC_NORMAL | VM_ALLOC_NOWAIT | VM_ALLOC_BUSY_FLAGS);
		if (m == NULL)
			break;
		if (!vm_page_none_valid(m)) {
			ASSERT3U(m->valid, ==, VM_PAGE_BITS_ALL);
			vm_page_do_sunbusy(m);
			break;
		}
		ASSERT3U(m->dirty, ==, 0);
		ASSERT(!pmap_page_is_write_mapped(m));

		ASSERT3U(db->db_size, >, PAGE_SIZE);
		bufoff = IDX_TO_OFF(m->pindex) % db->db_size;
		va = zfs_map_page(m, &sf);
		memcpy(va, (char *)db->db_data + bufoff, PAGESIZE);
		zfs_unmap_page(sf);
		vm_page_valid(m);
		dmu_page_lock(m);
		/* Activate the page if its busy lock has waiters. */
		if ((m->busy_lock & VPB_BIT_WAITERS) != 0)
			vm_page_activate(m);
		else
			vm_page_deactivate(m);
		dmu_page_unlock(m);
		vm_page_do_sunbusy(m);
	}
	/* Report how many read-behind pages were actually filled. */
	*rbehind = i;

	/*
	 * Main copy.  mi indexes the requested pages and di the held
	 * buffers; pgoff and bufoff are the current positions inside the
	 * current page and buffer.  Each pass copies the largest run that
	 * fits in both, then moves on to the next page and/or buffer once
	 * the current one is used up.
	 */
	bufoff = IDX_TO_OFF(ma[0]->pindex) % db->db_size;
	pgoff = 0;
	for (mi = 0, di = 0; mi < count && di < numbufs; ) {
		if (pgoff == 0) {
			/* Starting a new page: map it unless it is bogus. */
			m = ma[mi];
			if (m != bogus_page) {
				vm_page_assert_xbusied(m);
				ASSERT(vm_page_none_valid(m));
				ASSERT3U(m->dirty, ==, 0);
				ASSERT(!pmap_page_is_write_mapped(m));
				va = zfs_map_page(m, &sf);
			}
		}
		/* Starting a new buffer. */
		if (bufoff == 0)
			db = dbp[di];

		if (m != bogus_page) {
			ASSERT3U(IDX_TO_OFF(m->pindex) + pgoff, ==,
			    db->db_offset + bufoff);
		}

		/*
		 * We do not need to clamp the copy size by the file
		 * size as the last block is zero-filled beyond the
		 * end of file anyway.
		 */
		tocpy = MIN(db->db_size - bufoff, PAGESIZE - pgoff);
		ASSERT3S(tocpy, >=, 0);
		if (m != bogus_page)
			memcpy(va + pgoff, (char *)db->db_data + bufoff, tocpy);

		pgoff += tocpy;
		ASSERT3S(pgoff, >=, 0);
		ASSERT3S(pgoff, <=, PAGESIZE);
		if (pgoff == PAGESIZE) {
			/* Page complete: unmap it and mark it valid. */
			if (m != bogus_page) {
				zfs_unmap_page(sf);
				vm_page_valid(m);
			}
			ASSERT3S(mi, <, count);
			mi++;
			pgoff = 0;
		}

		bufoff += tocpy;
		ASSERT3S(bufoff, >=, 0);
		ASSERT3S(bufoff, <=, db->db_size);
		if (bufoff == db->db_size) {
			/* Buffer exhausted: continue with the next one. */
			ASSERT3S(di, <, numbufs);
			di++;
			bufoff = 0;
		}
	}

#ifdef ZFS_DEBUG
	/*
	 * Three possibilities:
	 * - the last requested page ends at a buffer boundary and, thus,
	 *   all pages and buffers have been iterated;
	 * - all requested pages are filled, but the last buffer
	 *   has not been exhausted;
	 *   the read-ahead is possible only in this case;
	 * - all buffers have been read, but the last page has not been
	 *   fully filled;
	 *   this is only possible if the file has only a single buffer
	 *   with a size that is not a multiple of the page size.
	 */
	if (mi == count) {
		ASSERT3S(di, >=, numbufs - 1);
		IMPLY(*rahead != 0, di == numbufs - 1);
		IMPLY(*rahead != 0, bufoff != 0);
		ASSERT0(pgoff);
	}
	if (di == numbufs) {
		ASSERT3S(mi, >=, count - 1);
		ASSERT0(*rahead);
		IMPLY(pgoff == 0, mi == count);
		if (pgoff != 0) {
			ASSERT3S(mi, ==, count - 1);
			ASSERT3U((dbp[0]->db_size & PAGE_MASK), !=, 0);
		}
	}
#endif
	/*
	 * The buffers ran out part-way through the last page: zero the
	 * rest of that page before unmapping it and marking it valid.
	 */
	if (pgoff != 0) {
		ASSERT3P(m, !=, bogus_page);
		memset(va + pgoff, 0, PAGESIZE - pgoff);
		zfs_unmap_page(sf);
		vm_page_valid(m);
	}

	/*
	 * Read-ahead: walk forwards from the page just after the last
	 * requested one, copying out of `db`, the buffer the main copy
	 * used last.  The stop conditions match the read-behind loop.
	 */
	for (i = 0; i < *rahead; i++) {
		m = vm_page_grab_unlocked(vmobj, ma[count - 1]->pindex + 1 + i,
		    VM_ALLOC_NORMAL | VM_ALLOC_NOWAIT | VM_ALLOC_BUSY_FLAGS);
		if (m == NULL)
			break;
		if (!vm_page_none_valid(m)) {
			ASSERT3U(m->valid, ==, VM_PAGE_BITS_ALL);
			vm_page_do_sunbusy(m);
			break;
		}
		ASSERT3U(m->dirty, ==, 0);
		ASSERT(!pmap_page_is_write_mapped(m));

		ASSERT3U(db->db_size, >, PAGE_SIZE);
		bufoff = IDX_TO_OFF(m->pindex) % db->db_size;
		tocpy = MIN(db->db_size - bufoff, PAGESIZE);
		va = zfs_map_page(m, &sf);
		memcpy(va, (char *)db->db_data + bufoff, tocpy);
		if (tocpy < PAGESIZE) {
			/*
			 * The buffer ends inside this page: zero the tail.
			 * This is expected only for the final read-ahead
			 * page.
			 */
			ASSERT3S(i, ==, *rahead - 1);
			ASSERT3U((db->db_size & PAGE_MASK), !=, 0);
			memset(va + tocpy, 0, PAGESIZE - tocpy);
		}
		zfs_unmap_page(sf);
		vm_page_valid(m);
		dmu_page_lock(m);
		/* Activate the page if its busy lock has waiters. */
		if ((m->busy_lock & VPB_BIT_WAITERS) != 0)
			vm_page_activate(m);
		else
			vm_page_deactivate(m);
		dmu_page_unlock(m);
		vm_page_do_sunbusy(m);
	}
	/* Report how many read-ahead pages were actually filled. */
	*rahead = i;
	zfs_vmobject_wunlock_12(vmobj);

	dmu_buf_rele_array(dbp, numbufs, FTAG);
	return (0);
}
331