1// SPDX-License-Identifier: GPL-2.0-only
2/*
3 * Copyright (C) 2017 Red Hat, Inc.
4 */
5
6#include <linux/cred.h>
7#include <linux/file.h>
8#include <linux/mount.h>
9#include <linux/xattr.h>
10#include <linux/uio.h>
11#include <linux/uaccess.h>
12#include <linux/security.h>
13#include <linux/fs.h>
14#include <linux/backing-file.h>
15#include "overlayfs.h"
16
/*
 * One-character tag describing @realinode relative to the overlay @inode,
 * used in debug output:
 *   'l' - @realinode is not the overlay inode's upper inode
 *   'u' - @realinode is the upper inode and ovl_has_upperdata() is true
 *   'm' - @realinode is the upper inode and ovl_has_upperdata() is false
 */
static char ovl_whatisit(struct inode *inode, struct inode *realinode)
{
	if (realinode == ovl_inode_upper(inode))
		return ovl_has_upperdata(inode) ? 'u' : 'm';

	return 'l';
}
26
27static struct file *ovl_open_realfile(const struct file *file,
28				      const struct path *realpath)
29{
30	struct inode *realinode = d_inode(realpath->dentry);
31	struct inode *inode = file_inode(file);
32	struct mnt_idmap *real_idmap;
33	struct file *realfile;
34	const struct cred *old_cred;
35	int flags = file->f_flags | OVL_OPEN_FLAGS;
36	int acc_mode = ACC_MODE(flags);
37	int err;
38
39	if (flags & O_APPEND)
40		acc_mode |= MAY_APPEND;
41
42	old_cred = ovl_override_creds(inode->i_sb);
43	real_idmap = mnt_idmap(realpath->mnt);
44	err = inode_permission(real_idmap, realinode, MAY_OPEN | acc_mode);
45	if (err) {
46		realfile = ERR_PTR(err);
47	} else {
48		if (!inode_owner_or_capable(real_idmap, realinode))
49			flags &= ~O_NOATIME;
50
51		realfile = backing_file_open(&file->f_path, flags, realpath,
52					     current_cred());
53	}
54	revert_creds(old_cred);
55
56	pr_debug("open(%p[%pD2/%c], 0%o) -> (%p, 0%o)\n",
57		 file, file, ovl_whatisit(inode, realinode), file->f_flags,
58		 realfile, IS_ERR(realfile) ? 0 : realfile->f_flags);
59
60	return realfile;
61}
62
63#define OVL_SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT)
64
65static int ovl_change_flags(struct file *file, unsigned int flags)
66{
67	struct inode *inode = file_inode(file);
68	int err;
69
70	flags &= OVL_SETFL_MASK;
71
72	if (((flags ^ file->f_flags) & O_APPEND) && IS_APPEND(inode))
73		return -EPERM;
74
75	if ((flags & O_DIRECT) && !(file->f_mode & FMODE_CAN_ODIRECT))
76		return -EINVAL;
77
78	if (file->f_op->check_flags) {
79		err = file->f_op->check_flags(flags);
80		if (err)
81			return err;
82	}
83
84	spin_lock(&file->f_lock);
85	file->f_flags = (file->f_flags & ~OVL_SETFL_MASK) | flags;
86	file->f_iocb_flags = iocb_flags(file);
87	spin_unlock(&file->f_lock);
88
89	return 0;
90}
91
92static int ovl_real_fdget_meta(const struct file *file, struct fd *real,
93			       bool allow_meta)
94{
95	struct dentry *dentry = file_dentry(file);
96	struct path realpath;
97	int err;
98
99	real->flags = 0;
100	real->file = file->private_data;
101
102	if (allow_meta) {
103		ovl_path_real(dentry, &realpath);
104	} else {
105		/* lazy lookup and verify of lowerdata */
106		err = ovl_verify_lowerdata(dentry);
107		if (err)
108			return err;
109
110		ovl_path_realdata(dentry, &realpath);
111	}
112	if (!realpath.dentry)
113		return -EIO;
114
115	/* Has it been copied up since we'd opened it? */
116	if (unlikely(file_inode(real->file) != d_inode(realpath.dentry))) {
117		real->flags = FDPUT_FPUT;
118		real->file = ovl_open_realfile(file, &realpath);
119
120		return PTR_ERR_OR_ZERO(real->file);
121	}
122
123	/* Did the flags change since open? */
124	if (unlikely((file->f_flags ^ real->file->f_flags) & ~OVL_OPEN_FLAGS))
125		return ovl_change_flags(real->file, file->f_flags);
126
127	return 0;
128}
129
130static int ovl_real_fdget(const struct file *file, struct fd *real)
131{
132	if (d_is_dir(file_dentry(file))) {
133		real->flags = 0;
134		real->file = ovl_dir_real_file(file, false);
135
136		return PTR_ERR_OR_ZERO(real->file);
137	}
138
139	return ovl_real_fdget_meta(file, real, false);
140}
141
142static int ovl_open(struct inode *inode, struct file *file)
143{
144	struct dentry *dentry = file_dentry(file);
145	struct file *realfile;
146	struct path realpath;
147	int err;
148
149	/* lazy lookup and verify lowerdata */
150	err = ovl_verify_lowerdata(dentry);
151	if (err)
152		return err;
153
154	err = ovl_maybe_copy_up(dentry, file->f_flags);
155	if (err)
156		return err;
157
158	/* No longer need these flags, so don't pass them on to underlying fs */
159	file->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
160
161	ovl_path_realdata(dentry, &realpath);
162	if (!realpath.dentry)
163		return -EIO;
164
165	realfile = ovl_open_realfile(file, &realpath);
166	if (IS_ERR(realfile))
167		return PTR_ERR(realfile);
168
169	file->private_data = realfile;
170
171	return 0;
172}
173
174static int ovl_release(struct inode *inode, struct file *file)
175{
176	fput(file->private_data);
177
178	return 0;
179}
180
181static loff_t ovl_llseek(struct file *file, loff_t offset, int whence)
182{
183	struct inode *inode = file_inode(file);
184	struct fd real;
185	const struct cred *old_cred;
186	loff_t ret;
187
188	/*
189	 * The two special cases below do not need to involve real fs,
190	 * so we can optimizing concurrent callers.
191	 */
192	if (offset == 0) {
193		if (whence == SEEK_CUR)
194			return file->f_pos;
195
196		if (whence == SEEK_SET)
197			return vfs_setpos(file, 0, 0);
198	}
199
200	ret = ovl_real_fdget(file, &real);
201	if (ret)
202		return ret;
203
204	/*
205	 * Overlay file f_pos is the master copy that is preserved
206	 * through copy up and modified on read/write, but only real
207	 * fs knows how to SEEK_HOLE/SEEK_DATA and real fs may impose
208	 * limitations that are more strict than ->s_maxbytes for specific
209	 * files, so we use the real file to perform seeks.
210	 */
211	ovl_inode_lock(inode);
212	real.file->f_pos = file->f_pos;
213
214	old_cred = ovl_override_creds(inode->i_sb);
215	ret = vfs_llseek(real.file, offset, whence);
216	revert_creds(old_cred);
217
218	file->f_pos = real.file->f_pos;
219	ovl_inode_unlock(inode);
220
221	fdput(real);
222
223	return ret;
224}
225
/*
 * Called after the real file was modified: refresh the overlay inode's
 * attributes (size/mtime) via ovl_copyattr().
 */
static void ovl_file_modified(struct file *file)
{
	struct inode *inode = file_inode(file);

	ovl_copyattr(inode);
}
231
232static void ovl_file_accessed(struct file *file)
233{
234	struct inode *inode, *upperinode;
235	struct timespec64 ctime, uctime;
236	struct timespec64 mtime, umtime;
237
238	if (file->f_flags & O_NOATIME)
239		return;
240
241	inode = file_inode(file);
242	upperinode = ovl_inode_upper(inode);
243
244	if (!upperinode)
245		return;
246
247	ctime = inode_get_ctime(inode);
248	uctime = inode_get_ctime(upperinode);
249	mtime = inode_get_mtime(inode);
250	umtime = inode_get_mtime(upperinode);
251	if ((!timespec64_equal(&mtime, &umtime)) ||
252	     !timespec64_equal(&ctime, &uctime)) {
253		inode_set_mtime_to_ts(inode, inode_get_mtime(upperinode));
254		inode_set_ctime_to_ts(inode, uctime);
255	}
256
257	touch_atime(&file->f_path);
258}
259
260static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter)
261{
262	struct file *file = iocb->ki_filp;
263	struct fd real;
264	ssize_t ret;
265	struct backing_file_ctx ctx = {
266		.cred = ovl_creds(file_inode(file)->i_sb),
267		.user_file = file,
268		.accessed = ovl_file_accessed,
269	};
270
271	if (!iov_iter_count(iter))
272		return 0;
273
274	ret = ovl_real_fdget(file, &real);
275	if (ret)
276		return ret;
277
278	ret = backing_file_read_iter(real.file, iter, iocb, iocb->ki_flags,
279				     &ctx);
280	fdput(real);
281
282	return ret;
283}
284
285static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
286{
287	struct file *file = iocb->ki_filp;
288	struct inode *inode = file_inode(file);
289	struct fd real;
290	ssize_t ret;
291	int ifl = iocb->ki_flags;
292	struct backing_file_ctx ctx = {
293		.cred = ovl_creds(inode->i_sb),
294		.user_file = file,
295		.end_write = ovl_file_modified,
296	};
297
298	if (!iov_iter_count(iter))
299		return 0;
300
301	inode_lock(inode);
302	/* Update mode */
303	ovl_copyattr(inode);
304
305	ret = ovl_real_fdget(file, &real);
306	if (ret)
307		goto out_unlock;
308
309	if (!ovl_should_sync(OVL_FS(inode->i_sb)))
310		ifl &= ~(IOCB_DSYNC | IOCB_SYNC);
311
312	/*
313	 * Overlayfs doesn't support deferred completions, don't copy
314	 * this property in case it is set by the issuer.
315	 */
316	ifl &= ~IOCB_DIO_CALLER_COMP;
317	ret = backing_file_write_iter(real.file, iter, iocb, ifl, &ctx);
318	fdput(real);
319
320out_unlock:
321	inode_unlock(inode);
322
323	return ret;
324}
325
326static ssize_t ovl_splice_read(struct file *in, loff_t *ppos,
327			       struct pipe_inode_info *pipe, size_t len,
328			       unsigned int flags)
329{
330	struct fd real;
331	ssize_t ret;
332	struct backing_file_ctx ctx = {
333		.cred = ovl_creds(file_inode(in)->i_sb),
334		.user_file = in,
335		.accessed = ovl_file_accessed,
336	};
337
338	ret = ovl_real_fdget(in, &real);
339	if (ret)
340		return ret;
341
342	ret = backing_file_splice_read(real.file, ppos, pipe, len, flags, &ctx);
343	fdput(real);
344
345	return ret;
346}
347
348/*
349 * Calling iter_file_splice_write() directly from overlay's f_op may deadlock
350 * due to lock order inversion between pipe->mutex in iter_file_splice_write()
351 * and file_start_write(real.file) in ovl_write_iter().
352 *
353 * So do everything ovl_write_iter() does and call iter_file_splice_write() on
354 * the real file.
355 */
356static ssize_t ovl_splice_write(struct pipe_inode_info *pipe, struct file *out,
357				loff_t *ppos, size_t len, unsigned int flags)
358{
359	struct fd real;
360	struct inode *inode = file_inode(out);
361	ssize_t ret;
362	struct backing_file_ctx ctx = {
363		.cred = ovl_creds(inode->i_sb),
364		.user_file = out,
365		.end_write = ovl_file_modified,
366	};
367
368	inode_lock(inode);
369	/* Update mode */
370	ovl_copyattr(inode);
371
372	ret = ovl_real_fdget(out, &real);
373	if (ret)
374		goto out_unlock;
375
376	ret = backing_file_splice_write(pipe, real.file, ppos, len, flags, &ctx);
377	fdput(real);
378
379out_unlock:
380	inode_unlock(inode);
381
382	return ret;
383}
384
385static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync)
386{
387	struct fd real;
388	const struct cred *old_cred;
389	int ret;
390
391	ret = ovl_sync_status(OVL_FS(file_inode(file)->i_sb));
392	if (ret <= 0)
393		return ret;
394
395	ret = ovl_real_fdget_meta(file, &real, !datasync);
396	if (ret)
397		return ret;
398
399	/* Don't sync lower file for fear of receiving EROFS error */
400	if (file_inode(real.file) == ovl_inode_upper(file_inode(file))) {
401		old_cred = ovl_override_creds(file_inode(file)->i_sb);
402		ret = vfs_fsync_range(real.file, start, end, datasync);
403		revert_creds(old_cred);
404	}
405
406	fdput(real);
407
408	return ret;
409}
410
411static int ovl_mmap(struct file *file, struct vm_area_struct *vma)
412{
413	struct file *realfile = file->private_data;
414	struct backing_file_ctx ctx = {
415		.cred = ovl_creds(file_inode(file)->i_sb),
416		.user_file = file,
417		.accessed = ovl_file_accessed,
418	};
419
420	return backing_file_mmap(realfile, vma, &ctx);
421}
422
423static long ovl_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
424{
425	struct inode *inode = file_inode(file);
426	struct fd real;
427	const struct cred *old_cred;
428	int ret;
429
430	inode_lock(inode);
431	/* Update mode */
432	ovl_copyattr(inode);
433	ret = file_remove_privs(file);
434	if (ret)
435		goto out_unlock;
436
437	ret = ovl_real_fdget(file, &real);
438	if (ret)
439		goto out_unlock;
440
441	old_cred = ovl_override_creds(file_inode(file)->i_sb);
442	ret = vfs_fallocate(real.file, mode, offset, len);
443	revert_creds(old_cred);
444
445	/* Update size */
446	ovl_file_modified(file);
447
448	fdput(real);
449
450out_unlock:
451	inode_unlock(inode);
452
453	return ret;
454}
455
456static int ovl_fadvise(struct file *file, loff_t offset, loff_t len, int advice)
457{
458	struct fd real;
459	const struct cred *old_cred;
460	int ret;
461
462	ret = ovl_real_fdget(file, &real);
463	if (ret)
464		return ret;
465
466	old_cred = ovl_override_creds(file_inode(file)->i_sb);
467	ret = vfs_fadvise(real.file, offset, len, advice);
468	revert_creds(old_cred);
469
470	fdput(real);
471
472	return ret;
473}
474
475enum ovl_copyop {
476	OVL_COPY,
477	OVL_CLONE,
478	OVL_DEDUPE,
479};
480
481static loff_t ovl_copyfile(struct file *file_in, loff_t pos_in,
482			    struct file *file_out, loff_t pos_out,
483			    loff_t len, unsigned int flags, enum ovl_copyop op)
484{
485	struct inode *inode_out = file_inode(file_out);
486	struct fd real_in, real_out;
487	const struct cred *old_cred;
488	loff_t ret;
489
490	inode_lock(inode_out);
491	if (op != OVL_DEDUPE) {
492		/* Update mode */
493		ovl_copyattr(inode_out);
494		ret = file_remove_privs(file_out);
495		if (ret)
496			goto out_unlock;
497	}
498
499	ret = ovl_real_fdget(file_out, &real_out);
500	if (ret)
501		goto out_unlock;
502
503	ret = ovl_real_fdget(file_in, &real_in);
504	if (ret) {
505		fdput(real_out);
506		goto out_unlock;
507	}
508
509	old_cred = ovl_override_creds(file_inode(file_out)->i_sb);
510	switch (op) {
511	case OVL_COPY:
512		ret = vfs_copy_file_range(real_in.file, pos_in,
513					  real_out.file, pos_out, len, flags);
514		break;
515
516	case OVL_CLONE:
517		ret = vfs_clone_file_range(real_in.file, pos_in,
518					   real_out.file, pos_out, len, flags);
519		break;
520
521	case OVL_DEDUPE:
522		ret = vfs_dedupe_file_range_one(real_in.file, pos_in,
523						real_out.file, pos_out, len,
524						flags);
525		break;
526	}
527	revert_creds(old_cred);
528
529	/* Update size */
530	ovl_file_modified(file_out);
531
532	fdput(real_in);
533	fdput(real_out);
534
535out_unlock:
536	inode_unlock(inode_out);
537
538	return ret;
539}
540
541static ssize_t ovl_copy_file_range(struct file *file_in, loff_t pos_in,
542				   struct file *file_out, loff_t pos_out,
543				   size_t len, unsigned int flags)
544{
545	return ovl_copyfile(file_in, pos_in, file_out, pos_out, len, flags,
546			    OVL_COPY);
547}
548
549static loff_t ovl_remap_file_range(struct file *file_in, loff_t pos_in,
550				   struct file *file_out, loff_t pos_out,
551				   loff_t len, unsigned int remap_flags)
552{
553	enum ovl_copyop op;
554
555	if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY))
556		return -EINVAL;
557
558	if (remap_flags & REMAP_FILE_DEDUP)
559		op = OVL_DEDUPE;
560	else
561		op = OVL_CLONE;
562
563	/*
564	 * Don't copy up because of a dedupe request, this wouldn't make sense
565	 * most of the time (data would be duplicated instead of deduplicated).
566	 */
567	if (op == OVL_DEDUPE &&
568	    (!ovl_inode_upper(file_inode(file_in)) ||
569	     !ovl_inode_upper(file_inode(file_out))))
570		return -EPERM;
571
572	return ovl_copyfile(file_in, pos_in, file_out, pos_out, len,
573			    remap_flags, op);
574}
575
576static int ovl_flush(struct file *file, fl_owner_t id)
577{
578	struct fd real;
579	const struct cred *old_cred;
580	int err;
581
582	err = ovl_real_fdget(file, &real);
583	if (err)
584		return err;
585
586	if (real.file->f_op->flush) {
587		old_cred = ovl_override_creds(file_inode(file)->i_sb);
588		err = real.file->f_op->flush(real.file, id);
589		revert_creds(old_cred);
590	}
591	fdput(real);
592
593	return err;
594}
595
596const struct file_operations ovl_file_operations = {
597	.open		= ovl_open,
598	.release	= ovl_release,
599	.llseek		= ovl_llseek,
600	.read_iter	= ovl_read_iter,
601	.write_iter	= ovl_write_iter,
602	.fsync		= ovl_fsync,
603	.mmap		= ovl_mmap,
604	.fallocate	= ovl_fallocate,
605	.fadvise	= ovl_fadvise,
606	.flush		= ovl_flush,
607	.splice_read    = ovl_splice_read,
608	.splice_write   = ovl_splice_write,
609
610	.copy_file_range	= ovl_copy_file_range,
611	.remap_file_range	= ovl_remap_file_range,
612};
613