// SPDX-License-Identifier: GPL-2.0-only
/*
 * linux/fs/binfmt_elf.c
 *
 * These are the functions used to load ELF format executables as used
 * on SVr4 machines.  Information on the format may be found in the book
 * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
 * Tools".
 *
 * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/log2.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/errno.h>
#include <linux/signal.h>
#include <linux/binfmts.h>
#include <linux/string.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/personality.h>
#include <linux/elfcore.h>
#include <linux/init.h>
#include <linux/highuid.h>
#include <linux/compiler.h>
#include <linux/highmem.h>
#include <linux/hugetlb.h>
#include <linux/pagemap.h>
#include <linux/vmalloc.h>
#include <linux/security.h>
#include <linux/random.h>
#include <linux/elf.h>
#include <linux/elf-randomize.h>
#include <linux/utsname.h>
#include <linux/coredump.h>
#include <linux/sched.h>
#include <linux/sched/coredump.h>
#include <linux/sched/task_stack.h>
#include <linux/sched/cputime.h>
#include <linux/sizes.h>
#include <linux/types.h>
#include <linux/cred.h>
#include <linux/dax.h>
#include <linux/uaccess.h>
#include <linux/rseq.h>
#include <asm/param.h>
#include <asm/page.h>

#ifndef ELF_COMPAT
#define ELF_COMPAT 0
#endif

#ifndef user_long_t
#define user_long_t long
#endif
#ifndef user_siginfo_t
#define user_siginfo_t siginfo_t
#endif

/* That's for binfmt_elf_fdpic to deal with */
#ifndef elf_check_fdpic
#define elf_check_fdpic(ex) false
#endif

static int load_elf_binary(struct linux_binprm *bprm);

#ifdef CONFIG_USELIB
static int load_elf_library(struct file *);
#else
#define load_elf_library NULL
#endif

/*
 * If we don't support core dumping, then supply a NULL so we
 * don't even try.
 */
#ifdef CONFIG_ELF_CORE
static int elf_core_dump(struct coredump_params *cprm);
#else
#define elf_core_dump	NULL
#endif

#if ELF_EXEC_PAGESIZE > PAGE_SIZE
#define ELF_MIN_ALIGN	ELF_EXEC_PAGESIZE
#else
#define ELF_MIN_ALIGN	PAGE_SIZE
#endif

#ifndef ELF_CORE_EFLAGS
#define ELF_CORE_EFLAGS	0
#endif

#define ELF_PAGESTART(_v) ((_v) & ~(int)(ELF_MIN_ALIGN-1))
#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
#define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
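/*
 * Worked example, assuming ELF_MIN_ALIGN == 4096 (0x1000): for
 * _v == 0x1234, ELF_PAGESTART gives 0x1000, ELF_PAGEOFFSET gives
 * 0x234 and ELF_PAGEALIGN gives 0x2000.
 */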

static struct linux_binfmt elf_format = {
	.module		= THIS_MODULE,
	.load_binary	= load_elf_binary,
	.load_shlib	= load_elf_library,
#ifdef CONFIG_COREDUMP
	.core_dump	= elf_core_dump,
	.min_coredump	= ELF_EXEC_PAGESIZE,
#endif
};

#define BAD_ADDR(x) (unlikely((unsigned long)(x) >= TASK_SIZE))

/*
 * We need to explicitly zero any trailing portion of the page that follows
 * p_filesz when it ends before the page ends (e.g. bss), otherwise this
 * memory will contain junk from the file that should not be present.
 */
static int padzero(unsigned long address)
{
	unsigned long nbyte;

	nbyte = ELF_PAGEOFFSET(address);
	if (nbyte) {
		nbyte = ELF_MIN_ALIGN - nbyte;
		if (clear_user((void __user *)address, nbyte))
			return -EFAULT;
	}
	return 0;
}
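/*
 * Example: if a segment's file data ends at 0x401234 and ELF_MIN_ALIGN
 * is 4096, padzero(0x401234) clears the 0xdcc bytes from 0x401234 up to
 * the page boundary at 0x402000.
 */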

/* Let's use some macros to make this stack manipulation a little clearer */
#ifdef CONFIG_STACK_GROWSUP
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
#define STACK_ROUND(sp, items) \
	((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ \
	elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
	old_sp; })
#else
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
#define STACK_ROUND(sp, items) \
	(((unsigned long) (sp - items)) &~ 15UL)
#define STACK_ALLOC(sp, len) (sp -= len)
#endif
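/*
 * E.g. on a grows-down stack, STACK_ALLOC(sp, 16) moves sp down by 16
 * bytes and evaluates to the new (lower) address, and STACK_ROUND()
 * masks the final stack pointer down to the 16-byte boundary most ABIs
 * require.
 */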

#ifndef ELF_BASE_PLATFORM
/*
 * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
 * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
 * will be copied to the user stack in the same manner as AT_PLATFORM.
 */
#define ELF_BASE_PLATFORM NULL
#endif

static int
create_elf_tables(struct linux_binprm *bprm, const struct elfhdr *exec,
		unsigned long interp_load_addr,
		unsigned long e_entry, unsigned long phdr_addr)
{
	struct mm_struct *mm = current->mm;
	unsigned long p = bprm->p;
	int argc = bprm->argc;
	int envc = bprm->envc;
	elf_addr_t __user *sp;
	elf_addr_t __user *u_platform;
	elf_addr_t __user *u_base_platform;
	elf_addr_t __user *u_rand_bytes;
	const char *k_platform = ELF_PLATFORM;
	const char *k_base_platform = ELF_BASE_PLATFORM;
	unsigned char k_rand_bytes[16];
	int items;
	elf_addr_t *elf_info;
	elf_addr_t flags = 0;
	int ei_index;
	const struct cred *cred = current_cred();
	struct vm_area_struct *vma;

	/*
	 * In some cases (e.g. Hyper-Threading), we want to avoid L1
	 * evictions by the processes running on the same package. One
	 * thing we can do is to shuffle the initial stack for them.
	 */

	p = arch_align_stack(p);

	/*
	 * If this architecture has a platform capability string, copy it
	 * to userspace.  In some cases (Sparc), this info is impossible
	 * for userspace to get any other way, in others (i386) it is
	 * merely difficult.
	 */
	u_platform = NULL;
	if (k_platform) {
		size_t len = strlen(k_platform) + 1;

		u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
		if (copy_to_user(u_platform, k_platform, len))
			return -EFAULT;
	}

	/*
	 * If this architecture has a "base" platform capability
	 * string, copy it to userspace.
	 */
	u_base_platform = NULL;
	if (k_base_platform) {
		size_t len = strlen(k_base_platform) + 1;

		u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
		if (copy_to_user(u_base_platform, k_base_platform, len))
			return -EFAULT;
	}

	/*
	 * Generate 16 random bytes for userspace PRNG seeding.
	 */
	get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
	u_rand_bytes = (elf_addr_t __user *)
		       STACK_ALLOC(p, sizeof(k_rand_bytes));
	if (copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
		return -EFAULT;

	/* Create the ELF interpreter info */
	elf_info = (elf_addr_t *)mm->saved_auxv;
	/* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
#define NEW_AUX_ENT(id, val) \
	do { \
		*elf_info++ = id; \
		*elf_info++ = val; \
	} while (0)

#ifdef ARCH_DLINFO
	/*
	 * ARCH_DLINFO must come first so PPC can do its special alignment of
	 * AUXV.
	 * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
	 * ARCH_DLINFO changes
	 */
	ARCH_DLINFO;
#endif
	NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
	NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
	NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
	NEW_AUX_ENT(AT_PHDR, phdr_addr);
	NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
	NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
	NEW_AUX_ENT(AT_BASE, interp_load_addr);
	if (bprm->interp_flags & BINPRM_FLAGS_PRESERVE_ARGV0)
		flags |= AT_FLAGS_PRESERVE_ARGV0;
	NEW_AUX_ENT(AT_FLAGS, flags);
	NEW_AUX_ENT(AT_ENTRY, e_entry);
	NEW_AUX_ENT(AT_UID, from_kuid_munged(cred->user_ns, cred->uid));
	NEW_AUX_ENT(AT_EUID, from_kuid_munged(cred->user_ns, cred->euid));
	NEW_AUX_ENT(AT_GID, from_kgid_munged(cred->user_ns, cred->gid));
	NEW_AUX_ENT(AT_EGID, from_kgid_munged(cred->user_ns, cred->egid));
	NEW_AUX_ENT(AT_SECURE, bprm->secureexec);
	NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
#ifdef ELF_HWCAP2
	NEW_AUX_ENT(AT_HWCAP2, ELF_HWCAP2);
#endif
	NEW_AUX_ENT(AT_EXECFN, bprm->exec);
	if (k_platform) {
		NEW_AUX_ENT(AT_PLATFORM,
			    (elf_addr_t)(unsigned long)u_platform);
	}
	if (k_base_platform) {
		NEW_AUX_ENT(AT_BASE_PLATFORM,
			    (elf_addr_t)(unsigned long)u_base_platform);
	}
	if (bprm->have_execfd) {
		NEW_AUX_ENT(AT_EXECFD, bprm->execfd);
	}
#ifdef CONFIG_RSEQ
	NEW_AUX_ENT(AT_RSEQ_FEATURE_SIZE, offsetof(struct rseq, end));
	NEW_AUX_ENT(AT_RSEQ_ALIGN, __alignof__(struct rseq));
#endif
#undef NEW_AUX_ENT
	/* AT_NULL is zero; clear the rest too */
	memset(elf_info, 0, (char *)mm->saved_auxv +
			sizeof(mm->saved_auxv) - (char *)elf_info);

	/* And advance past the AT_NULL entry.  */
	elf_info += 2;

	ei_index = elf_info - (elf_addr_t *)mm->saved_auxv;
	sp = STACK_ADD(p, ei_index);

	items = (argc + 1) + (envc + 1) + 1;
	bprm->p = STACK_ROUND(sp, items);

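	/*
	 * From here on we build, from the lowest stack address upwards:
	 *
	 *   argc | argv[0..argc-1] | NULL | envp[0..envc-1] | NULL | auxv
	 *
	 * which is the layout the C runtime expects on process entry.
	 */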
	/* Point sp at the lowest address on the stack */
#ifdef CONFIG_STACK_GROWSUP
	sp = (elf_addr_t __user *)bprm->p - items - ei_index;
	bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
#else
	sp = (elf_addr_t __user *)bprm->p;
#endif

	/*
	 * Grow the stack manually; some architectures have a limit on how
	 * far ahead a user-space access may be in order to grow the stack.
	 */
	if (mmap_write_lock_killable(mm))
		return -EINTR;
	vma = find_extend_vma_locked(mm, bprm->p);
	mmap_write_unlock(mm);
	if (!vma)
		return -EFAULT;

	/* Now, let's put argc (and argv, envp if appropriate) on the stack */
	if (put_user(argc, sp++))
		return -EFAULT;

	/* Populate list of argv pointers back to argv strings. */
	p = mm->arg_end = mm->arg_start;
	while (argc-- > 0) {
		size_t len;
		if (put_user((elf_addr_t)p, sp++))
			return -EFAULT;
		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
		if (!len || len > MAX_ARG_STRLEN)
			return -EINVAL;
		p += len;
	}
	if (put_user(0, sp++))
		return -EFAULT;
	mm->arg_end = p;

	/* Populate list of envp pointers back to envp strings. */
	mm->env_end = mm->env_start = p;
	while (envc-- > 0) {
		size_t len;
		if (put_user((elf_addr_t)p, sp++))
			return -EFAULT;
		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
		if (!len || len > MAX_ARG_STRLEN)
			return -EINVAL;
		p += len;
	}
	if (put_user(0, sp++))
		return -EFAULT;
	mm->env_end = p;

	/* Put the elf_info on the stack in the right place.  */
	if (copy_to_user(sp, mm->saved_auxv, ei_index * sizeof(elf_addr_t)))
		return -EFAULT;
	return 0;
}

/*
 * Map "eppnt->p_filesz" bytes from "filep" offset "eppnt->p_offset"
 * into memory at "addr". (Note that p_filesz is rounded up to the
 * next page, so any extra bytes from the file must be wiped.)
 */
static unsigned long elf_map(struct file *filep, unsigned long addr,
		const struct elf_phdr *eppnt, int prot, int type,
		unsigned long total_size)
{
	unsigned long map_addr;
	unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
	unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
	addr = ELF_PAGESTART(addr);
	size = ELF_PAGEALIGN(size);

	/* mmap() will return -EINVAL if given a zero size, but a
	 * segment with zero filesize is perfectly valid */
	if (!size)
		return addr;

	/*
	 * total_size is the size of the ELF (interpreter) image.
	 * The _first_ mmap needs to know the full size, otherwise
	 * randomization might put this image into an overlapping
	 * position with the ELF binary image (since size < total_size).
	 * So we first map the 'big' image - and unmap the remainder at
	 * the end (this unmapping is needed for ELF images with holes).
	 */
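	/*
	 * E.g. if the segments span 0x28000 bytes in total but this first
	 * PT_LOAD covers only 0x1000 bytes, we mmap 0x28000 bytes once to
	 * reserve the whole range, then munmap the trailing 0x27000 bytes
	 * so the remaining segments can be mapped into the hole.
	 */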
	if (total_size) {
		total_size = ELF_PAGEALIGN(total_size);
		map_addr = vm_mmap(filep, addr, total_size, prot, type, off);
		if (!BAD_ADDR(map_addr))
			vm_munmap(map_addr+size, total_size-size);
	} else
		map_addr = vm_mmap(filep, addr, size, prot, type, off);

	if ((type & MAP_FIXED_NOREPLACE) &&
	    PTR_ERR((void *)map_addr) == -EEXIST)
		pr_info("%d (%s): Uhuuh, elf segment at %px requested but the memory is mapped already\n",
			task_pid_nr(current), current->comm, (void *)addr);

	return map_addr;
}

/*
 * Map "eppnt->p_filesz" bytes from "filep" offset "eppnt->p_offset"
 * into memory at "addr". Memory from "p_filesz" through "p_memsz"
 * rounded up to the next page is zeroed.
 */
static unsigned long elf_load(struct file *filep, unsigned long addr,
		const struct elf_phdr *eppnt, int prot, int type,
		unsigned long total_size)
{
	unsigned long zero_start, zero_end;
	unsigned long map_addr;

	if (eppnt->p_filesz) {
		map_addr = elf_map(filep, addr, eppnt, prot, type, total_size);
		if (BAD_ADDR(map_addr))
			return map_addr;
		if (eppnt->p_memsz > eppnt->p_filesz) {
			zero_start = map_addr + ELF_PAGEOFFSET(eppnt->p_vaddr) +
				eppnt->p_filesz;
			zero_end = map_addr + ELF_PAGEOFFSET(eppnt->p_vaddr) +
				eppnt->p_memsz;

			/*
			 * Zero the end of the last mapped page but ignore
			 * any errors if the segment isn't writable.
			 */
			if (padzero(zero_start) && (prot & PROT_WRITE))
				return -EFAULT;
		}
	} else {
		map_addr = zero_start = ELF_PAGESTART(addr);
		zero_end = zero_start + ELF_PAGEOFFSET(eppnt->p_vaddr) +
			eppnt->p_memsz;
	}
	if (eppnt->p_memsz > eppnt->p_filesz) {
		/*
		 * Map the last of the segment.
		 * If the header is requesting these pages to be
		 * executable, honour that (ppc32 needs this).
		 */
		int error;

		zero_start = ELF_PAGEALIGN(zero_start);
		zero_end = ELF_PAGEALIGN(zero_end);

		error = vm_brk_flags(zero_start, zero_end - zero_start,
				     prot & PROT_EXEC ? VM_EXEC : 0);
		if (error)
			map_addr = error;
	}
	return map_addr;
}

static unsigned long total_mapping_size(const struct elf_phdr *phdr, int nr)
{
	elf_addr_t min_addr = -1;
	elf_addr_t max_addr = 0;
	bool pt_load = false;
	int i;

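	/*
	 * Example: PT_LOADs at [0x0, 0x1000) and [0x200000, 0x201500)
	 * yield 0x201500 bytes, from the page-aligned start of the first
	 * segment to the end of the last one.
	 */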
	for (i = 0; i < nr; i++) {
		if (phdr[i].p_type == PT_LOAD) {
			min_addr = min(min_addr, ELF_PAGESTART(phdr[i].p_vaddr));
			max_addr = max(max_addr, phdr[i].p_vaddr + phdr[i].p_memsz);
			pt_load = true;
		}
	}
	return pt_load ? (max_addr - min_addr) : 0;
}

static int elf_read(struct file *file, void *buf, size_t len, loff_t pos)
{
	ssize_t rv;

	rv = kernel_read(file, buf, len, &pos);
	if (unlikely(rv != len)) {
		return (rv < 0) ? rv : -EIO;
	}
	return 0;
}

static unsigned long maximum_alignment(struct elf_phdr *cmds, int nr)
{
	unsigned long alignment = 0;
	int i;

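	/*
	 * PT_LOAD p_align values above PAGE_SIZE (e.g. 0x200000, as some
	 * x86-64 toolchains emit) request a larger mapping alignment; the
	 * maximum across all PT_LOADs is what gets honoured.
	 */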
	for (i = 0; i < nr; i++) {
		if (cmds[i].p_type == PT_LOAD) {
			unsigned long p_align = cmds[i].p_align;

			/* skip non-power of two alignments as invalid */
			if (!is_power_of_2(p_align))
				continue;
			alignment = max(alignment, p_align);
		}
	}

	/* ensure we align to at least one page */
	return ELF_PAGEALIGN(alignment);
}

/**
 * load_elf_phdrs() - load ELF program headers
 * @elf_ex:   ELF header of the binary whose program headers should be loaded
 * @elf_file: the opened ELF binary file
 *
 * Loads ELF program headers from the binary file elf_file, which has the ELF
 * header pointed to by elf_ex, into a newly allocated array. The caller is
 * responsible for freeing the allocated data. Returns NULL upon failure.
 */
static struct elf_phdr *load_elf_phdrs(const struct elfhdr *elf_ex,
				       struct file *elf_file)
{
	struct elf_phdr *elf_phdata = NULL;
	int retval = -1;
	unsigned int size;

	/*
	 * If the size of this structure has changed, then punt, since
	 * we will be doing the wrong thing.
	 */
	if (elf_ex->e_phentsize != sizeof(struct elf_phdr))
		goto out;

	/* Sanity check the number of program headers... */
	/* ...and their total size. */
	size = sizeof(struct elf_phdr) * elf_ex->e_phnum;
	if (size == 0 || size > 65536 || size > ELF_MIN_ALIGN)
		goto out;

	elf_phdata = kmalloc(size, GFP_KERNEL);
	if (!elf_phdata)
		goto out;

	/* Read in the program headers */
	retval = elf_read(elf_file, elf_phdata, size, elf_ex->e_phoff);

out:
	if (retval) {
		kfree(elf_phdata);
		elf_phdata = NULL;
	}
	return elf_phdata;
}

#ifndef CONFIG_ARCH_BINFMT_ELF_STATE

/**
 * struct arch_elf_state - arch-specific ELF loading state
 *
 * This structure is used to preserve architecture specific data during
 * the loading of an ELF file, throughout the checking of architecture
 * specific ELF headers & through to the point where the ELF load is
 * known to be proceeding (i.e. SET_PERSONALITY).
 *
 * This implementation is a dummy for architectures which require no
 * specific state.
 */
struct arch_elf_state {
};

#define INIT_ARCH_ELF_STATE {}

/**
 * arch_elf_pt_proc() - check a PT_LOPROC..PT_HIPROC ELF program header
 * @ehdr:	The main ELF header
 * @phdr:	The program header to check
 * @elf:	The open ELF file
 * @is_interp:	True if the phdr is from the interpreter of the ELF being
 *		loaded, else false.
 * @state:	Architecture-specific state preserved throughout the process
 *		of loading the ELF.
 *
 * Inspects the program header phdr to validate its correctness and/or
 * suitability for the system. Called once per ELF program header in the
 * range PT_LOPROC to PT_HIPROC, for both the ELF being loaded and its
 * interpreter.
 *
 * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
 *         with that return code.
 */
static inline int arch_elf_pt_proc(struct elfhdr *ehdr,
				   struct elf_phdr *phdr,
				   struct file *elf, bool is_interp,
				   struct arch_elf_state *state)
{
	/* Dummy implementation, always proceed */
	return 0;
}

/**
 * arch_check_elf() - check an ELF executable
 * @ehdr:	The main ELF header
 * @has_interp:	True if the ELF has an interpreter, else false.
 * @interp_ehdr: The interpreter's ELF header
 * @state:	Architecture-specific state preserved throughout the process
 *		of loading the ELF.
 *
 * Provides a final opportunity for architecture code to reject the loading
 * of the ELF & cause an exec syscall to return an error. This is called after
 * all program headers to be checked by arch_elf_pt_proc have been.
 *
 * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
 *         with that return code.
 */
static inline int arch_check_elf(struct elfhdr *ehdr, bool has_interp,
				 struct elfhdr *interp_ehdr,
				 struct arch_elf_state *state)
{
	/* Dummy implementation, always proceed */
	return 0;
}

#endif /* !CONFIG_ARCH_BINFMT_ELF_STATE */

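/*
 * Translate the ELF segment flags into mmap protection bits, e.g.
 * PF_R | PF_X becomes PROT_READ | PROT_EXEC, and then give the
 * architecture a chance to adjust the result (arm64, for instance,
 * uses this hook to add PROT_BTI for BTI-enabled binaries).
 */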
static inline int make_prot(u32 p_flags, struct arch_elf_state *arch_state,
			    bool has_interp, bool is_interp)
{
	int prot = 0;

	if (p_flags & PF_R)
		prot |= PROT_READ;
	if (p_flags & PF_W)
		prot |= PROT_WRITE;
	if (p_flags & PF_X)
		prot |= PROT_EXEC;

	return arch_elf_adjust_prot(prot, arch_state, has_interp, is_interp);
}

/* This is much more generalized than the library routine read function,
   so we keep this separate.  Technically the library read function
   is only provided so that we can read a.out libraries that have
   an ELF header */

static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
		struct file *interpreter,
		unsigned long no_base, struct elf_phdr *interp_elf_phdata,
		struct arch_elf_state *arch_state)
{
	struct elf_phdr *eppnt;
	unsigned long load_addr = 0;
	int load_addr_set = 0;
	unsigned long error = ~0UL;
	unsigned long total_size;
	int i;

	/* First of all, some simple consistency checks */
	if (interp_elf_ex->e_type != ET_EXEC &&
	    interp_elf_ex->e_type != ET_DYN)
		goto out;
	if (!elf_check_arch(interp_elf_ex) ||
	    elf_check_fdpic(interp_elf_ex))
		goto out;
	if (!interpreter->f_op->mmap)
		goto out;

	total_size = total_mapping_size(interp_elf_phdata,
					interp_elf_ex->e_phnum);
	if (!total_size) {
		error = -EINVAL;
		goto out;
	}

	eppnt = interp_elf_phdata;
	for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
		if (eppnt->p_type == PT_LOAD) {
			int elf_type = MAP_PRIVATE;
			int elf_prot = make_prot(eppnt->p_flags, arch_state,
						 true, true);
			unsigned long vaddr = 0;
			unsigned long k, map_addr;

			vaddr = eppnt->p_vaddr;
			if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
				elf_type |= MAP_FIXED;
			else if (no_base && interp_elf_ex->e_type == ET_DYN)
				load_addr = -vaddr;

			map_addr = elf_load(interpreter, load_addr + vaddr,
					eppnt, elf_prot, elf_type, total_size);
			total_size = 0;
			error = map_addr;
			if (BAD_ADDR(map_addr))
				goto out;

			if (!load_addr_set &&
			    interp_elf_ex->e_type == ET_DYN) {
				load_addr = map_addr - ELF_PAGESTART(vaddr);
				load_addr_set = 1;
			}

			/*
			 * Check to see if the section's size will overflow the
			 * allowed task size. Note that p_filesz must always be
			 * <= p_memsz so it's only necessary to check p_memsz.
			 */
			k = load_addr + eppnt->p_vaddr;
			if (BAD_ADDR(k) ||
			    eppnt->p_filesz > eppnt->p_memsz ||
			    eppnt->p_memsz > TASK_SIZE ||
			    TASK_SIZE - eppnt->p_memsz < k) {
				error = -ENOMEM;
				goto out;
			}
		}
	}

	error = load_addr;
out:
	return error;
}

/*
 * These are the functions used to load ELF style executables and shared
 * libraries.  There is no binary dependent code anywhere else.
 */

static int parse_elf_property(const char *data, size_t *off, size_t datasz,
			      struct arch_elf_state *arch,
			      bool have_prev_type, u32 *prev_type)
{
	size_t o, step;
	const struct gnu_property *pr;
	int ret;

	if (*off == datasz)
		return -ENOENT;

	if (WARN_ON_ONCE(*off > datasz || *off % ELF_GNU_PROPERTY_ALIGN))
		return -EIO;
	o = *off;
	datasz -= *off;

	if (datasz < sizeof(*pr))
		return -ENOEXEC;
	pr = (const struct gnu_property *)(data + o);
	o += sizeof(*pr);
	datasz -= sizeof(*pr);

	if (pr->pr_datasz > datasz)
		return -ENOEXEC;

	WARN_ON_ONCE(o % ELF_GNU_PROPERTY_ALIGN);
	step = round_up(pr->pr_datasz, ELF_GNU_PROPERTY_ALIGN);
	if (step > datasz)
		return -ENOEXEC;

	/* Properties are supposed to be unique and sorted on pr_type: */
	if (have_prev_type && pr->pr_type <= *prev_type)
		return -ENOEXEC;
	*prev_type = pr->pr_type;

	ret = arch_parse_elf_property(pr->pr_type, data + o,
				      pr->pr_datasz, ELF_COMPAT, arch);
	if (ret)
		return ret;

	*off = o + step;
	return 0;
}

#define NOTE_DATA_SZ SZ_1K
#define GNU_PROPERTY_TYPE_0_NAME "GNU"
#define NOTE_NAME_SZ (sizeof(GNU_PROPERTY_TYPE_0_NAME))

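/*
 * A PT_GNU_PROPERTY segment holds a single ELF note laid out as:
 *
 *   struct elf_note  (n_namesz == 4, n_descsz, n_type == NT_GNU_PROPERTY_TYPE_0)
 *   "GNU\0"          (the owner name, NOTE_NAME_SZ bytes)
 *   descriptor       (the gnu_property entries, suitably aligned)
 */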
static int parse_elf_properties(struct file *f, const struct elf_phdr *phdr,
				struct arch_elf_state *arch)
{
	union {
		struct elf_note nhdr;
		char data[NOTE_DATA_SZ];
	} note;
	loff_t pos;
	ssize_t n;
	size_t off, datasz;
	int ret;
	bool have_prev_type;
	u32 prev_type;

	if (!IS_ENABLED(CONFIG_ARCH_USE_GNU_PROPERTY) || !phdr)
		return 0;

	/* load_elf_binary() shouldn't call us unless this is true... */
	if (WARN_ON_ONCE(phdr->p_type != PT_GNU_PROPERTY))
		return -ENOEXEC;

	/* If the properties are crazy large, that's too bad (for now): */
	if (phdr->p_filesz > sizeof(note))
		return -ENOEXEC;

	pos = phdr->p_offset;
	n = kernel_read(f, &note, phdr->p_filesz, &pos);

	BUILD_BUG_ON(sizeof(note) < sizeof(note.nhdr) + NOTE_NAME_SZ);
	if (n < 0 || n < sizeof(note.nhdr) + NOTE_NAME_SZ)
		return -EIO;

	if (note.nhdr.n_type != NT_GNU_PROPERTY_TYPE_0 ||
	    note.nhdr.n_namesz != NOTE_NAME_SZ ||
	    strncmp(note.data + sizeof(note.nhdr),
		    GNU_PROPERTY_TYPE_0_NAME, n - sizeof(note.nhdr)))
		return -ENOEXEC;

	off = round_up(sizeof(note.nhdr) + NOTE_NAME_SZ,
		       ELF_GNU_PROPERTY_ALIGN);
	if (off > n)
		return -ENOEXEC;

	if (note.nhdr.n_descsz > n - off)
		return -ENOEXEC;
	datasz = off + note.nhdr.n_descsz;

	have_prev_type = false;
	do {
		ret = parse_elf_property(note.data, &off, datasz, arch,
					 have_prev_type, &prev_type);
		have_prev_type = true;
	} while (!ret);

	return ret == -ENOENT ? 0 : ret;
}

static int load_elf_binary(struct linux_binprm *bprm)
{
	struct file *interpreter = NULL; /* to shut gcc up */
	unsigned long load_bias = 0, phdr_addr = 0;
	int first_pt_load = 1;
	unsigned long error;
	struct elf_phdr *elf_ppnt, *elf_phdata, *interp_elf_phdata = NULL;
	struct elf_phdr *elf_property_phdata = NULL;
	unsigned long elf_brk;
	int retval, i;
	unsigned long elf_entry;
	unsigned long e_entry;
	unsigned long interp_load_addr = 0;
	unsigned long start_code, end_code, start_data, end_data;
	unsigned long reloc_func_desc __maybe_unused = 0;
	int executable_stack = EXSTACK_DEFAULT;
	struct elfhdr *elf_ex = (struct elfhdr *)bprm->buf;
	struct elfhdr *interp_elf_ex = NULL;
	struct arch_elf_state arch_state = INIT_ARCH_ELF_STATE;
	struct mm_struct *mm;
	struct pt_regs *regs;

	retval = -ENOEXEC;
	/* First of all, some simple consistency checks */
	if (memcmp(elf_ex->e_ident, ELFMAG, SELFMAG) != 0)
		goto out;

	if (elf_ex->e_type != ET_EXEC && elf_ex->e_type != ET_DYN)
		goto out;
	if (!elf_check_arch(elf_ex))
		goto out;
	if (elf_check_fdpic(elf_ex))
		goto out;
	if (!bprm->file->f_op->mmap)
		goto out;

	elf_phdata = load_elf_phdrs(elf_ex, bprm->file);
	if (!elf_phdata)
		goto out;

	elf_ppnt = elf_phdata;
	for (i = 0; i < elf_ex->e_phnum; i++, elf_ppnt++) {
		char *elf_interpreter;

		if (elf_ppnt->p_type == PT_GNU_PROPERTY) {
			elf_property_phdata = elf_ppnt;
			continue;
		}

		if (elf_ppnt->p_type != PT_INTERP)
			continue;

		/*
		 * This is the program interpreter used for shared libraries -
		 * for now assume that this is an a.out format binary.
		 */
		retval = -ENOEXEC;
		if (elf_ppnt->p_filesz > PATH_MAX || elf_ppnt->p_filesz < 2)
			goto out_free_ph;

		retval = -ENOMEM;
		elf_interpreter = kmalloc(elf_ppnt->p_filesz, GFP_KERNEL);
		if (!elf_interpreter)
			goto out_free_ph;

		retval = elf_read(bprm->file, elf_interpreter, elf_ppnt->p_filesz,
				  elf_ppnt->p_offset);
		if (retval < 0)
			goto out_free_interp;
		/* make sure path is NULL terminated */
		retval = -ENOEXEC;
		if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
			goto out_free_interp;

		interpreter = open_exec(elf_interpreter);
		kfree(elf_interpreter);
		retval = PTR_ERR(interpreter);
		if (IS_ERR(interpreter))
			goto out_free_ph;

		/*
		 * If the binary is not readable then enforce mm->dumpable = 0
		 * regardless of the interpreter's permissions.
		 */
		would_dump(bprm, interpreter);

		interp_elf_ex = kmalloc(sizeof(*interp_elf_ex), GFP_KERNEL);
		if (!interp_elf_ex) {
			retval = -ENOMEM;
			goto out_free_file;
		}

		/* Get the exec headers */
		retval = elf_read(interpreter, interp_elf_ex,
				  sizeof(*interp_elf_ex), 0);
		if (retval < 0)
			goto out_free_dentry;

		break;

out_free_interp:
		kfree(elf_interpreter);
		goto out_free_ph;
	}

	elf_ppnt = elf_phdata;
	for (i = 0; i < elf_ex->e_phnum; i++, elf_ppnt++)
		switch (elf_ppnt->p_type) {
		case PT_GNU_STACK:
			if (elf_ppnt->p_flags & PF_X)
				executable_stack = EXSTACK_ENABLE_X;
			else
				executable_stack = EXSTACK_DISABLE_X;
			break;

		case PT_LOPROC ... PT_HIPROC:
			retval = arch_elf_pt_proc(elf_ex, elf_ppnt,
						  bprm->file, false,
						  &arch_state);
			if (retval)
				goto out_free_dentry;
			break;
		}

	/* Some simple consistency checks for the interpreter */
	if (interpreter) {
		retval = -ELIBBAD;
		/* Not an ELF interpreter */
		if (memcmp(interp_elf_ex->e_ident, ELFMAG, SELFMAG) != 0)
			goto out_free_dentry;
		/* Verify the interpreter has a valid arch */
		if (!elf_check_arch(interp_elf_ex) ||
		    elf_check_fdpic(interp_elf_ex))
			goto out_free_dentry;

		/* Load the interpreter program headers */
		interp_elf_phdata = load_elf_phdrs(interp_elf_ex,
						   interpreter);
		if (!interp_elf_phdata)
			goto out_free_dentry;

		/* Pass PT_LOPROC..PT_HIPROC headers to arch code */
		elf_property_phdata = NULL;
		elf_ppnt = interp_elf_phdata;
		for (i = 0; i < interp_elf_ex->e_phnum; i++, elf_ppnt++)
			switch (elf_ppnt->p_type) {
			case PT_GNU_PROPERTY:
				elf_property_phdata = elf_ppnt;
				break;

			case PT_LOPROC ... PT_HIPROC:
				retval = arch_elf_pt_proc(interp_elf_ex,
							  elf_ppnt, interpreter,
							  true, &arch_state);
				if (retval)
					goto out_free_dentry;
				break;
			}
	}

	retval = parse_elf_properties(interpreter ?: bprm->file,
				      elf_property_phdata, &arch_state);
	if (retval)
		goto out_free_dentry;

	/*
	 * Allow arch code to reject the ELF at this point, whilst it's
	 * still possible to return an error to the code that invoked
	 * the exec syscall.
	 */
	retval = arch_check_elf(elf_ex,
				!!interpreter, interp_elf_ex,
				&arch_state);
	if (retval)
		goto out_free_dentry;

	/* Flush all traces of the currently running executable */
	retval = begin_new_exec(bprm);
	if (retval)
		goto out_free_dentry;

	/* Do this immediately, since STACK_TOP as used in setup_arg_pages
	   may depend on the personality.  */
	SET_PERSONALITY2(*elf_ex, &arch_state);
	if (elf_read_implies_exec(*elf_ex, executable_stack))
		current->personality |= READ_IMPLIES_EXEC;

	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
		current->flags |= PF_RANDOMIZE;

	setup_new_exec(bprm);

	/* Do this so that we can load the interpreter, if need be.  We will
	   change some of these later */
	retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
				 executable_stack);
	if (retval < 0)
		goto out_free_dentry;

	elf_brk = 0;

	start_code = ~0UL;
	end_code = 0;
	start_data = 0;
	end_data = 0;

	/* Now we do a little grungy work by mmapping the ELF image into
	   the correct location in memory. */
	for (i = 0, elf_ppnt = elf_phdata;
	    i < elf_ex->e_phnum; i++, elf_ppnt++) {
		int elf_prot, elf_flags;
		unsigned long k, vaddr;
		unsigned long total_size = 0;
		unsigned long alignment;

		if (elf_ppnt->p_type != PT_LOAD)
			continue;

		elf_prot = make_prot(elf_ppnt->p_flags, &arch_state,
				     !!interpreter, false);

		elf_flags = MAP_PRIVATE;

		vaddr = elf_ppnt->p_vaddr;
		/*
		 * The first time through the loop, first_pt_load is true:
		 * layout will be calculated. Once set, use MAP_FIXED since
		 * we know we've already safely mapped the entire region with
		 * MAP_FIXED_NOREPLACE in the once-per-binary logic following.
		 */
		if (!first_pt_load) {
			elf_flags |= MAP_FIXED;
		} else if (elf_ex->e_type == ET_EXEC) {
			/*
			 * This logic is run once for the first LOAD Program
			 * Header for ET_EXEC binaries. No special handling
			 * is needed.
			 */
			elf_flags |= MAP_FIXED_NOREPLACE;
		} else if (elf_ex->e_type == ET_DYN) {
			/*
			 * This logic is run once for the first LOAD Program
			 * Header for ET_DYN binaries to calculate the
			 * randomization (load_bias) for all the LOAD
			 * Program Headers.
			 *
			 * There are effectively two types of ET_DYN
			 * binaries: programs (i.e. PIE: ET_DYN with INTERP)
			 * and loaders (ET_DYN without INTERP, since they
			 * _are_ the ELF interpreter). The loaders must
			 * be loaded away from programs since the program
			 * may otherwise collide with the loader (especially
			 * for ET_EXEC which does not have a randomized
			 * position). For example to handle invocations of
			 * "./ld.so someprog" to test out a new version of
			 * the loader, the subsequent program that the
			 * loader loads must avoid the loader itself, so
			 * they cannot share the same load range. Sufficient
			 * room for the brk must be allocated with the
			 * loader as well, since brk must be available with
			 * the loader.
			 *
			 * Therefore, programs are loaded offset from
			 * ELF_ET_DYN_BASE and loaders are loaded into the
			 * independently randomized mmap region (0 load_bias
			 * without MAP_FIXED nor MAP_FIXED_NOREPLACE).
			 */
			if (interpreter) {
				load_bias = ELF_ET_DYN_BASE;
				if (current->flags & PF_RANDOMIZE)
					load_bias += arch_mmap_rnd();
				alignment = maximum_alignment(elf_phdata, elf_ex->e_phnum);
				if (alignment)
					load_bias &= ~(alignment - 1);
				elf_flags |= MAP_FIXED_NOREPLACE;
			} else
				load_bias = 0;

			/*
			 * Since load_bias is used for all subsequent loading
			 * calculations, we must lower it by the first vaddr
			 * so that the remaining calculations based on the
			 * ELF vaddrs will be correctly offset. The result
			 * is then page aligned.
			 */
			load_bias = ELF_PAGESTART(load_bias - vaddr);

			/*
			 * Calculate the entire size of the ELF mapping
			 * (total_size), used for the initial mapping,
			 * because first_pt_load is cleared once the
			 * initial mapping has been performed.
			 *
			 * Note that this is only sensible when the LOAD
			 * segments are contiguous (or overlapping). If
			 * used for LOADs that are far apart, this would
			 * cause the holes between LOADs to be mapped,
			 * running the risk of having the mapping fail,
			 * as it would be larger than the ELF file itself.
			 *
			 * As a result, only ET_DYN does this, since
			 * some ET_EXEC (e.g. ia64) may have large virtual
			 * memory holes between LOADs.
			 *
			 */
			total_size = total_mapping_size(elf_phdata,
							elf_ex->e_phnum);
			if (!total_size) {
				retval = -EINVAL;
				goto out_free_dentry;
			}
		}

		error = elf_load(bprm->file, load_bias + vaddr, elf_ppnt,
				elf_prot, elf_flags, total_size);
		if (BAD_ADDR(error)) {
			retval = IS_ERR_VALUE(error) ?
				PTR_ERR((void*)error) : -EINVAL;
			goto out_free_dentry;
		}

		if (first_pt_load) {
			first_pt_load = 0;
			if (elf_ex->e_type == ET_DYN) {
				load_bias += error -
				             ELF_PAGESTART(load_bias + vaddr);
				reloc_func_desc = load_bias;
			}
		}

		/*
		 * Figure out which segment in the file contains the Program
		 * Header table, and map to the associated memory address.
		 */
		if (elf_ppnt->p_offset <= elf_ex->e_phoff &&
		    elf_ex->e_phoff < elf_ppnt->p_offset + elf_ppnt->p_filesz) {
			phdr_addr = elf_ex->e_phoff - elf_ppnt->p_offset +
				    elf_ppnt->p_vaddr;
		}

		k = elf_ppnt->p_vaddr;
		if ((elf_ppnt->p_flags & PF_X) && k < start_code)
			start_code = k;
		if (start_data < k)
			start_data = k;

		/*
		 * Check to see if the section's size will overflow the
		 * allowed task size. Note that p_filesz must always be
		 * <= p_memsz so it is only necessary to check p_memsz.
		 */
		if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
		    elf_ppnt->p_memsz > TASK_SIZE ||
		    TASK_SIZE - elf_ppnt->p_memsz < k) {
			/* set_brk can never work. Avoid overflows. */
			retval = -EINVAL;
			goto out_free_dentry;
		}

		k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;

		if ((elf_ppnt->p_flags & PF_X) && end_code < k)
			end_code = k;
		if (end_data < k)
			end_data = k;
		k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
		if (k > elf_brk)
			elf_brk = k;
	}

	e_entry = elf_ex->e_entry + load_bias;
	phdr_addr += load_bias;
	elf_brk += load_bias;
	start_code += load_bias;
	end_code += load_bias;
	start_data += load_bias;
	end_data += load_bias;

	current->mm->start_brk = current->mm->brk = ELF_PAGEALIGN(elf_brk);

	if (interpreter) {
		elf_entry = load_elf_interp(interp_elf_ex,
					    interpreter,
					    load_bias, interp_elf_phdata,
					    &arch_state);
		if (!IS_ERR_VALUE(elf_entry)) {
			/*
			 * load_elf_interp() returns relocation
			 * adjustment
			 */
			interp_load_addr = elf_entry;
			elf_entry += interp_elf_ex->e_entry;
		}
		if (BAD_ADDR(elf_entry)) {
			retval = IS_ERR_VALUE(elf_entry) ?
					(int)elf_entry : -EINVAL;
			goto out_free_dentry;
		}
		reloc_func_desc = interp_load_addr;

		allow_write_access(interpreter);
		fput(interpreter);

		kfree(interp_elf_ex);
		kfree(interp_elf_phdata);
	} else {
		elf_entry = e_entry;
		if (BAD_ADDR(elf_entry)) {
			retval = -EINVAL;
			goto out_free_dentry;
		}
	}

	kfree(elf_phdata);

	set_binfmt(&elf_format);

#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
	retval = ARCH_SETUP_ADDITIONAL_PAGES(bprm, elf_ex, !!interpreter);
	if (retval < 0)
		goto out;
#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */

	retval = create_elf_tables(bprm, elf_ex, interp_load_addr,
				   e_entry, phdr_addr);
	if (retval < 0)
		goto out;

	mm = current->mm;
	mm->end_code = end_code;
	mm->start_code = start_code;
	mm->start_data = start_data;
	mm->end_data = end_data;
	mm->start_stack = bprm->p;

	if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
		/*
		 * For architectures with ELF randomization, when executing
		 * a loader directly (i.e. no interpreter listed in ELF
		 * headers), move the brk area out of the mmap region
		 * (since it grows up, and may collide early with the stack
		 * growing down), and into the unused ELF_ET_DYN_BASE region.
		 */
		if (IS_ENABLED(CONFIG_ARCH_HAS_ELF_RANDOMIZE) &&
		    elf_ex->e_type == ET_DYN && !interpreter) {
			mm->brk = mm->start_brk = ELF_ET_DYN_BASE;
		} else {
			/* Otherwise leave a gap between .bss and brk. */
			mm->brk = mm->start_brk = mm->brk + PAGE_SIZE;
		}

		mm->brk = mm->start_brk = arch_randomize_brk(mm);
#ifdef compat_brk_randomized
		current->brk_randomized = 1;
#endif
	}

	if (current->personality & MMAP_PAGE_ZERO) {
		/* Why this, you ask???  Well SVr4 maps page 0 as read-only,
		   and some applications "depend" upon this behavior.
		   Since we do not have the power to recompile these, we
		   emulate the SVr4 behavior. Sigh. */
		error = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
				MAP_FIXED | MAP_PRIVATE, 0);
	}

	regs = current_pt_regs();
#ifdef ELF_PLAT_INIT
	/*
	 * The ABI may specify that certain registers be set up in special
	 * ways (on i386 %edx is the address of a DT_FINI function, for
	 * example).  In addition, it may also specify (e.g., PowerPC64 ELF)
	 * that the e_entry field is the address of the function descriptor
	 * for the startup routine, rather than the address of the startup
	 * routine itself.  This macro performs whatever initialization to
	 * the regs structure is required as well as any relocations to the
	 * function descriptor entries when executing dynamically linked apps.
	 */
	ELF_PLAT_INIT(regs, reloc_func_desc);
#endif

	finalize_exec(bprm);
	START_THREAD(elf_ex, regs, elf_entry, bprm->p);
	retval = 0;
out:
	return retval;

	/* error cleanup */
out_free_dentry:
	kfree(interp_elf_ex);
	kfree(interp_elf_phdata);
out_free_file:
	allow_write_access(interpreter);
	if (interpreter)
		fput(interpreter);
out_free_ph:
	kfree(elf_phdata);
	goto out;
}

#ifdef CONFIG_USELIB
/* This is really simpleminded and specialized - we are loading an
   a.out library that is given an ELF header. */
static int load_elf_library(struct file *file)
{
	struct elf_phdr *elf_phdata;
	struct elf_phdr *eppnt;
	int retval, error, i, j;
	struct elfhdr elf_ex;

	error = -ENOEXEC;
	retval = elf_read(file, &elf_ex, sizeof(elf_ex), 0);
	if (retval < 0)
		goto out;

	if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
		goto out;

	/* First of all, some simple consistency checks */
	if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
	    !elf_check_arch(&elf_ex) || !file->f_op->mmap)
		goto out;
	if (elf_check_fdpic(&elf_ex))
		goto out;

	/* Now read in all of the header information */

	j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
	/* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */

	error = -ENOMEM;
	elf_phdata = kmalloc(j, GFP_KERNEL);
	if (!elf_phdata)
		goto out;

	eppnt = elf_phdata;
	error = -ENOEXEC;
	retval = elf_read(file, eppnt, j, elf_ex.e_phoff);
	if (retval < 0)
		goto out_free_ph;

	for (j = 0, i = 0; i < elf_ex.e_phnum; i++)
		if ((eppnt + i)->p_type == PT_LOAD)
			j++;
	if (j != 1)
		goto out_free_ph;

	while (eppnt->p_type != PT_LOAD)
		eppnt++;

	/* Now use mmap to map the library into memory. */
	error = elf_load(file, ELF_PAGESTART(eppnt->p_vaddr),
			eppnt,
			PROT_READ | PROT_WRITE | PROT_EXEC,
			MAP_FIXED_NOREPLACE | MAP_PRIVATE,
			0);

	if (error != ELF_PAGESTART(eppnt->p_vaddr))
		goto out_free_ph;

	error = 0;

out_free_ph:
	kfree(elf_phdata);
out:
	return error;
}
#endif /* #ifdef CONFIG_USELIB */

#ifdef CONFIG_ELF_CORE
/*
 * ELF core dumper
 *
 * Modelled on fs/exec.c:aout_core_dump()
 * Jeremy Fitzhardinge <jeremy@sw.oz.au>
 */

/* An ELF note in memory */
struct memelfnote
{
	const char *name;
	int type;
	unsigned int datasz;
	void *data;
};

static int notesize(struct memelfnote *en)
{
	int sz;

	sz = sizeof(struct elf_note);
	sz += roundup(strlen(en->name) + 1, 4);
	sz += roundup(en->datasz, 4);

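	/*
	 * Example: a note named "CORE" (5 bytes, padded to 8) with an
	 * 8-byte descriptor totals sizeof(struct elf_note) + 8 + 8 bytes.
	 */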
	return sz;
}

static int writenote(struct memelfnote *men, struct coredump_params *cprm)
{
	struct elf_note en;
	en.n_namesz = strlen(men->name) + 1;
	en.n_descsz = men->datasz;
	en.n_type = men->type;

	return dump_emit(cprm, &en, sizeof(en)) &&
	    dump_emit(cprm, men->name, en.n_namesz) && dump_align(cprm, 4) &&
	    dump_emit(cprm, men->data, men->datasz) && dump_align(cprm, 4);
}

static void fill_elf_header(struct elfhdr *elf, int segs,
			    u16 machine, u32 flags)
{
	memset(elf, 0, sizeof(*elf));

	memcpy(elf->e_ident, ELFMAG, SELFMAG);
	elf->e_ident[EI_CLASS] = ELF_CLASS;
	elf->e_ident[EI_DATA] = ELF_DATA;
	elf->e_ident[EI_VERSION] = EV_CURRENT;
	elf->e_ident[EI_OSABI] = ELF_OSABI;

	elf->e_type = ET_CORE;
	elf->e_machine = machine;
	elf->e_version = EV_CURRENT;
	elf->e_phoff = sizeof(struct elfhdr);
	elf->e_flags = flags;
	elf->e_ehsize = sizeof(struct elfhdr);
	elf->e_phentsize = sizeof(struct elf_phdr);
	elf->e_phnum = segs;
}

static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
{
	phdr->p_type = PT_NOTE;
	phdr->p_offset = offset;
	phdr->p_vaddr = 0;
	phdr->p_paddr = 0;
	phdr->p_filesz = sz;
	phdr->p_memsz = 0;
	phdr->p_flags = 0;
	phdr->p_align = 4;
}

static void fill_note(struct memelfnote *note, const char *name, int type,
		unsigned int sz, void *data)
{
	note->name = name;
	note->type = type;
	note->datasz = sz;
	note->data = data;
}

/*
 * fill up all the fields in prstatus from the given task struct, except
 * registers which need to be filled up separately.
 */
static void fill_prstatus(struct elf_prstatus_common *prstatus,
		struct task_struct *p, long signr)
{
	prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
	prstatus->pr_sigpend = p->pending.signal.sig[0];
	prstatus->pr_sighold = p->blocked.sig[0];
	rcu_read_lock();
	prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
	rcu_read_unlock();
	prstatus->pr_pid = task_pid_vnr(p);
	prstatus->pr_pgrp = task_pgrp_vnr(p);
	prstatus->pr_sid = task_session_vnr(p);
	if (thread_group_leader(p)) {
		struct task_cputime cputime;

		/*
		 * This is the record for the group leader.  It shows the
		 * group-wide total, not its individual thread total.
		 */
		thread_group_cputime(p, &cputime);
		prstatus->pr_utime = ns_to_kernel_old_timeval(cputime.utime);
		prstatus->pr_stime = ns_to_kernel_old_timeval(cputime.stime);
	} else {
		u64 utime, stime;

		task_cputime(p, &utime, &stime);
		prstatus->pr_utime = ns_to_kernel_old_timeval(utime);
		prstatus->pr_stime = ns_to_kernel_old_timeval(stime);
	}

	prstatus->pr_cutime = ns_to_kernel_old_timeval(p->signal->cutime);
	prstatus->pr_cstime = ns_to_kernel_old_timeval(p->signal->cstime);
}

static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
		       struct mm_struct *mm)
{
	const struct cred *cred;
	unsigned int i, len;
	unsigned int state;

	/* first copy the parameters from user space */
	memset(psinfo, 0, sizeof(struct elf_prpsinfo));

	len = mm->arg_end - mm->arg_start;
	if (len >= ELF_PRARGSZ)
		len = ELF_PRARGSZ-1;
	if (copy_from_user(&psinfo->pr_psargs,
		           (const char __user *)mm->arg_start, len))
		return -EFAULT;
	for (i = 0; i < len; i++)
		if (psinfo->pr_psargs[i] == 0)
			psinfo->pr_psargs[i] = ' ';
	psinfo->pr_psargs[len] = 0;

	rcu_read_lock();
	psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
	rcu_read_unlock();
	psinfo->pr_pid = task_pid_vnr(p);
	psinfo->pr_pgrp = task_pgrp_vnr(p);
	psinfo->pr_sid = task_session_vnr(p);

	state = READ_ONCE(p->__state);
	i = state ? ffz(~state) + 1 : 0;
	psinfo->pr_state = i;
	psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
	psinfo->pr_zomb = psinfo->pr_sname == 'Z';
	psinfo->pr_nice = task_nice(p);
	psinfo->pr_flag = p->flags;
	rcu_read_lock();
	cred = __task_cred(p);
	SET_UID(psinfo->pr_uid, from_kuid_munged(cred->user_ns, cred->uid));
	SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid));
	rcu_read_unlock();
	get_task_comm(psinfo->pr_fname, p);

	return 0;
}

static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
{
	elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
	int i = 0;
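	/*
	 * Walk saved_auxv two words at a time (each entry is an (id, value)
	 * pair) until the terminating AT_NULL entry, which is included in
	 * the note.
	 */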
	do
		i += 2;
	while (auxv[i - 2] != AT_NULL);
	fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
}

static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
		const kernel_siginfo_t *siginfo)
{
	copy_siginfo_to_external(csigdata, siginfo);
	fill_note(note, "CORE", NT_SIGINFO, sizeof(*csigdata), csigdata);
}

/*
 * Format of NT_FILE note:
 *
 * long count     -- how many files are mapped
 * long page_size -- units for file_ofs
 * array of [COUNT] elements of
 *   long start
 *   long end
 *   long file_ofs
 * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL...
 */
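/*
 * For two mapped files, for instance, the note data would be, in
 * user_long_t units:
 *
 *   { 2, PAGE_SIZE, start1, end1, ofs1, start2, end2, ofs2 }
 *
 * followed by "file1\0file2\0".
 */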
static int fill_files_note(struct memelfnote *note, struct coredump_params *cprm)
{
	unsigned count, size, names_ofs, remaining, n;
	user_long_t *data;
	user_long_t *start_end_ofs;
	char *name_base, *name_curpos;
	int i;

	/* *Estimated* file count and total data size needed */
	count = cprm->vma_count;
	if (count > UINT_MAX / 64)
		return -EINVAL;
	size = count * 64;

	names_ofs = (2 + 3 * count) * sizeof(data[0]);
 alloc:
	/* paranoia check */
	if (size >= core_file_note_size_limit) {
		pr_warn_once("coredump Note size too large: %u (does kernel.core_file_note_size_limit sysctl need adjustment?)\n",
			      size);
		return -EINVAL;
	}
	size = round_up(size, PAGE_SIZE);
	/*
	 * "size" can be 0 here legitimately.
	 * Let it ENOMEM and omit NT_FILE section which will be empty anyway.
	 */
	data = kvmalloc(size, GFP_KERNEL);
	if (ZERO_OR_NULL_PTR(data))
		return -ENOMEM;

	start_end_ofs = data + 2;
	name_base = name_curpos = ((char *)data) + names_ofs;
	remaining = size - names_ofs;
	count = 0;
	for (i = 0; i < cprm->vma_count; i++) {
		struct core_vma_metadata *m = &cprm->vma_meta[i];
		struct file *file;
		const char *filename;

		file = m->file;
		if (!file)
			continue;
		filename = file_path(file, name_curpos, remaining);
		if (IS_ERR(filename)) {
			if (PTR_ERR(filename) == -ENAMETOOLONG) {
				kvfree(data);
				size = size * 5 / 4;
				goto alloc;
			}
			continue;
		}

		/* file_path() fills at the end, move name down */
		/* n = strlen(filename) + 1: */
		n = (name_curpos + remaining) - filename;
		remaining = filename - name_curpos;
		memmove(name_curpos, filename, n);
		name_curpos += n;

		*start_end_ofs++ = m->start;
		*start_end_ofs++ = m->end;
		*start_end_ofs++ = m->pgoff;
		count++;
	}

	/* Now we know exact count of files, can store it */
	data[0] = count;
	data[1] = PAGE_SIZE;
	/*
	 * Count usually is less than mm->map_count,
	 * we need to move filenames down.
	 */
	n = cprm->vma_count - count;
	if (n != 0) {
		unsigned shift_bytes = n * 3 * sizeof(data[0]);
		memmove(name_base - shift_bytes, name_base,
			name_curpos - name_base);
		name_curpos -= shift_bytes;
	}

	size = name_curpos - (char *)data;
	fill_note(note, "CORE", NT_FILE, size, data);
	return 0;
}

#include <linux/regset.h>

struct elf_thread_core_info {
	struct elf_thread_core_info *next;
	struct task_struct *task;
	struct elf_prstatus prstatus;
	struct memelfnote notes[];
};

struct elf_note_info {
	struct elf_thread_core_info *thread;
	struct memelfnote psinfo;
	struct memelfnote signote;
	struct memelfnote auxv;
	struct memelfnote files;
	user_siginfo_t csigdata;
	size_t size;
	int thread_notes;
};

#ifdef CORE_DUMP_USE_REGSET
/*
 * When a regset has a writeback hook, we call it on each thread before
 * dumping user memory.  On register window machines, this makes sure the
 * user memory backing the register data is up to date before we read it.
 */
static void do_thread_regset_writeback(struct task_struct *task,
				       const struct user_regset *regset)
{
	if (regset->writeback)
		regset->writeback(task, regset, 1);
}

#ifndef PRSTATUS_SIZE
#define PRSTATUS_SIZE sizeof(struct elf_prstatus)
#endif

#ifndef SET_PR_FPVALID
#define SET_PR_FPVALID(S) ((S)->pr_fpvalid = 1)
#endif

static int fill_thread_core_info(struct elf_thread_core_info *t,
				 const struct user_regset_view *view,
				 long signr, struct elf_note_info *info)
{
	unsigned int note_iter, view_iter;

	/*
	 * NT_PRSTATUS is the one special case, because the regset data
	 * goes into the pr_reg field inside the note contents, rather
	 * than being the whole note contents.  We fill the regset in here.
	 * We assume that regset 0 is NT_PRSTATUS.
	 */
	fill_prstatus(&t->prstatus.common, t->task, signr);
	regset_get(t->task, &view->regsets[0],
		   sizeof(t->prstatus.pr_reg), &t->prstatus.pr_reg);

	fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
		  PRSTATUS_SIZE, &t->prstatus);
	info->size += notesize(&t->notes[0]);

	do_thread_regset_writeback(t->task, &view->regsets[0]);

	/*
	 * Each other regset might generate a note too.  For each regset
	 * that has no core_note_type or is inactive, skip it.
	 */
	note_iter = 1;
	for (view_iter = 1; view_iter < view->n; ++view_iter) {
		const struct user_regset *regset = &view->regsets[view_iter];
		int note_type = regset->core_note_type;
		bool is_fpreg = note_type == NT_PRFPREG;
		void *data;
		int ret;

		do_thread_regset_writeback(t->task, regset);
		if (!note_type) // not for coredumps
			continue;
		if (regset->active && regset->active(t->task, regset) <= 0)
			continue;

		ret = regset_get_alloc(t->task, regset, ~0U, &data);
		if (ret < 0)
			continue;

		if (WARN_ON_ONCE(note_iter >= info->thread_notes))
			break;

		if (is_fpreg)
			SET_PR_FPVALID(&t->prstatus);

		fill_note(&t->notes[note_iter], is_fpreg ? "CORE" : "LINUX",
			  note_type, ret, data);

		info->size += notesize(&t->notes[note_iter]);
		note_iter++;
	}

	return 1;
}
#else
static int fill_thread_core_info(struct elf_thread_core_info *t,
				 const struct user_regset_view *view,
				 long signr, struct elf_note_info *info)
{
	struct task_struct *p = t->task;
	elf_fpregset_t *fpu;

	fill_prstatus(&t->prstatus.common, p, signr);
	elf_core_copy_task_regs(p, &t->prstatus.pr_reg);

	fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
		  &(t->prstatus));
	info->size += notesize(&t->notes[0]);

	fpu = kzalloc(sizeof(elf_fpregset_t), GFP_KERNEL);
	if (!fpu || !elf_core_copy_task_fpregs(p, fpu)) {
		kfree(fpu);
		return 1;
	}

	t->prstatus.pr_fpvalid = 1;
	fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(*fpu), fpu);
	info->size += notesize(&t->notes[1]);

	return 1;
}
#endif

static int fill_note_info(struct elfhdr *elf, int phdrs,
			  struct elf_note_info *info,
			  struct coredump_params *cprm)
{
	struct task_struct *dump_task = current;
	const struct user_regset_view *view;
	struct elf_thread_core_info *t;
	struct elf_prpsinfo *psinfo;
	struct core_thread *ct;

	psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
	if (!psinfo)
		return 0;
	fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);

#ifdef CORE_DUMP_USE_REGSET
	view = task_user_regset_view(dump_task);

	/*
	 * Figure out how many notes we're going to need for each thread.
	 */
	info->thread_notes = 0;
	for (int i = 0; i < view->n; ++i)
		if (view->regsets[i].core_note_type != 0)
			++info->thread_notes;

	/*
	 * Sanity check.  We rely on regset 0 being NT_PRSTATUS,
1824	 * since it is our one special case.
1825	 */
1826	if (unlikely(info->thread_notes == 0) ||
1827	    unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1828		WARN_ON(1);
1829		return 0;
1830	}
1831
1832	/*
1833	 * Initialize the ELF file header.
1834	 */
1835	fill_elf_header(elf, phdrs,
1836			view->e_machine, view->e_flags);
1837#else
1838	view = NULL;
	info->thread_notes = 2;
	fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS);
#endif

	/*
	 * Allocate a structure for each thread.
	 */
	info->thread = kzalloc(offsetof(struct elf_thread_core_info,
				     notes[info->thread_notes]),
			    GFP_KERNEL);
	if (unlikely(!info->thread))
		return 0;

	info->thread->task = dump_task;
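	/*
	 * core_state->dumper chains every other thread that was stopped
	 * for the dump; the dumping thread itself is the list head set up
	 * above, so start the walk at ->next.
	 */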
	for (ct = dump_task->signal->core_state->dumper.next; ct; ct = ct->next) {
		t = kzalloc(offsetof(struct elf_thread_core_info,
				     notes[info->thread_notes]),
			    GFP_KERNEL);
		if (unlikely(!t))
			return 0;

		t->task = ct->task;
		t->next = info->thread->next;
		info->thread->next = t;
	}

	/*
	 * Now fill in each thread's information.
	 */
	for (t = info->thread; t != NULL; t = t->next)
		if (!fill_thread_core_info(t, view, cprm->siginfo->si_signo, info))
			return 0;

	/*
	 * Fill in the process-wide notes.
	 */
	fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
	info->size += notesize(&info->psinfo);

	fill_siginfo_note(&info->signote, &info->csigdata, cprm->siginfo);
	info->size += notesize(&info->signote);

	fill_auxv_note(&info->auxv, current->mm);
	info->size += notesize(&info->auxv);

	if (fill_files_note(&info->files, cprm) == 0)
		info->size += notesize(&info->files);

	return 1;
}

/*
 * Write all the notes for each thread.  When writing the first thread, the
 * process-wide notes are interleaved after the first thread-specific note.
 */
static int write_note_info(struct elf_note_info *info,
			   struct coredump_params *cprm)
{
	bool first = true;
	struct elf_thread_core_info *t = info->thread;

	do {
		int i;

		if (!writenote(&t->notes[0], cprm))
			return 0;

		if (first && !writenote(&info->psinfo, cprm))
			return 0;
		if (first && !writenote(&info->signote, cprm))
			return 0;
		if (first && !writenote(&info->auxv, cprm))
			return 0;
		if (first && info->files.data &&
				!writenote(&info->files, cprm))
			return 0;

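		/*
		 * A regset that produced no data in fill_thread_core_info()
		 * left its note's data pointer NULL; skip those notes.
		 */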
		for (i = 1; i < info->thread_notes; ++i)
			if (t->notes[i].data &&
			    !writenote(&t->notes[i], cprm))
				return 0;

		first = false;
		t = t->next;
	} while (t);

	return 1;
}

static void free_note_info(struct elf_note_info *info)
{
	struct elf_thread_core_info *threads = info->thread;
	while (threads) {
		unsigned int i;
		struct elf_thread_core_info *t = threads;
		threads = t->next;
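		/*
		 * notes[0] points at the prstatus embedded in t itself and
		 * must not be freed; only notes 1..thread_notes-1 own
		 * kmalloc'd buffers.
		 */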
		WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
		for (i = 1; i < info->thread_notes; ++i)
			kfree(t->notes[i].data);
		kfree(t);
	}
	kfree(info->psinfo.data);
	kvfree(info->files.data);
}

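/*
 * When the real program header count exceeds PN_XNUM (0xffff), e_phnum
 * is set to PN_XNUM and the actual count is carried in the sh_info field
 * of a single extra section header, per the ELF extended-numbering
 * convention.
 */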
static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
			     elf_addr_t e_shoff, int segs)
{
	elf->e_shoff = e_shoff;
	elf->e_shentsize = sizeof(*shdr4extnum);
	elf->e_shnum = 1;
	elf->e_shstrndx = SHN_UNDEF;

	memset(shdr4extnum, 0, sizeof(*shdr4extnum));

	shdr4extnum->sh_type = SHT_NULL;
	shdr4extnum->sh_size = elf->e_shnum;
	shdr4extnum->sh_link = elf->e_shstrndx;
	shdr4extnum->sh_info = segs;
}

/*
 * Actual dumper
 *
 * This is a two-pass process; first we find the offsets of the bits,
 * and then they are actually written out.  If we hit the core file
 * size limit, we just truncate.
 */
static int elf_core_dump(struct coredump_params *cprm)
{
	int has_dumped = 0;
	int segs, i;
	struct elfhdr elf;
	loff_t offset = 0, dataoff;
	struct elf_note_info info = { };
	struct elf_phdr *phdr4note = NULL;
	struct elf_shdr *shdr4extnum = NULL;
	Elf_Half e_phnum;
	elf_addr_t e_shoff;

	/*
	 * The number of segs is recorded in the ELF header as a 16-bit
	 * value.  Please check the DEFAULT_MAX_MAP_COUNT definition when
	 * you modify this.
	 */
	segs = cprm->vma_count + elf_core_extra_phdrs(cprm);

	/* for notes section */
	segs++;

	/*
	 * If segs > PN_XNUM(0xffff), then e_phnum overflows.  To avoid
	 * this, the kernel supports extended numbering.  Have a look at
	 * include/linux/elf.h for further information.
	 */
	e_phnum = segs > PN_XNUM ? PN_XNUM : segs;

	/*
	 * Collect all the non-memory information about the process for the
	 * notes.  This also sets up the file header.
	 */
	if (!fill_note_info(&elf, e_phnum, &info, cprm))
		goto end_coredump;

	has_dumped = 1;

	offset += sizeof(elf);				/* ELF header */
	offset += segs * sizeof(struct elf_phdr);	/* Program headers */

	/* Write notes phdr entry */
	{
		size_t sz = info.size;

		/* For cell spufs */
		sz += elf_coredump_extra_notes_size();

		phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
		if (!phdr4note)
			goto end_coredump;

		fill_elf_note_phdr(phdr4note, sz, offset);
		offset += sz;
	}

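	/*
	 * dataoff is the page-aligned file offset where the memory-segment
	 * data will start; everything before it is the headers and notes.
	 */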
	dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);

	offset += cprm->vma_data_size;
	offset += elf_core_extra_data_size(cprm);
	e_shoff = offset;

	if (e_phnum == PN_XNUM) {
		shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
		if (!shdr4extnum)
			goto end_coredump;
		fill_extnum_info(&elf, shdr4extnum, e_shoff, segs);
	}

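	/*
	 * Rewind to the start of the data area so the PT_LOAD loop below
	 * can assign each segment's p_offset as it goes.
	 */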
	offset = dataoff;

	if (!dump_emit(cprm, &elf, sizeof(elf)))
		goto end_coredump;

	if (!dump_emit(cprm, phdr4note, sizeof(*phdr4note)))
		goto end_coredump;

	/* Write program headers for segments dump */
	for (i = 0; i < cprm->vma_count; i++) {
		struct core_vma_metadata *meta = cprm->vma_meta + i;
		struct elf_phdr phdr;

		phdr.p_type = PT_LOAD;
		phdr.p_offset = offset;
		phdr.p_vaddr = meta->start;
		phdr.p_paddr = 0;
		phdr.p_filesz = meta->dump_size;
		phdr.p_memsz = meta->end - meta->start;
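		/*
		 * dump_size may be smaller than the VMA itself (a skipped
		 * or truncated dump), so p_filesz can be less than p_memsz.
		 */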
		offset += phdr.p_filesz;
		phdr.p_flags = 0;
		if (meta->flags & VM_READ)
			phdr.p_flags |= PF_R;
		if (meta->flags & VM_WRITE)
			phdr.p_flags |= PF_W;
		if (meta->flags & VM_EXEC)
			phdr.p_flags |= PF_X;
		phdr.p_align = ELF_EXEC_PAGESIZE;

		if (!dump_emit(cprm, &phdr, sizeof(phdr)))
			goto end_coredump;
	}

	if (!elf_core_write_extra_phdrs(cprm, offset))
		goto end_coredump;

	/* write out the notes section */
	if (!write_note_info(&info, cprm))
		goto end_coredump;

	/* For cell spufs */
	if (elf_coredump_extra_notes_write(cprm))
		goto end_coredump;

	/* Align to page */
	dump_skip_to(cprm, dataoff);

	for (i = 0; i < cprm->vma_count; i++) {
		struct core_vma_metadata *meta = cprm->vma_meta + i;

		if (!dump_user_range(cprm, meta->start, meta->dump_size))
			goto end_coredump;
	}

	if (!elf_core_write_extra_data(cprm))
		goto end_coredump;

	if (e_phnum == PN_XNUM) {
		if (!dump_emit(cprm, shdr4extnum, sizeof(*shdr4extnum)))
			goto end_coredump;
	}

end_coredump:
	free_note_info(&info);
	kfree(shdr4extnum);
	kfree(phdr4note);
	return has_dumped;
}

#endif		/* CONFIG_ELF_CORE */

static int __init init_elf_binfmt(void)
{
	register_binfmt(&elf_format);
	return 0;
}

static void __exit exit_elf_binfmt(void)
{
	/* Remove the ELF loader. */
	unregister_binfmt(&elf_format);
}

core_initcall(init_elf_binfmt);
module_exit(exit_elf_binfmt);

#ifdef CONFIG_BINFMT_ELF_KUNIT_TEST
#include "binfmt_elf_test.c"
#endif