dt_link.c revision 260670
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27#pragma ident	"%Z%%M%	%I%	%E% SMI"
28
29#define	ELF_TARGET_ALL
30#include <elf.h>
31
32#include <sys/types.h>
33#if defined(sun)
34#include <sys/sysmacros.h>
35#else
36#define	P2ROUNDUP(x, align)		(-(-(x) & -(align)))
37#endif
38
39#include <unistd.h>
40#include <strings.h>
41#if defined(sun)
42#include <alloca.h>
43#endif
44#include <limits.h>
45#include <stddef.h>
46#include <stdlib.h>
47#include <stdio.h>
48#include <fcntl.h>
49#include <errno.h>
50#if defined(sun)
51#include <wait.h>
52#else
53#include <sys/wait.h>
54#include <libelf.h>
55#include <gelf.h>
56#include <sys/mman.h>
57#endif
58#include <assert.h>
59#include <sys/ipc.h>
60
61#include <dt_impl.h>
62#include <dt_provider.h>
63#include <dt_program.h>
64#include <dt_string.h>
65
/*
 * Indices of the section headers in the ELF object that wraps the DOF.
 * The relocation section comes last; the dump routines omit it from the
 * section count when the DOF contains no relocations.
 */
#define	ESHDR_NULL	0	/* reserved null section header */
#define	ESHDR_SHSTRTAB	1	/* section header string table */
#define	ESHDR_DOF	2	/* the DOF data itself */
#define	ESHDR_STRTAB	3	/* symbol string table */
#define	ESHDR_SYMTAB	4	/* symbol table */
#define	ESHDR_REL	5	/* relocations against the DOF section */
#define	ESHDR_NUM	6	/* number of section headers */
73
/*
 * Seek to the file offset recorded in section header `index' and write that
 * section's contents from `data'.  The expression is non-zero when either the
 * seek does not land on the expected offset or the write does not transfer
 * exactly sh_size bytes.  The macro expects `dtp', `fd' and `elf_file' to be
 * in scope where it is expanded.
 */
#define	PWRITE_SCN(index, data)						\
	(lseek64(fd, (off64_t)elf_file.shdr[(index)].sh_offset,	\
	    SEEK_SET) != (off64_t)elf_file.shdr[(index)].sh_offset ||	\
	dt_write(dtp, fd, (data), elf_file.shdr[(index)].sh_size) !=	\
	elf_file.shdr[(index)].sh_size)
79
/*
 * Section header string table for 32-bit objects.  The number next to each
 * name is its byte offset within the table; the section headers refer to
 * their names by these offsets.
 */
static const char DTRACE_SHSTRTAB32[] =
	"\0"			/* 0 */
	".shstrtab\0"		/* 1 */
	".SUNW_dof\0"		/* 11 */
	".strtab\0"		/* 21 */
	".symtab\0"		/* 29 */
#ifdef __sparc
	".rela.SUNW_dof";	/* 37 */
#else
	".rel.SUNW_dof";	/* 37 */
#endif
90
/*
 * Section header string table for 64-bit objects.  The number next to each
 * name is its byte offset within the table; the section headers refer to
 * their names by these offsets.
 */
static const char DTRACE_SHSTRTAB64[] =
	"\0"			/* 0 */
	".shstrtab\0"		/* 1 */
	".SUNW_dof\0"		/* 11 */
	".strtab\0"		/* 21 */
	".symtab\0"		/* 29 */
	".rela.SUNW_dof";	/* 37 */
97
/*
 * Names of the symbol that labels the DOF section: one for actively loaded
 * DOF and a distinct one for lazily loaded DOF.
 */
static const char DOFSTR[] = "__SUNW_dof";
static const char DOFLAZYSTR[] = "___SUNW_dof";
100
/*
 * Singly-linked list node that pairs a string table buffer with a symbol
 * table buffer.
 */
typedef struct dt_link_pair {
	struct dt_link_pair *dlp_next;	/* following pair, or NULL at the end */
	void *dlp_str;			/* string table buffer */
	void *dlp_sym;			/* symbol table buffer */
} dt_link_pair_t;
106
107typedef struct dof_elf32 {
108	uint32_t de_nrel;		/* relocation count */
109#ifdef __sparc
110	Elf32_Rela *de_rel;		/* array of relocations for sparc */
111#else
112	Elf32_Rel *de_rel;		/* array of relocations for x86 */
113#endif
114	uint32_t de_nsym;		/* symbol count */
115	Elf32_Sym *de_sym;		/* array of symbols */
116	uint32_t de_strlen;		/* size of of string table */
117	char *de_strtab;		/* string table */
118	uint32_t de_global;		/* index of the first global symbol */
119} dof_elf32_t;
120
121static int
122prepare_elf32(dtrace_hdl_t *dtp, const dof_hdr_t *dof, dof_elf32_t *dep)
123{
124	dof_sec_t *dofs, *s;
125	dof_relohdr_t *dofrh;
126	dof_relodesc_t *dofr;
127	char *strtab;
128	int i, j, nrel;
129	size_t strtabsz = 1;
130	uint32_t count = 0;
131	size_t base;
132	Elf32_Sym *sym;
133#ifdef __sparc
134	Elf32_Rela *rel;
135#else
136	Elf32_Rel *rel;
137#endif
138
139	/*LINTED*/
140	dofs = (dof_sec_t *)((char *)dof + dof->dofh_secoff);
141
142	/*
143	 * First compute the size of the string table and the number of
144	 * relocations present in the DOF.
145	 */
146	for (i = 0; i < dof->dofh_secnum; i++) {
147		if (dofs[i].dofs_type != DOF_SECT_URELHDR)
148			continue;
149
150		/*LINTED*/
151		dofrh = (dof_relohdr_t *)((char *)dof + dofs[i].dofs_offset);
152
153		s = &dofs[dofrh->dofr_strtab];
154		strtab = (char *)dof + s->dofs_offset;
155		assert(strtab[0] == '\0');
156		strtabsz += s->dofs_size - 1;
157
158		s = &dofs[dofrh->dofr_relsec];
159		/*LINTED*/
160		dofr = (dof_relodesc_t *)((char *)dof + s->dofs_offset);
161		count += s->dofs_size / s->dofs_entsize;
162	}
163
164	dep->de_strlen = strtabsz;
165	dep->de_nrel = count;
166	dep->de_nsym = count + 1; /* the first symbol is always null */
167
168	if (dtp->dt_lazyload) {
169		dep->de_strlen += sizeof (DOFLAZYSTR);
170		dep->de_nsym++;
171	} else {
172		dep->de_strlen += sizeof (DOFSTR);
173		dep->de_nsym++;
174	}
175
176	if ((dep->de_rel = calloc(dep->de_nrel,
177	    sizeof (dep->de_rel[0]))) == NULL) {
178		return (dt_set_errno(dtp, EDT_NOMEM));
179	}
180
181	if ((dep->de_sym = calloc(dep->de_nsym, sizeof (Elf32_Sym))) == NULL) {
182		free(dep->de_rel);
183		return (dt_set_errno(dtp, EDT_NOMEM));
184	}
185
186	if ((dep->de_strtab = calloc(dep->de_strlen, 1)) == NULL) {
187		free(dep->de_rel);
188		free(dep->de_sym);
189		return (dt_set_errno(dtp, EDT_NOMEM));
190	}
191
192	count = 0;
193	strtabsz = 1;
194	dep->de_strtab[0] = '\0';
195	rel = dep->de_rel;
196	sym = dep->de_sym;
197	dep->de_global = 1;
198
199	/*
200	 * The first symbol table entry must be zeroed and is always ignored.
201	 */
202	bzero(sym, sizeof (Elf32_Sym));
203	sym++;
204
205	/*
206	 * Take a second pass through the DOF sections filling in the
207	 * memory we allocated.
208	 */
209	for (i = 0; i < dof->dofh_secnum; i++) {
210		if (dofs[i].dofs_type != DOF_SECT_URELHDR)
211			continue;
212
213		/*LINTED*/
214		dofrh = (dof_relohdr_t *)((char *)dof + dofs[i].dofs_offset);
215
216		s = &dofs[dofrh->dofr_strtab];
217		strtab = (char *)dof + s->dofs_offset;
218		bcopy(strtab + 1, dep->de_strtab + strtabsz, s->dofs_size);
219		base = strtabsz;
220		strtabsz += s->dofs_size - 1;
221
222		s = &dofs[dofrh->dofr_relsec];
223		/*LINTED*/
224		dofr = (dof_relodesc_t *)((char *)dof + s->dofs_offset);
225		nrel = s->dofs_size / s->dofs_entsize;
226
227		s = &dofs[dofrh->dofr_tgtsec];
228
229		for (j = 0; j < nrel; j++) {
230#if defined(__arm__)
231/* XXX */
232printf("%s:%s(%d): DOODAD\n",__FUNCTION__,__FILE__,__LINE__);
233#elif defined(__ia64__)
234/* XXX */
235printf("%s:%s(%d): DOODAD\n",__FUNCTION__,__FILE__,__LINE__);
236#elif defined(__i386) || defined(__amd64)
237			rel->r_offset = s->dofs_offset +
238			    dofr[j].dofr_offset;
239			rel->r_info = ELF32_R_INFO(count + dep->de_global,
240			    R_386_32);
241#elif defined(__mips__)
242/* XXX */
243printf("%s:%s(%d): DOODAD\n",__FUNCTION__,__FILE__,__LINE__);
244#elif defined(__powerpc__)
245			/*
246			 * Add 4 bytes to hit the low half of this 64-bit
247			 * big-endian address.
248			 */
249			rel->r_offset = s->dofs_offset +
250			    dofr[j].dofr_offset + 4;
251			rel->r_info = ELF32_R_INFO(count + dep->de_global,
252			    R_PPC_REL32);
253#elif defined(__sparc)
254			/*
255			 * Add 4 bytes to hit the low half of this 64-bit
256			 * big-endian address.
257			 */
258			rel->r_offset = s->dofs_offset +
259			    dofr[j].dofr_offset + 4;
260			rel->r_info = ELF32_R_INFO(count + dep->de_global,
261			    R_SPARC_32);
262#else
263#error unknown ISA
264#endif
265
266			sym->st_name = base + dofr[j].dofr_name - 1;
267			sym->st_value = 0;
268			sym->st_size = 0;
269			sym->st_info = ELF32_ST_INFO(STB_GLOBAL, STT_FUNC);
270			sym->st_other = 0;
271			sym->st_shndx = SHN_UNDEF;
272
273			rel++;
274			sym++;
275			count++;
276		}
277	}
278
279	/*
280	 * Add a symbol for the DOF itself. We use a different symbol for
281	 * lazily and actively loaded DOF to make them easy to distinguish.
282	 */
283	sym->st_name = strtabsz;
284	sym->st_value = 0;
285	sym->st_size = dof->dofh_filesz;
286	sym->st_info = ELF32_ST_INFO(STB_GLOBAL, STT_OBJECT);
287	sym->st_other = 0;
288	sym->st_shndx = ESHDR_DOF;
289	sym++;
290
291	if (dtp->dt_lazyload) {
292		bcopy(DOFLAZYSTR, dep->de_strtab + strtabsz,
293		    sizeof (DOFLAZYSTR));
294		strtabsz += sizeof (DOFLAZYSTR);
295	} else {
296		bcopy(DOFSTR, dep->de_strtab + strtabsz, sizeof (DOFSTR));
297		strtabsz += sizeof (DOFSTR);
298	}
299
300	assert(count == dep->de_nrel);
301	assert(strtabsz == dep->de_strlen);
302
303	return (0);
304}
305
306
307typedef struct dof_elf64 {
308	uint32_t de_nrel;
309	Elf64_Rela *de_rel;
310	uint32_t de_nsym;
311	Elf64_Sym *de_sym;
312
313	uint32_t de_strlen;
314	char *de_strtab;
315
316	uint32_t de_global;
317} dof_elf64_t;
318
319static int
320prepare_elf64(dtrace_hdl_t *dtp, const dof_hdr_t *dof, dof_elf64_t *dep)
321{
322	dof_sec_t *dofs, *s;
323	dof_relohdr_t *dofrh;
324	dof_relodesc_t *dofr;
325	char *strtab;
326	int i, j, nrel;
327	size_t strtabsz = 1;
328	uint32_t count = 0;
329	size_t base;
330	Elf64_Sym *sym;
331	Elf64_Rela *rel;
332
333	/*LINTED*/
334	dofs = (dof_sec_t *)((char *)dof + dof->dofh_secoff);
335
336	/*
337	 * First compute the size of the string table and the number of
338	 * relocations present in the DOF.
339	 */
340	for (i = 0; i < dof->dofh_secnum; i++) {
341		if (dofs[i].dofs_type != DOF_SECT_URELHDR)
342			continue;
343
344		/*LINTED*/
345		dofrh = (dof_relohdr_t *)((char *)dof + dofs[i].dofs_offset);
346
347		s = &dofs[dofrh->dofr_strtab];
348		strtab = (char *)dof + s->dofs_offset;
349		assert(strtab[0] == '\0');
350		strtabsz += s->dofs_size - 1;
351
352		s = &dofs[dofrh->dofr_relsec];
353		/*LINTED*/
354		dofr = (dof_relodesc_t *)((char *)dof + s->dofs_offset);
355		count += s->dofs_size / s->dofs_entsize;
356	}
357
358	dep->de_strlen = strtabsz;
359	dep->de_nrel = count;
360	dep->de_nsym = count + 1; /* the first symbol is always null */
361
362	if (dtp->dt_lazyload) {
363		dep->de_strlen += sizeof (DOFLAZYSTR);
364		dep->de_nsym++;
365	} else {
366		dep->de_strlen += sizeof (DOFSTR);
367		dep->de_nsym++;
368	}
369
370	if ((dep->de_rel = calloc(dep->de_nrel,
371	    sizeof (dep->de_rel[0]))) == NULL) {
372		return (dt_set_errno(dtp, EDT_NOMEM));
373	}
374
375	if ((dep->de_sym = calloc(dep->de_nsym, sizeof (Elf64_Sym))) == NULL) {
376		free(dep->de_rel);
377		return (dt_set_errno(dtp, EDT_NOMEM));
378	}
379
380	if ((dep->de_strtab = calloc(dep->de_strlen, 1)) == NULL) {
381		free(dep->de_rel);
382		free(dep->de_sym);
383		return (dt_set_errno(dtp, EDT_NOMEM));
384	}
385
386	count = 0;
387	strtabsz = 1;
388	dep->de_strtab[0] = '\0';
389	rel = dep->de_rel;
390	sym = dep->de_sym;
391	dep->de_global = 1;
392
393	/*
394	 * The first symbol table entry must be zeroed and is always ignored.
395	 */
396	bzero(sym, sizeof (Elf64_Sym));
397	sym++;
398
399	/*
400	 * Take a second pass through the DOF sections filling in the
401	 * memory we allocated.
402	 */
403	for (i = 0; i < dof->dofh_secnum; i++) {
404		if (dofs[i].dofs_type != DOF_SECT_URELHDR)
405			continue;
406
407		/*LINTED*/
408		dofrh = (dof_relohdr_t *)((char *)dof + dofs[i].dofs_offset);
409
410		s = &dofs[dofrh->dofr_strtab];
411		strtab = (char *)dof + s->dofs_offset;
412		bcopy(strtab + 1, dep->de_strtab + strtabsz, s->dofs_size);
413		base = strtabsz;
414		strtabsz += s->dofs_size - 1;
415
416		s = &dofs[dofrh->dofr_relsec];
417		/*LINTED*/
418		dofr = (dof_relodesc_t *)((char *)dof + s->dofs_offset);
419		nrel = s->dofs_size / s->dofs_entsize;
420
421		s = &dofs[dofrh->dofr_tgtsec];
422
423		for (j = 0; j < nrel; j++) {
424#ifdef DOODAD
425#if defined(__arm__)
426/* XXX */
427#elif defined(__ia64__)
428/* XXX */
429#elif defined(__mips__)
430/* XXX */
431#elif defined(__powerpc__)
432			rel->r_offset = s->dofs_offset +
433			    dofr[j].dofr_offset;
434			rel->r_info = ELF64_R_INFO(count + dep->de_global,
435			    R_PPC64_REL64);
436#elif defined(__i386) || defined(__amd64)
437			rel->r_offset = s->dofs_offset +
438			    dofr[j].dofr_offset;
439			rel->r_info = ELF64_R_INFO(count + dep->de_global,
440			    R_AMD64_64);
441#elif defined(__sparc)
442			rel->r_offset = s->dofs_offset +
443			    dofr[j].dofr_offset;
444			rel->r_info = ELF64_R_INFO(count + dep->de_global,
445			    R_SPARC_64);
446#else
447#error unknown ISA
448#endif
449#endif
450
451			sym->st_name = base + dofr[j].dofr_name - 1;
452			sym->st_value = 0;
453			sym->st_size = 0;
454			sym->st_info = GELF_ST_INFO(STB_GLOBAL, STT_FUNC);
455			sym->st_other = 0;
456			sym->st_shndx = SHN_UNDEF;
457
458			rel++;
459			sym++;
460			count++;
461		}
462	}
463
464	/*
465	 * Add a symbol for the DOF itself. We use a different symbol for
466	 * lazily and actively loaded DOF to make them easy to distinguish.
467	 */
468	sym->st_name = strtabsz;
469	sym->st_value = 0;
470	sym->st_size = dof->dofh_filesz;
471	sym->st_info = GELF_ST_INFO(STB_GLOBAL, STT_OBJECT);
472	sym->st_other = 0;
473	sym->st_shndx = ESHDR_DOF;
474	sym++;
475
476	if (dtp->dt_lazyload) {
477		bcopy(DOFLAZYSTR, dep->de_strtab + strtabsz,
478		    sizeof (DOFLAZYSTR));
479		strtabsz += sizeof (DOFLAZYSTR);
480	} else {
481		bcopy(DOFSTR, dep->de_strtab + strtabsz, sizeof (DOFSTR));
482		strtabsz += sizeof (DOFSTR);
483	}
484
485	assert(count == dep->de_nrel);
486	assert(strtabsz == dep->de_strlen);
487
488	return (0);
489}
490
491/*
492 * Write out an ELF32 file prologue consisting of a header, section headers,
493 * and a section header string table.  The DOF data will follow this prologue
494 * and complete the contents of the given ELF file.
495 */
496static int
497dump_elf32(dtrace_hdl_t *dtp, const dof_hdr_t *dof, int fd)
498{
499	struct {
500		Elf32_Ehdr ehdr;
501		Elf32_Shdr shdr[ESHDR_NUM];
502	} elf_file;
503
504	Elf32_Shdr *shp;
505	Elf32_Off off;
506	dof_elf32_t de;
507	int ret = 0;
508	uint_t nshdr;
509
510	if (prepare_elf32(dtp, dof, &de) != 0)
511		return (-1); /* errno is set for us */
512
513	/*
514	 * If there are no relocations, we only need enough sections for
515	 * the shstrtab and the DOF.
516	 */
517	nshdr = de.de_nrel == 0 ? ESHDR_SYMTAB + 1 : ESHDR_NUM;
518
519	bzero(&elf_file, sizeof (elf_file));
520
521	elf_file.ehdr.e_ident[EI_MAG0] = ELFMAG0;
522	elf_file.ehdr.e_ident[EI_MAG1] = ELFMAG1;
523	elf_file.ehdr.e_ident[EI_MAG2] = ELFMAG2;
524	elf_file.ehdr.e_ident[EI_MAG3] = ELFMAG3;
525	elf_file.ehdr.e_ident[EI_VERSION] = EV_CURRENT;
526	elf_file.ehdr.e_ident[EI_CLASS] = ELFCLASS32;
527#if BYTE_ORDER == _BIG_ENDIAN
528	elf_file.ehdr.e_ident[EI_DATA] = ELFDATA2MSB;
529#else
530	elf_file.ehdr.e_ident[EI_DATA] = ELFDATA2LSB;
531#endif
532#if defined(__FreeBSD__)
533	elf_file.ehdr.e_ident[EI_OSABI] = ELFOSABI_FREEBSD;
534#endif
535	elf_file.ehdr.e_type = ET_REL;
536#if defined(__arm__)
537	elf_file.ehdr.e_machine = EM_ARM;
538#elif defined(__ia64__)
539	elf_file.ehdr.e_machine = EM_IA_64;
540#elif defined(__mips__)
541	elf_file.ehdr.e_machine = EM_MIPS;
542#elif defined(__powerpc__)
543	elf_file.ehdr.e_machine = EM_PPC;
544#elif defined(__sparc)
545	elf_file.ehdr.e_machine = EM_SPARC;
546#elif defined(__i386) || defined(__amd64)
547	elf_file.ehdr.e_machine = EM_386;
548#endif
549	elf_file.ehdr.e_version = EV_CURRENT;
550	elf_file.ehdr.e_shoff = sizeof (Elf32_Ehdr);
551	elf_file.ehdr.e_ehsize = sizeof (Elf32_Ehdr);
552	elf_file.ehdr.e_phentsize = sizeof (Elf32_Phdr);
553	elf_file.ehdr.e_shentsize = sizeof (Elf32_Shdr);
554	elf_file.ehdr.e_shnum = nshdr;
555	elf_file.ehdr.e_shstrndx = ESHDR_SHSTRTAB;
556	off = sizeof (elf_file) + nshdr * sizeof (Elf32_Shdr);
557
558	shp = &elf_file.shdr[ESHDR_SHSTRTAB];
559	shp->sh_name = 1; /* DTRACE_SHSTRTAB32[1] = ".shstrtab" */
560	shp->sh_type = SHT_STRTAB;
561	shp->sh_offset = off;
562	shp->sh_size = sizeof (DTRACE_SHSTRTAB32);
563	shp->sh_addralign = sizeof (char);
564	off = P2ROUNDUP(shp->sh_offset + shp->sh_size, 8);
565
566	shp = &elf_file.shdr[ESHDR_DOF];
567	shp->sh_name = 11; /* DTRACE_SHSTRTAB32[11] = ".SUNW_dof" */
568	shp->sh_flags = SHF_ALLOC;
569	shp->sh_type = SHT_SUNW_dof;
570	shp->sh_offset = off;
571	shp->sh_size = dof->dofh_filesz;
572	shp->sh_addralign = 8;
573	off = shp->sh_offset + shp->sh_size;
574
575	shp = &elf_file.shdr[ESHDR_STRTAB];
576	shp->sh_name = 21; /* DTRACE_SHSTRTAB32[21] = ".strtab" */
577	shp->sh_flags = SHF_ALLOC;
578	shp->sh_type = SHT_STRTAB;
579	shp->sh_offset = off;
580	shp->sh_size = de.de_strlen;
581	shp->sh_addralign = sizeof (char);
582	off = P2ROUNDUP(shp->sh_offset + shp->sh_size, 4);
583
584	shp = &elf_file.shdr[ESHDR_SYMTAB];
585	shp->sh_name = 29; /* DTRACE_SHSTRTAB32[29] = ".symtab" */
586	shp->sh_flags = SHF_ALLOC;
587	shp->sh_type = SHT_SYMTAB;
588	shp->sh_entsize = sizeof (Elf32_Sym);
589	shp->sh_link = ESHDR_STRTAB;
590	shp->sh_offset = off;
591	shp->sh_info = de.de_global;
592	shp->sh_size = de.de_nsym * sizeof (Elf32_Sym);
593	shp->sh_addralign = 4;
594	off = P2ROUNDUP(shp->sh_offset + shp->sh_size, 4);
595
596	if (de.de_nrel == 0) {
597		if (dt_write(dtp, fd, &elf_file,
598		    sizeof (elf_file)) != sizeof (elf_file) ||
599		    PWRITE_SCN(ESHDR_SHSTRTAB, DTRACE_SHSTRTAB32) ||
600		    PWRITE_SCN(ESHDR_STRTAB, de.de_strtab) ||
601		    PWRITE_SCN(ESHDR_SYMTAB, de.de_sym) ||
602		    PWRITE_SCN(ESHDR_DOF, dof)) {
603			ret = dt_set_errno(dtp, errno);
604		}
605	} else {
606		shp = &elf_file.shdr[ESHDR_REL];
607		shp->sh_name = 37; /* DTRACE_SHSTRTAB32[37] = ".rel.SUNW_dof" */
608		shp->sh_flags = SHF_ALLOC;
609#ifdef __sparc
610		shp->sh_type = SHT_RELA;
611#else
612		shp->sh_type = SHT_REL;
613#endif
614		shp->sh_entsize = sizeof (de.de_rel[0]);
615		shp->sh_link = ESHDR_SYMTAB;
616		shp->sh_info = ESHDR_DOF;
617		shp->sh_offset = off;
618		shp->sh_size = de.de_nrel * sizeof (de.de_rel[0]);
619		shp->sh_addralign = 4;
620
621		if (dt_write(dtp, fd, &elf_file,
622		    sizeof (elf_file)) != sizeof (elf_file) ||
623		    PWRITE_SCN(ESHDR_SHSTRTAB, DTRACE_SHSTRTAB32) ||
624		    PWRITE_SCN(ESHDR_STRTAB, de.de_strtab) ||
625		    PWRITE_SCN(ESHDR_SYMTAB, de.de_sym) ||
626		    PWRITE_SCN(ESHDR_REL, de.de_rel) ||
627		    PWRITE_SCN(ESHDR_DOF, dof)) {
628			ret = dt_set_errno(dtp, errno);
629		}
630	}
631
632	free(de.de_strtab);
633	free(de.de_sym);
634	free(de.de_rel);
635
636	return (ret);
637}
638
639/*
640 * Write out an ELF64 file prologue consisting of a header, section headers,
641 * and a section header string table.  The DOF data will follow this prologue
642 * and complete the contents of the given ELF file.
643 */
644static int
645dump_elf64(dtrace_hdl_t *dtp, const dof_hdr_t *dof, int fd)
646{
647	struct {
648		Elf64_Ehdr ehdr;
649		Elf64_Shdr shdr[ESHDR_NUM];
650	} elf_file;
651
652	Elf64_Shdr *shp;
653	Elf64_Off off;
654	dof_elf64_t de;
655	int ret = 0;
656	uint_t nshdr;
657
658	if (prepare_elf64(dtp, dof, &de) != 0)
659		return (-1); /* errno is set for us */
660
661	/*
662	 * If there are no relocations, we only need enough sections for
663	 * the shstrtab and the DOF.
664	 */
665	nshdr = de.de_nrel == 0 ? ESHDR_SYMTAB + 1 : ESHDR_NUM;
666
667	bzero(&elf_file, sizeof (elf_file));
668
669	elf_file.ehdr.e_ident[EI_MAG0] = ELFMAG0;
670	elf_file.ehdr.e_ident[EI_MAG1] = ELFMAG1;
671	elf_file.ehdr.e_ident[EI_MAG2] = ELFMAG2;
672	elf_file.ehdr.e_ident[EI_MAG3] = ELFMAG3;
673	elf_file.ehdr.e_ident[EI_VERSION] = EV_CURRENT;
674	elf_file.ehdr.e_ident[EI_CLASS] = ELFCLASS64;
675#if BYTE_ORDER == _BIG_ENDIAN
676	elf_file.ehdr.e_ident[EI_DATA] = ELFDATA2MSB;
677#else
678	elf_file.ehdr.e_ident[EI_DATA] = ELFDATA2LSB;
679#endif
680#if defined(__FreeBSD__)
681	elf_file.ehdr.e_ident[EI_OSABI] = ELFOSABI_FREEBSD;
682#endif
683	elf_file.ehdr.e_type = ET_REL;
684#if defined(__arm__)
685	elf_file.ehdr.e_machine = EM_ARM;
686#elif defined(__ia64__)
687	elf_file.ehdr.e_machine = EM_IA_64;
688#elif defined(__mips__)
689	elf_file.ehdr.e_machine = EM_MIPS;
690#elif defined(__powerpc__)
691	elf_file.ehdr.e_machine = EM_PPC;
692#elif defined(__sparc)
693	elf_file.ehdr.e_machine = EM_SPARCV9;
694#elif defined(__i386) || defined(__amd64)
695	elf_file.ehdr.e_machine = EM_AMD64;
696#endif
697	elf_file.ehdr.e_version = EV_CURRENT;
698	elf_file.ehdr.e_shoff = sizeof (Elf64_Ehdr);
699	elf_file.ehdr.e_ehsize = sizeof (Elf64_Ehdr);
700	elf_file.ehdr.e_phentsize = sizeof (Elf64_Phdr);
701	elf_file.ehdr.e_shentsize = sizeof (Elf64_Shdr);
702	elf_file.ehdr.e_shnum = nshdr;
703	elf_file.ehdr.e_shstrndx = ESHDR_SHSTRTAB;
704	off = sizeof (elf_file) + nshdr * sizeof (Elf64_Shdr);
705
706	shp = &elf_file.shdr[ESHDR_SHSTRTAB];
707	shp->sh_name = 1; /* DTRACE_SHSTRTAB64[1] = ".shstrtab" */
708	shp->sh_type = SHT_STRTAB;
709	shp->sh_offset = off;
710	shp->sh_size = sizeof (DTRACE_SHSTRTAB64);
711	shp->sh_addralign = sizeof (char);
712	off = P2ROUNDUP(shp->sh_offset + shp->sh_size, 8);
713
714	shp = &elf_file.shdr[ESHDR_DOF];
715	shp->sh_name = 11; /* DTRACE_SHSTRTAB64[11] = ".SUNW_dof" */
716	shp->sh_flags = SHF_ALLOC;
717	shp->sh_type = SHT_SUNW_dof;
718	shp->sh_offset = off;
719	shp->sh_size = dof->dofh_filesz;
720	shp->sh_addralign = 8;
721	off = shp->sh_offset + shp->sh_size;
722
723	shp = &elf_file.shdr[ESHDR_STRTAB];
724	shp->sh_name = 21; /* DTRACE_SHSTRTAB64[21] = ".strtab" */
725	shp->sh_flags = SHF_ALLOC;
726	shp->sh_type = SHT_STRTAB;
727	shp->sh_offset = off;
728	shp->sh_size = de.de_strlen;
729	shp->sh_addralign = sizeof (char);
730	off = P2ROUNDUP(shp->sh_offset + shp->sh_size, 8);
731
732	shp = &elf_file.shdr[ESHDR_SYMTAB];
733	shp->sh_name = 29; /* DTRACE_SHSTRTAB64[29] = ".symtab" */
734	shp->sh_flags = SHF_ALLOC;
735	shp->sh_type = SHT_SYMTAB;
736	shp->sh_entsize = sizeof (Elf64_Sym);
737	shp->sh_link = ESHDR_STRTAB;
738	shp->sh_offset = off;
739	shp->sh_info = de.de_global;
740	shp->sh_size = de.de_nsym * sizeof (Elf64_Sym);
741	shp->sh_addralign = 8;
742	off = P2ROUNDUP(shp->sh_offset + shp->sh_size, 8);
743
744	if (de.de_nrel == 0) {
745		if (dt_write(dtp, fd, &elf_file,
746		    sizeof (elf_file)) != sizeof (elf_file) ||
747		    PWRITE_SCN(ESHDR_SHSTRTAB, DTRACE_SHSTRTAB64) ||
748		    PWRITE_SCN(ESHDR_STRTAB, de.de_strtab) ||
749		    PWRITE_SCN(ESHDR_SYMTAB, de.de_sym) ||
750		    PWRITE_SCN(ESHDR_DOF, dof)) {
751			ret = dt_set_errno(dtp, errno);
752		}
753	} else {
754		shp = &elf_file.shdr[ESHDR_REL];
755		shp->sh_name = 37; /* DTRACE_SHSTRTAB64[37] = ".rel.SUNW_dof" */
756		shp->sh_flags = SHF_ALLOC;
757		shp->sh_type = SHT_RELA;
758		shp->sh_entsize = sizeof (de.de_rel[0]);
759		shp->sh_link = ESHDR_SYMTAB;
760		shp->sh_info = ESHDR_DOF;
761		shp->sh_offset = off;
762		shp->sh_size = de.de_nrel * sizeof (de.de_rel[0]);
763		shp->sh_addralign = 8;
764
765		if (dt_write(dtp, fd, &elf_file,
766		    sizeof (elf_file)) != sizeof (elf_file) ||
767		    PWRITE_SCN(ESHDR_SHSTRTAB, DTRACE_SHSTRTAB64) ||
768		    PWRITE_SCN(ESHDR_STRTAB, de.de_strtab) ||
769		    PWRITE_SCN(ESHDR_SYMTAB, de.de_sym) ||
770		    PWRITE_SCN(ESHDR_REL, de.de_rel) ||
771		    PWRITE_SCN(ESHDR_DOF, dof)) {
772			ret = dt_set_errno(dtp, errno);
773		}
774	}
775
776	free(de.de_strtab);
777	free(de.de_sym);
778	free(de.de_rel);
779
780	return (ret);
781}
782
783static int
784dt_symtab_lookup(Elf_Data *data_sym, int nsym, uintptr_t addr, uint_t shn,
785    GElf_Sym *sym)
786{
787	int i, ret = -1;
788	GElf_Sym s;
789
790	for (i = 0; i < nsym && gelf_getsym(data_sym, i, sym) != NULL; i++) {
791		if (GELF_ST_TYPE(sym->st_info) == STT_FUNC &&
792		    shn == sym->st_shndx &&
793		    sym->st_value <= addr &&
794		    addr < sym->st_value + sym->st_size) {
795			if (GELF_ST_BIND(sym->st_info) == STB_GLOBAL)
796				return (0);
797
798			ret = 0;
799			s = *sym;
800		}
801	}
802
803	if (ret == 0)
804		*sym = s;
805	return (ret);
806}
807
808#if defined(__arm__)
809/* XXX */
810static int
811dt_modtext(dtrace_hdl_t *dtp, char *p, int isenabled, GElf_Rela *rela,
812    uint32_t *off)
813{
814printf("%s:%s(%d): DOODAD\n",__FUNCTION__,__FILE__,__LINE__);
815	return (0);
816}
817#elif defined(__ia64__)
818/* XXX */
819static int
820dt_modtext(dtrace_hdl_t *dtp, char *p, int isenabled, GElf_Rela *rela,
821    uint32_t *off)
822{
823printf("%s:%s(%d): DOODAD\n",__FUNCTION__,__FILE__,__LINE__);
824	return (0);
825}
826#elif defined(__mips__)
827/* XXX */
828static int
829dt_modtext(dtrace_hdl_t *dtp, char *p, int isenabled, GElf_Rela *rela,
830    uint32_t *off)
831{
832printf("%s:%s(%d): DOODAD\n",__FUNCTION__,__FILE__,__LINE__);
833	return (0);
834}
835#elif defined(__powerpc__)
836/* The sentinel is 'xor r3,r3,r3'. */
837#define DT_OP_XOR_R3	0x7c631a78
838
839#define DT_OP_NOP		0x60000000
840#define DT_OP_BLR		0x4e800020
841
842/* This captures all forms of branching to address. */
843#define DT_IS_BRANCH(inst)	((inst & 0xfc000000) == 0x48000000)
844#define DT_IS_BL(inst)	(DT_IS_BRANCH(inst) && (inst & 0x01))
845
846/* XXX */
847static int
848dt_modtext(dtrace_hdl_t *dtp, char *p, int isenabled, GElf_Rela *rela,
849    uint32_t *off)
850{
851	uint32_t *ip;
852
853	if ((rela->r_offset & (sizeof (uint32_t) - 1)) != 0)
854		return (-1);
855
856	/*LINTED*/
857	ip = (uint32_t *)(p + rela->r_offset);
858
859	/*
860	 * We only know about some specific relocation types.
861	 */
862	if (GELF_R_TYPE(rela->r_info) != R_PPC_REL24 &&
863	    GELF_R_TYPE(rela->r_info) != R_PPC_PLTREL24)
864		return (-1);
865
866	/*
867	 * We may have already processed this object file in an earlier linker
868	 * invocation. Check to see if the present instruction sequence matches
869	 * the one we would install below.
870	 */
871	if (isenabled) {
872		if (ip[0] == DT_OP_XOR_R3) {
873			(*off) += sizeof (ip[0]);
874			return (0);
875		}
876	} else {
877		if (ip[0] == DT_OP_NOP) {
878			(*off) += sizeof (ip[0]);
879			return (0);
880		}
881	}
882
883	/*
884	 * We only expect branch to address instructions.
885	 */
886	if (!DT_IS_BRANCH(ip[0])) {
887		dt_dprintf("found %x instead of a branch instruction at %llx\n",
888		    ip[0], (u_longlong_t)rela->r_offset);
889		return (-1);
890	}
891
892	if (isenabled) {
893		/*
894		 * It would necessarily indicate incorrect usage if an is-
895		 * enabled probe were tail-called so flag that as an error.
896		 * It's also potentially (very) tricky to handle gracefully,
897		 * but could be done if this were a desired use scenario.
898		 */
899		if (!DT_IS_BL(ip[0])) {
900			dt_dprintf("tail call to is-enabled probe at %llx\n",
901			    (u_longlong_t)rela->r_offset);
902			return (-1);
903		}
904
905		ip[0] = DT_OP_XOR_R3;
906		(*off) += sizeof (ip[0]);
907	} else {
908		if (DT_IS_BL(ip[0]))
909			ip[0] = DT_OP_NOP;
910		else
911			ip[0] = DT_OP_BLR;
912	}
913
914	return (0);
915}
916
917#elif defined(__sparc)
918
919#define	DT_OP_RET		0x81c7e008
920#define	DT_OP_NOP		0x01000000
921#define	DT_OP_CALL		0x40000000
922#define	DT_OP_CLR_O0		0x90102000
923
924#define	DT_IS_MOV_O7(inst)	(((inst) & 0xffffe000) == 0x9e100000)
925#define	DT_IS_RESTORE(inst)	(((inst) & 0xc1f80000) == 0x81e80000)
926#define	DT_IS_RETL(inst)	(((inst) & 0xfff83fff) == 0x81c02008)
927
928#define	DT_RS2(inst)		((inst) & 0x1f)
929#define	DT_MAKE_RETL(reg)	(0x81c02008 | ((reg) << 14))
930
931/*ARGSUSED*/
932static int
933dt_modtext(dtrace_hdl_t *dtp, char *p, int isenabled, GElf_Rela *rela,
934    uint32_t *off)
935{
936	uint32_t *ip;
937
938	if ((rela->r_offset & (sizeof (uint32_t) - 1)) != 0)
939		return (-1);
940
941	/*LINTED*/
942	ip = (uint32_t *)(p + rela->r_offset);
943
944	/*
945	 * We only know about some specific relocation types.
946	 */
947	if (GELF_R_TYPE(rela->r_info) != R_SPARC_WDISP30 &&
948	    GELF_R_TYPE(rela->r_info) != R_SPARC_WPLT30)
949		return (-1);
950
951	/*
952	 * We may have already processed this object file in an earlier linker
953	 * invocation. Check to see if the present instruction sequence matches
954	 * the one we would install below.
955	 */
956	if (isenabled) {
957		if (ip[0] == DT_OP_NOP) {
958			(*off) += sizeof (ip[0]);
959			return (0);
960		}
961	} else {
962		if (DT_IS_RESTORE(ip[1])) {
963			if (ip[0] == DT_OP_RET) {
964				(*off) += sizeof (ip[0]);
965				return (0);
966			}
967		} else if (DT_IS_MOV_O7(ip[1])) {
968			if (DT_IS_RETL(ip[0]))
969				return (0);
970		} else {
971			if (ip[0] == DT_OP_NOP) {
972				(*off) += sizeof (ip[0]);
973				return (0);
974			}
975		}
976	}
977
978	/*
979	 * We only expect call instructions with a displacement of 0.
980	 */
981	if (ip[0] != DT_OP_CALL) {
982		dt_dprintf("found %x instead of a call instruction at %llx\n",
983		    ip[0], (u_longlong_t)rela->r_offset);
984		return (-1);
985	}
986
987	if (isenabled) {
988		/*
989		 * It would necessarily indicate incorrect usage if an is-
990		 * enabled probe were tail-called so flag that as an error.
991		 * It's also potentially (very) tricky to handle gracefully,
992		 * but could be done if this were a desired use scenario.
993		 */
994		if (DT_IS_RESTORE(ip[1]) || DT_IS_MOV_O7(ip[1])) {
995			dt_dprintf("tail call to is-enabled probe at %llx\n",
996			    (u_longlong_t)rela->r_offset);
997			return (-1);
998		}
999
1000
1001		/*
1002		 * On SPARC, we take advantage of the fact that the first
1003		 * argument shares the same register as for the return value.
1004		 * The macro handles the work of zeroing that register so we
1005		 * don't need to do anything special here. We instrument the
1006		 * instruction in the delay slot as we'll need to modify the
1007		 * return register after that instruction has been emulated.
1008		 */
1009		ip[0] = DT_OP_NOP;
1010		(*off) += sizeof (ip[0]);
1011	} else {
1012		/*
1013		 * If the call is followed by a restore, it's a tail call so
1014		 * change the call to a ret. If the call if followed by a mov
1015		 * of a register into %o7, it's a tail call in leaf context
1016		 * so change the call to a retl-like instruction that returns
1017		 * to that register value + 8 (rather than the typical %o7 +
1018		 * 8); the delay slot instruction is left, but should have no
1019		 * effect. Otherwise we change the call to be a nop. We
1020		 * identify the subsequent instruction as the probe point in
1021		 * all but the leaf tail-call case to ensure that arguments to
1022		 * the probe are complete and consistent. An astute, though
1023		 * largely hypothetical, observer would note that there is the
1024		 * possibility of a false-positive probe firing if the function
1025		 * contained a branch to the instruction in the delay slot of
1026		 * the call. Fixing this would require significant in-kernel
1027		 * modifications, and isn't worth doing until we see it in the
1028		 * wild.
1029		 */
1030		if (DT_IS_RESTORE(ip[1])) {
1031			ip[0] = DT_OP_RET;
1032			(*off) += sizeof (ip[0]);
1033		} else if (DT_IS_MOV_O7(ip[1])) {
1034			ip[0] = DT_MAKE_RETL(DT_RS2(ip[1]));
1035		} else {
1036			ip[0] = DT_OP_NOP;
1037			(*off) += sizeof (ip[0]);
1038		}
1039	}
1040
1041	return (0);
1042}
1043
1044#elif defined(__i386) || defined(__amd64)
1045
1046#define	DT_OP_NOP		0x90
1047#define	DT_OP_RET		0xc3
1048#define	DT_OP_CALL		0xe8
1049#define	DT_OP_JMP32		0xe9
1050#define	DT_OP_REX_RAX		0x48
1051#define	DT_OP_XOR_EAX_0		0x33
1052#define	DT_OP_XOR_EAX_1		0xc0
1053
1054static int
1055dt_modtext(dtrace_hdl_t *dtp, char *p, int isenabled, GElf_Rela *rela,
1056    uint32_t *off)
1057{
1058	uint8_t *ip = (uint8_t *)(p + rela->r_offset - 1);
1059	uint8_t ret;
1060
1061	/*
1062	 * On x86, the first byte of the instruction is the call opcode and
1063	 * the next four bytes are the 32-bit address; the relocation is for
1064	 * the address operand. We back up the offset to the first byte of
1065	 * the instruction. For is-enabled probes, we later advance the offset
1066	 * so that it hits the first nop in the instruction sequence.
1067	 */
1068	(*off) -= 1;
1069
1070	/*
1071	 * We only know about some specific relocation types. Luckily
1072	 * these types have the same values on both 32-bit and 64-bit
1073	 * x86 architectures.
1074	 */
1075	if (GELF_R_TYPE(rela->r_info) != R_386_PC32 &&
1076	    GELF_R_TYPE(rela->r_info) != R_386_PLT32)
1077		return (-1);
1078
1079	/*
1080	 * We may have already processed this object file in an earlier linker
1081	 * invocation. Check to see if the present instruction sequence matches
1082	 * the one we would install. For is-enabled probes, we advance the
1083	 * offset to the first nop instruction in the sequence to match the
1084	 * text modification code below.
1085	 */
1086	if (!isenabled) {
1087		if ((ip[0] == DT_OP_NOP || ip[0] == DT_OP_RET) &&
1088		    ip[1] == DT_OP_NOP && ip[2] == DT_OP_NOP &&
1089		    ip[3] == DT_OP_NOP && ip[4] == DT_OP_NOP)
1090			return (0);
1091	} else if (dtp->dt_oflags & DTRACE_O_LP64) {
1092		if (ip[0] == DT_OP_REX_RAX &&
1093		    ip[1] == DT_OP_XOR_EAX_0 && ip[2] == DT_OP_XOR_EAX_1 &&
1094		    (ip[3] == DT_OP_NOP || ip[3] == DT_OP_RET) &&
1095		    ip[4] == DT_OP_NOP) {
1096			(*off) += 3;
1097			return (0);
1098		}
1099	} else {
1100		if (ip[0] == DT_OP_XOR_EAX_0 && ip[1] == DT_OP_XOR_EAX_1 &&
1101		    (ip[2] == DT_OP_NOP || ip[2] == DT_OP_RET) &&
1102		    ip[3] == DT_OP_NOP && ip[4] == DT_OP_NOP) {
1103			(*off) += 2;
1104			return (0);
1105		}
1106	}
1107
1108	/*
1109	 * We expect either a call instrution with a 32-bit displacement or a
1110	 * jmp instruction with a 32-bit displacement acting as a tail-call.
1111	 */
1112	if (ip[0] != DT_OP_CALL && ip[0] != DT_OP_JMP32) {
1113		dt_dprintf("found %x instead of a call or jmp instruction at "
1114		    "%llx\n", ip[0], (u_longlong_t)rela->r_offset);
1115		return (-1);
1116	}
1117
1118	ret = (ip[0] == DT_OP_JMP32) ? DT_OP_RET : DT_OP_NOP;
1119
1120	/*
1121	 * Establish the instruction sequence -- all nops for probes, and an
1122	 * instruction to clear the return value register (%eax/%rax) followed
1123	 * by nops for is-enabled probes. For is-enabled probes, we advance
1124	 * the offset to the first nop. This isn't stricly necessary but makes
1125	 * for more readable disassembly when the probe is enabled.
1126	 */
1127	if (!isenabled) {
1128		ip[0] = ret;
1129		ip[1] = DT_OP_NOP;
1130		ip[2] = DT_OP_NOP;
1131		ip[3] = DT_OP_NOP;
1132		ip[4] = DT_OP_NOP;
1133	} else if (dtp->dt_oflags & DTRACE_O_LP64) {
1134		ip[0] = DT_OP_REX_RAX;
1135		ip[1] = DT_OP_XOR_EAX_0;
1136		ip[2] = DT_OP_XOR_EAX_1;
1137		ip[3] = ret;
1138		ip[4] = DT_OP_NOP;
1139		(*off) += 3;
1140	} else {
1141		ip[0] = DT_OP_XOR_EAX_0;
1142		ip[1] = DT_OP_XOR_EAX_1;
1143		ip[2] = ret;
1144		ip[3] = DT_OP_NOP;
1145		ip[4] = DT_OP_NOP;
1146		(*off) += 2;
1147	}
1148
1149	return (0);
1150}
1151
1152#else
1153#error unknown ISA
1154#endif
1155
1156/*PRINTFLIKE5*/
1157static int
1158dt_link_error(dtrace_hdl_t *dtp, Elf *elf, int fd, dt_link_pair_t *bufs,
1159    const char *format, ...)
1160{
1161	va_list ap;
1162	dt_link_pair_t *pair;
1163
1164	va_start(ap, format);
1165	dt_set_errmsg(dtp, NULL, NULL, NULL, 0, format, ap);
1166	va_end(ap);
1167
1168	if (elf != NULL)
1169		(void) elf_end(elf);
1170
1171	if (fd >= 0)
1172		(void) close(fd);
1173
1174	while ((pair = bufs) != NULL) {
1175		bufs = pair->dlp_next;
1176		dt_free(dtp, pair->dlp_str);
1177		dt_free(dtp, pair->dlp_sym);
1178		dt_free(dtp, pair);
1179	}
1180
1181	return (dt_set_errno(dtp, EDT_COMPILER));
1182}
1183
1184static int
1185process_obj(dtrace_hdl_t *dtp, const char *obj, int *eprobesp)
1186{
1187	static const char dt_prefix[] = "__dtrace";
1188	static const char dt_enabled[] = "enabled";
1189	static const char dt_symprefix[] = "$dtrace";
1190	static const char dt_symfmt[] = "%s%ld.%s";
1191	int fd, i, ndx, eprobe, mod = 0;
1192	Elf *elf = NULL;
1193	GElf_Ehdr ehdr;
1194	Elf_Scn *scn_rel, *scn_sym, *scn_str, *scn_tgt;
1195	Elf_Data *data_rel, *data_sym, *data_str, *data_tgt;
1196	GElf_Shdr shdr_rel, shdr_sym, shdr_str, shdr_tgt;
1197	GElf_Sym rsym, fsym, dsym;
1198	GElf_Rela rela;
1199	char *s, *p, *r;
1200	char pname[DTRACE_PROVNAMELEN];
1201	dt_provider_t *pvp;
1202	dt_probe_t *prp;
1203	uint32_t off, eclass, emachine1, emachine2;
1204	size_t symsize, nsym, isym, istr, len;
1205	key_t objkey;
1206	dt_link_pair_t *pair, *bufs = NULL;
1207	dt_strtab_t *strtab;
1208
1209	if ((fd = open64(obj, O_RDWR)) == -1) {
1210		return (dt_link_error(dtp, elf, fd, bufs,
1211		    "failed to open %s: %s", obj, strerror(errno)));
1212	}
1213
1214	if ((elf = elf_begin(fd, ELF_C_RDWR, NULL)) == NULL) {
1215		return (dt_link_error(dtp, elf, fd, bufs,
1216		    "failed to process %s: %s", obj, elf_errmsg(elf_errno())));
1217	}
1218
1219	switch (elf_kind(elf)) {
1220	case ELF_K_ELF:
1221		break;
1222	case ELF_K_AR:
1223		return (dt_link_error(dtp, elf, fd, bufs, "archives are not "
1224		    "permitted; use the contents of the archive instead: %s",
1225		    obj));
1226	default:
1227		return (dt_link_error(dtp, elf, fd, bufs,
1228		    "invalid file type: %s", obj));
1229	}
1230
1231	if (gelf_getehdr(elf, &ehdr) == NULL) {
1232		return (dt_link_error(dtp, elf, fd, bufs, "corrupt file: %s",
1233		    obj));
1234	}
1235
1236	if (dtp->dt_oflags & DTRACE_O_LP64) {
1237		eclass = ELFCLASS64;
1238#if defined(__ia64__)
1239		emachine1 = emachine2 = EM_IA_64;
1240#elif defined(__mips__)
1241		emachine1 = emachine2 = EM_MIPS;
1242#elif defined(__powerpc__)
1243		emachine1 = emachine2 = EM_PPC64;
1244#elif defined(__sparc)
1245		emachine1 = emachine2 = EM_SPARCV9;
1246#elif defined(__i386) || defined(__amd64)
1247		emachine1 = emachine2 = EM_AMD64;
1248#endif
1249		symsize = sizeof (Elf64_Sym);
1250	} else {
1251		eclass = ELFCLASS32;
1252#if defined(__arm__)
1253		emachine1 = emachine2 = EM_ARM;
1254#elif defined(__mips__)
1255		emachine1 = emachine2 = EM_MIPS;
1256#elif defined(__powerpc__)
1257		emachine1 = emachine2 = EM_PPC;
1258#elif defined(__sparc)
1259		emachine1 = EM_SPARC;
1260		emachine2 = EM_SPARC32PLUS;
1261#elif defined(__i386) || defined(__amd64) || defined(__ia64__)
1262		emachine1 = emachine2 = EM_386;
1263#endif
1264		symsize = sizeof (Elf32_Sym);
1265	}
1266
1267	if (ehdr.e_ident[EI_CLASS] != eclass) {
1268		return (dt_link_error(dtp, elf, fd, bufs,
1269		    "incorrect ELF class for object file: %s", obj));
1270	}
1271
1272	if (ehdr.e_machine != emachine1 && ehdr.e_machine != emachine2) {
1273		return (dt_link_error(dtp, elf, fd, bufs,
1274		    "incorrect ELF machine type for object file: %s", obj));
1275	}
1276
1277	/*
1278	 * We use this token as a relatively unique handle for this file on the
1279	 * system in order to disambiguate potential conflicts between files of
1280	 * the same name which contain identially named local symbols.
1281	 */
1282	if ((objkey = ftok(obj, 0)) == (key_t)-1) {
1283		return (dt_link_error(dtp, elf, fd, bufs,
1284		    "failed to generate unique key for object file: %s", obj));
1285	}
1286
1287	scn_rel = NULL;
1288	while ((scn_rel = elf_nextscn(elf, scn_rel)) != NULL) {
1289		if (gelf_getshdr(scn_rel, &shdr_rel) == NULL)
1290			goto err;
1291
1292		/*
1293		 * Skip any non-relocation sections.
1294		 */
1295		if (shdr_rel.sh_type != SHT_RELA && shdr_rel.sh_type != SHT_REL)
1296			continue;
1297
1298		if ((data_rel = elf_getdata(scn_rel, NULL)) == NULL)
1299			goto err;
1300
1301		/*
1302		 * Grab the section, section header and section data for the
1303		 * symbol table that this relocation section references.
1304		 */
1305		if ((scn_sym = elf_getscn(elf, shdr_rel.sh_link)) == NULL ||
1306		    gelf_getshdr(scn_sym, &shdr_sym) == NULL ||
1307		    (data_sym = elf_getdata(scn_sym, NULL)) == NULL)
1308			goto err;
1309
1310		/*
1311		 * Ditto for that symbol table's string table.
1312		 */
1313		if ((scn_str = elf_getscn(elf, shdr_sym.sh_link)) == NULL ||
1314		    gelf_getshdr(scn_str, &shdr_str) == NULL ||
1315		    (data_str = elf_getdata(scn_str, NULL)) == NULL)
1316			goto err;
1317
1318		/*
1319		 * Grab the section, section header and section data for the
1320		 * target section for the relocations. For the relocations
1321		 * we're looking for -- this will typically be the text of the
1322		 * object file.
1323		 */
1324		if ((scn_tgt = elf_getscn(elf, shdr_rel.sh_info)) == NULL ||
1325		    gelf_getshdr(scn_tgt, &shdr_tgt) == NULL ||
1326		    (data_tgt = elf_getdata(scn_tgt, NULL)) == NULL)
1327			goto err;
1328
1329		/*
1330		 * We're looking for relocations to symbols matching this form:
1331		 *
1332		 *   __dtrace[enabled]_<prov>___<probe>
1333		 *
1334		 * For the generated object, we need to record the location
1335		 * identified by the relocation, and create a new relocation
1336		 * in the generated object that will be resolved at link time
1337		 * to the location of the function in which the probe is
1338		 * embedded. In the target object, we change the matched symbol
1339		 * so that it will be ignored at link time, and we modify the
1340		 * target (text) section to replace the call instruction with
1341		 * one or more nops.
1342		 *
1343		 * If the function containing the probe is locally scoped
1344		 * (static), we create an alias used by the relocation in the
1345		 * generated object. The alias, a new symbol, will be global
1346		 * (so that the relocation from the generated object can be
1347		 * resolved), and hidden (so that it is converted to a local
1348		 * symbol at link time). Such aliases have this form:
1349		 *
1350		 *   $dtrace<key>.<function>
1351		 *
1352		 * We take a first pass through all the relocations to
1353		 * populate our string table and count the number of extra
1354		 * symbols we'll require.
1355		 */
1356		strtab = dt_strtab_create(1);
1357		nsym = 0;
1358		isym = data_sym->d_size / symsize;
1359		istr = data_str->d_size;
1360
1361		for (i = 0; i < shdr_rel.sh_size / shdr_rel.sh_entsize; i++) {
1362
1363			if (shdr_rel.sh_type == SHT_RELA) {
1364				if (gelf_getrela(data_rel, i, &rela) == NULL)
1365					continue;
1366			} else {
1367				GElf_Rel rel;
1368				if (gelf_getrel(data_rel, i, &rel) == NULL)
1369					continue;
1370				rela.r_offset = rel.r_offset;
1371				rela.r_info = rel.r_info;
1372				rela.r_addend = 0;
1373			}
1374
1375			if (gelf_getsym(data_sym, GELF_R_SYM(rela.r_info),
1376			    &rsym) == NULL) {
1377				dt_strtab_destroy(strtab);
1378				goto err;
1379			}
1380
1381			s = (char *)data_str->d_buf + rsym.st_name;
1382
1383			if (strncmp(s, dt_prefix, sizeof (dt_prefix) - 1) != 0)
1384				continue;
1385
1386			if (dt_symtab_lookup(data_sym, isym, rela.r_offset,
1387			    shdr_rel.sh_info, &fsym) != 0) {
1388				dt_strtab_destroy(strtab);
1389				goto err;
1390			}
1391
1392			if (GELF_ST_BIND(fsym.st_info) != STB_LOCAL)
1393				continue;
1394
1395			if (fsym.st_name > data_str->d_size) {
1396				dt_strtab_destroy(strtab);
1397				goto err;
1398			}
1399
1400			s = (char *)data_str->d_buf + fsym.st_name;
1401
1402			/*
1403			 * If this symbol isn't of type function, we've really
1404			 * driven off the rails or the object file is corrupt.
1405			 */
1406			if (GELF_ST_TYPE(fsym.st_info) != STT_FUNC) {
1407				dt_strtab_destroy(strtab);
1408				return (dt_link_error(dtp, elf, fd, bufs,
1409				    "expected %s to be of type function", s));
1410			}
1411
1412			len = snprintf(NULL, 0, dt_symfmt, dt_symprefix,
1413			    objkey, s) + 1;
1414			if ((p = dt_alloc(dtp, len)) == NULL) {
1415				dt_strtab_destroy(strtab);
1416				goto err;
1417			}
1418			(void) snprintf(p, len, dt_symfmt, dt_symprefix,
1419			    objkey, s);
1420
1421			if (dt_strtab_index(strtab, p) == -1) {
1422				nsym++;
1423				(void) dt_strtab_insert(strtab, p);
1424			}
1425
1426			dt_free(dtp, p);
1427		}
1428
1429		/*
1430		 * If needed, allocate the additional space for the symbol
1431		 * table and string table copying the old data into the new
1432		 * buffers, and marking the buffers as dirty. We inject those
1433		 * newly allocated buffers into the libelf data structures, but
1434		 * are still responsible for freeing them once we're done with
1435		 * the elf handle.
1436		 */
1437		if (nsym > 0) {
1438			/*
1439			 * The first byte of the string table is reserved for
1440			 * the \0 entry.
1441			 */
1442			len = dt_strtab_size(strtab) - 1;
1443
1444			assert(len > 0);
1445			assert(dt_strtab_index(strtab, "") == 0);
1446
1447			dt_strtab_destroy(strtab);
1448
1449			if ((pair = dt_alloc(dtp, sizeof (*pair))) == NULL)
1450				goto err;
1451
1452			if ((pair->dlp_str = dt_alloc(dtp, data_str->d_size +
1453			    len)) == NULL) {
1454				dt_free(dtp, pair);
1455				goto err;
1456			}
1457
1458			if ((pair->dlp_sym = dt_alloc(dtp, data_sym->d_size +
1459			    nsym * symsize)) == NULL) {
1460				dt_free(dtp, pair->dlp_str);
1461				dt_free(dtp, pair);
1462				goto err;
1463			}
1464
1465			pair->dlp_next = bufs;
1466			bufs = pair;
1467
1468			bcopy(data_str->d_buf, pair->dlp_str, data_str->d_size);
1469			data_str->d_buf = pair->dlp_str;
1470			data_str->d_size += len;
1471			(void) elf_flagdata(data_str, ELF_C_SET, ELF_F_DIRTY);
1472
1473			shdr_str.sh_size += len;
1474			(void) gelf_update_shdr(scn_str, &shdr_str);
1475
1476			bcopy(data_sym->d_buf, pair->dlp_sym, data_sym->d_size);
1477			data_sym->d_buf = pair->dlp_sym;
1478			data_sym->d_size += nsym * symsize;
1479			(void) elf_flagdata(data_sym, ELF_C_SET, ELF_F_DIRTY);
1480
1481			shdr_sym.sh_size += nsym * symsize;
1482			(void) gelf_update_shdr(scn_sym, &shdr_sym);
1483
1484			nsym += isym;
1485		} else {
1486			dt_strtab_destroy(strtab);
1487		}
1488
1489		/*
1490		 * Now that the tables have been allocated, perform the
1491		 * modifications described above.
1492		 */
1493		for (i = 0; i < shdr_rel.sh_size / shdr_rel.sh_entsize; i++) {
1494
1495			if (shdr_rel.sh_type == SHT_RELA) {
1496				if (gelf_getrela(data_rel, i, &rela) == NULL)
1497					continue;
1498			} else {
1499				GElf_Rel rel;
1500				if (gelf_getrel(data_rel, i, &rel) == NULL)
1501					continue;
1502				rela.r_offset = rel.r_offset;
1503				rela.r_info = rel.r_info;
1504				rela.r_addend = 0;
1505			}
1506
1507			ndx = GELF_R_SYM(rela.r_info);
1508
1509			if (gelf_getsym(data_sym, ndx, &rsym) == NULL ||
1510			    rsym.st_name > data_str->d_size)
1511				goto err;
1512
1513			s = (char *)data_str->d_buf + rsym.st_name;
1514
1515			if (strncmp(s, dt_prefix, sizeof (dt_prefix) - 1) != 0)
1516				continue;
1517
1518			s += sizeof (dt_prefix) - 1;
1519
1520			/*
1521			 * Check to see if this is an 'is-enabled' check as
1522			 * opposed to a normal probe.
1523			 */
1524			if (strncmp(s, dt_enabled,
1525			    sizeof (dt_enabled) - 1) == 0) {
1526				s += sizeof (dt_enabled) - 1;
1527				eprobe = 1;
1528				*eprobesp = 1;
1529				dt_dprintf("is-enabled probe\n");
1530			} else {
1531				eprobe = 0;
1532				dt_dprintf("normal probe\n");
1533			}
1534
1535			if (*s++ != '_')
1536				goto err;
1537
1538			if ((p = strstr(s, "___")) == NULL ||
1539			    p - s >= sizeof (pname))
1540				goto err;
1541
1542			bcopy(s, pname, p - s);
1543			pname[p - s] = '\0';
1544
1545			p = strhyphenate(p + 3); /* strlen("___") */
1546
1547			if (dt_symtab_lookup(data_sym, isym, rela.r_offset,
1548			    shdr_rel.sh_info, &fsym) != 0)
1549				goto err;
1550
1551			if (fsym.st_name > data_str->d_size)
1552				goto err;
1553
1554			assert(GELF_ST_TYPE(fsym.st_info) == STT_FUNC);
1555
1556			/*
1557			 * If a NULL relocation name is passed to
1558			 * dt_probe_define(), the function name is used for the
1559			 * relocation. The relocation needs to use a mangled
1560			 * name if the symbol is locally scoped; the function
1561			 * name may need to change if we've found the global
1562			 * alias for the locally scoped symbol (we prefer
1563			 * global symbols to locals in dt_symtab_lookup()).
1564			 */
1565			s = (char *)data_str->d_buf + fsym.st_name;
1566			r = NULL;
1567
1568			if (GELF_ST_BIND(fsym.st_info) == STB_LOCAL) {
1569				dsym = fsym;
1570				dsym.st_name = istr;
1571				dsym.st_info = GELF_ST_INFO(STB_GLOBAL,
1572				    STT_FUNC);
1573				dsym.st_other =
1574				    ELF64_ST_VISIBILITY(STV_ELIMINATE);
1575				(void) gelf_update_sym(data_sym, isym, &dsym);
1576
1577				r = (char *)data_str->d_buf + istr;
1578				istr += 1 + sprintf(r, dt_symfmt,
1579				    dt_symprefix, objkey, s);
1580				isym++;
1581				assert(isym <= nsym);
1582
1583			} else if (strncmp(s, dt_symprefix,
1584			    strlen(dt_symprefix)) == 0) {
1585				r = s;
1586				if ((s = strchr(s, '.')) == NULL)
1587					goto err;
1588				s++;
1589			}
1590
1591			if ((pvp = dt_provider_lookup(dtp, pname)) == NULL) {
1592				return (dt_link_error(dtp, elf, fd, bufs,
1593				    "no such provider %s", pname));
1594			}
1595
1596			if ((prp = dt_probe_lookup(pvp, p)) == NULL) {
1597				return (dt_link_error(dtp, elf, fd, bufs,
1598				    "no such probe %s", p));
1599			}
1600
1601			assert(fsym.st_value <= rela.r_offset);
1602
1603			off = rela.r_offset - fsym.st_value;
1604			if (dt_modtext(dtp, data_tgt->d_buf, eprobe,
1605			    &rela, &off) != 0)
1606				goto err;
1607
1608			if (dt_probe_define(pvp, prp, s, r, off, eprobe) != 0) {
1609				return (dt_link_error(dtp, elf, fd, bufs,
1610				    "failed to allocate space for probe"));
1611			}
1612#if !defined(sun)
1613			/*
1614			 * Our linker doesn't understand the SUNW_IGNORE ndx and
1615			 * will try to use this relocation when we build the
1616			 * final executable. Since we are done processing this
1617			 * relocation, mark it as inexistant and let libelf
1618			 * remove it from the file.
1619			 * If this wasn't done, we would have garbage added to
1620			 * the executable file as the symbol is going to be
1621			 * change from UND to ABS.
1622			 */
1623			rela.r_offset = 0;
1624			rela.r_info  = 0;
1625			rela.r_addend = 0;
1626			(void) gelf_update_rela(data_rel, i, &rela);
1627#endif
1628
1629			mod = 1;
1630			(void) elf_flagdata(data_tgt, ELF_C_SET, ELF_F_DIRTY);
1631
1632			/*
1633			 * This symbol may already have been marked to
1634			 * be ignored by another relocation referencing
1635			 * the same symbol or if this object file has
1636			 * already been processed by an earlier link
1637			 * invocation.
1638			 */
1639#if !defined(sun)
1640#define SHN_SUNW_IGNORE	SHN_ABS
1641#endif
1642			if (rsym.st_shndx != SHN_SUNW_IGNORE) {
1643				rsym.st_shndx = SHN_SUNW_IGNORE;
1644				(void) gelf_update_sym(data_sym, ndx, &rsym);
1645			}
1646		}
1647	}
1648
1649	if (mod && elf_update(elf, ELF_C_WRITE) == -1)
1650		goto err;
1651
1652	(void) elf_end(elf);
1653	(void) close(fd);
1654
1655#if !defined(sun)
1656	if (nsym > 0)
1657#endif
1658	while ((pair = bufs) != NULL) {
1659		bufs = pair->dlp_next;
1660		dt_free(dtp, pair->dlp_str);
1661		dt_free(dtp, pair->dlp_sym);
1662		dt_free(dtp, pair);
1663	}
1664
1665	return (0);
1666
1667err:
1668	return (dt_link_error(dtp, elf, fd, bufs,
1669	    "an error was encountered while processing %s", obj));
1670}
1671
1672int
1673dtrace_program_link(dtrace_hdl_t *dtp, dtrace_prog_t *pgp, uint_t dflags,
1674    const char *file, int objc, char *const objv[])
1675{
1676#if !defined(sun)
1677	char tfile[PATH_MAX];
1678	Elf *e;
1679	Elf_Scn *scn;
1680	Elf_Data *data;
1681	GElf_Shdr shdr;
1682	int efd;
1683	size_t stridx;
1684	unsigned char *buf;
1685	char *s;
1686	int loc;
1687	GElf_Ehdr ehdr;
1688	Elf_Scn *scn0;
1689	GElf_Shdr shdr0;
1690	uint64_t off, rc;
1691#endif
1692	char drti[PATH_MAX];
1693	dof_hdr_t *dof;
1694	int fd, status, i, cur;
1695	char *cmd, tmp;
1696	size_t len;
1697	int eprobes = 0, ret = 0;
1698
1699#if !defined(sun)
1700	if (access(file, R_OK) == 0) {
1701		fprintf(stderr, "dtrace: target object (%s) already exists. "
1702		    "Please remove the target\ndtrace: object and rebuild all "
1703		    "the source objects if you wish to run the DTrace\n"
1704		    "dtrace: linking process again\n", file);
1705		/*
1706		 * Several build infrastructures run DTrace twice (e.g.
1707		 * postgres) and we don't want the build to fail. Return
1708		 * 0 here since this isn't really a fatal error.
1709		 */
1710		return (0);
1711	}
1712	/* XXX Should get a temp file name here. */
1713	snprintf(tfile, sizeof(tfile), "%s.tmp", file);
1714#endif
1715
1716	/*
1717	 * A NULL program indicates a special use in which we just link
1718	 * together a bunch of object files specified in objv and then
1719	 * unlink(2) those object files.
1720	 */
1721	if (pgp == NULL) {
1722		const char *fmt = "%s -o %s -r";
1723
1724		len = snprintf(&tmp, 1, fmt, dtp->dt_ld_path, file) + 1;
1725
1726		for (i = 0; i < objc; i++)
1727			len += strlen(objv[i]) + 1;
1728
1729		cmd = alloca(len);
1730
1731		cur = snprintf(cmd, len, fmt, dtp->dt_ld_path, file);
1732
1733		for (i = 0; i < objc; i++)
1734			cur += snprintf(cmd + cur, len - cur, " %s", objv[i]);
1735
1736		if ((status = system(cmd)) == -1) {
1737			return (dt_link_error(dtp, NULL, -1, NULL,
1738			    "failed to run %s: %s", dtp->dt_ld_path,
1739			    strerror(errno)));
1740		}
1741
1742		if (WIFSIGNALED(status)) {
1743			return (dt_link_error(dtp, NULL, -1, NULL,
1744			    "failed to link %s: %s failed due to signal %d",
1745			    file, dtp->dt_ld_path, WTERMSIG(status)));
1746		}
1747
1748		if (WEXITSTATUS(status) != 0) {
1749			return (dt_link_error(dtp, NULL, -1, NULL,
1750			    "failed to link %s: %s exited with status %d\n",
1751			    file, dtp->dt_ld_path, WEXITSTATUS(status)));
1752		}
1753
1754		for (i = 0; i < objc; i++) {
1755			if (strcmp(objv[i], file) != 0)
1756				(void) unlink(objv[i]);
1757		}
1758
1759		return (0);
1760	}
1761
1762	for (i = 0; i < objc; i++) {
1763		if (process_obj(dtp, objv[i], &eprobes) != 0)
1764			return (-1); /* errno is set for us */
1765	}
1766
1767	/*
1768	 * If there are is-enabled probes then we need to force use of DOF
1769	 * version 2.
1770	 */
1771	if (eprobes && pgp->dp_dofversion < DOF_VERSION_2)
1772		pgp->dp_dofversion = DOF_VERSION_2;
1773
1774	if ((dof = dtrace_dof_create(dtp, pgp, dflags)) == NULL)
1775		return (-1); /* errno is set for us */
1776
1777#if defined(sun)
1778	/*
1779	 * Create a temporary file and then unlink it if we're going to
1780	 * combine it with drti.o later.  We can still refer to it in child
1781	 * processes as /dev/fd/<fd>.
1782	 */
1783	if ((fd = open64(file, O_RDWR | O_CREAT | O_TRUNC, 0666)) == -1) {
1784		return (dt_link_error(dtp, NULL, -1, NULL,
1785		    "failed to open %s: %s", file, strerror(errno)));
1786	}
1787#else
1788	if ((fd = open(tfile, O_RDWR | O_CREAT | O_TRUNC, 0666)) == -1)
1789		return (dt_link_error(dtp, NULL, -1, NULL,
1790		    "failed to open %s: %s", tfile, strerror(errno)));
1791#endif
1792
1793	/*
1794	 * If -xlinktype=DOF has been selected, just write out the DOF.
1795	 * Otherwise proceed to the default of generating and linking ELF.
1796	 */
1797	switch (dtp->dt_linktype) {
1798	case DT_LTYP_DOF:
1799		if (dt_write(dtp, fd, dof, dof->dofh_filesz) < dof->dofh_filesz)
1800			ret = errno;
1801
1802		if (close(fd) != 0 && ret == 0)
1803			ret = errno;
1804
1805		if (ret != 0) {
1806			return (dt_link_error(dtp, NULL, -1, NULL,
1807			    "failed to write %s: %s", file, strerror(ret)));
1808		}
1809
1810		return (0);
1811
1812	case DT_LTYP_ELF:
1813		break; /* fall through to the rest of dtrace_program_link() */
1814
1815	default:
1816		return (dt_link_error(dtp, NULL, -1, NULL,
1817		    "invalid link type %u\n", dtp->dt_linktype));
1818	}
1819
1820
1821#if defined(sun)
1822	if (!dtp->dt_lazyload)
1823		(void) unlink(file);
1824#endif
1825
1826#if defined(sun)
1827	if (dtp->dt_oflags & DTRACE_O_LP64)
1828		status = dump_elf64(dtp, dof, fd);
1829	else
1830		status = dump_elf32(dtp, dof, fd);
1831
1832	if (status != 0 || lseek(fd, 0, SEEK_SET) != 0) {
1833#else
1834	/* We don't write the ELF header, just the DOF section */
1835	if (dt_write(dtp, fd, dof, dof->dofh_filesz) < dof->dofh_filesz) {
1836#endif
1837		return (dt_link_error(dtp, NULL, -1, NULL,
1838		    "failed to write %s: %s", file, strerror(errno)));
1839	}
1840
1841	if (!dtp->dt_lazyload) {
1842#if defined(sun)
1843		const char *fmt = "%s -o %s -r -Blocal -Breduce /dev/fd/%d %s";
1844
1845		if (dtp->dt_oflags & DTRACE_O_LP64) {
1846			(void) snprintf(drti, sizeof (drti),
1847			    "%s/64/drti.o", _dtrace_libdir);
1848		} else {
1849			(void) snprintf(drti, sizeof (drti),
1850			    "%s/drti.o", _dtrace_libdir);
1851		}
1852
1853		len = snprintf(&tmp, 1, fmt, dtp->dt_ld_path, file, fd,
1854		    drti) + 1;
1855
1856		cmd = alloca(len);
1857
1858		(void) snprintf(cmd, len, fmt, dtp->dt_ld_path, file, fd, drti);
1859#else
1860		const char *fmt = "%s -o %s -r %s";
1861
1862#if defined(__amd64__)
1863		/*
1864		 * Arches which default to 64-bit need to explicitly use
1865		 * the 32-bit library path.
1866		 */
1867		int use_32 = !(dtp->dt_oflags & DTRACE_O_LP64);
1868#else
1869		/*
1870		 * Arches which are 32-bit only just use the normal
1871		 * library path.
1872		 */
1873		int use_32 = 0;
1874#endif
1875
1876		(void) snprintf(drti, sizeof (drti), "/usr/lib%s/dtrace/drti.o",
1877		    use_32 ? "32":"");
1878
1879		len = snprintf(&tmp, 1, fmt, dtp->dt_ld_path, file, tfile,
1880		    drti) + 1;
1881
1882#if !defined(sun)
1883		len *= 2;
1884#endif
1885		cmd = alloca(len);
1886
1887		(void) snprintf(cmd, len, fmt, dtp->dt_ld_path, file,
1888		    drti);
1889#endif
1890		if ((status = system(cmd)) == -1) {
1891			ret = dt_link_error(dtp, NULL, -1, NULL,
1892			    "failed to run %s: %s", dtp->dt_ld_path,
1893			    strerror(errno));
1894			goto done;
1895		}
1896
1897		if (WIFSIGNALED(status)) {
1898			ret = dt_link_error(dtp, NULL, -1, NULL,
1899			    "failed to link %s: %s failed due to signal %d",
1900			    file, dtp->dt_ld_path, WTERMSIG(status));
1901			goto done;
1902		}
1903
1904		if (WEXITSTATUS(status) != 0) {
1905			ret = dt_link_error(dtp, NULL, -1, NULL,
1906			    "failed to link %s: %s exited with status %d\n",
1907			    file, dtp->dt_ld_path, WEXITSTATUS(status));
1908			goto done;
1909		}
1910#if !defined(sun)
1911#define BROKEN_LIBELF
1912		/*
1913		 * FreeBSD's ld(1) is not instructed to interpret and add
1914		 * correctly the SUNW_dof section present in tfile.
1915		 * We use libelf to add this section manually and hope the next
1916		 * ld invocation won't remove it.
1917		 */
1918		elf_version(EV_CURRENT);
1919		if ((efd = open(file, O_RDWR, 0)) < 0) {
1920			ret = dt_link_error(dtp, NULL, -1, NULL,
1921			    "failed to open file %s: %s",
1922			    file, strerror(errno));
1923			goto done;
1924		}
1925		if ((e = elf_begin(efd, ELF_C_RDWR, NULL)) == NULL) {
1926			close(efd);
1927			ret = dt_link_error(dtp, NULL, -1, NULL,
1928			    "failed to open elf file: %s",
1929			    elf_errmsg(elf_errno()));
1930			goto done;
1931		}
1932		/*
1933		 * Add the string '.SUWN_dof' to the shstrtab section.
1934		 */
1935#ifdef BROKEN_LIBELF
1936		elf_flagelf(e, ELF_C_SET, ELF_F_LAYOUT);
1937#endif
1938		elf_getshdrstrndx(e, &stridx);
1939		scn = elf_getscn(e, stridx);
1940		gelf_getshdr(scn, &shdr);
1941		data = elf_newdata(scn);
1942		data->d_off = shdr.sh_size;
1943		data->d_buf = ".SUNW_dof";
1944		data->d_size = 10;
1945		data->d_type = ELF_T_BYTE;
1946		loc = shdr.sh_size;
1947		shdr.sh_size += data->d_size;
1948		gelf_update_shdr(scn, &shdr);
1949#ifdef BROKEN_LIBELF
1950		off = shdr.sh_offset;
1951		rc = shdr.sh_offset + shdr.sh_size;
1952		gelf_getehdr(e, &ehdr);
1953		if (ehdr.e_shoff > off) {
1954			off = ehdr.e_shoff + ehdr.e_shnum * ehdr.e_shentsize;
1955			rc = roundup(rc, 8);
1956			ehdr.e_shoff = rc;
1957			gelf_update_ehdr(e, &ehdr);
1958			rc += ehdr.e_shnum * ehdr.e_shentsize;
1959		}
1960		for (;;) {
1961			scn0 = NULL;
1962			scn = NULL;
1963			while ((scn = elf_nextscn(e, scn)) != NULL) {
1964				gelf_getshdr(scn, &shdr);
1965				if (shdr.sh_type == SHT_NOBITS ||
1966				    shdr.sh_offset < off)
1967					continue;
1968				/* Find the immediately adjcent section. */
1969				if (scn0 == NULL ||
1970				    shdr.sh_offset < shdr0.sh_offset) {
1971					scn0 = scn;
1972					gelf_getshdr(scn0, &shdr0);
1973				}
1974			}
1975			if (scn0 == NULL)
1976				break;
1977			/* Load section data to work around another bug */
1978			elf_getdata(scn0, NULL);
1979			/* Update section header, assure section alignment */
1980			off = shdr0.sh_offset + shdr0.sh_size;
1981			rc = roundup(rc, shdr0.sh_addralign);
1982			shdr0.sh_offset = rc;
1983			gelf_update_shdr(scn0, &shdr0);
1984			rc += shdr0.sh_size;
1985		}
1986		if (elf_update(e, ELF_C_WRITE) < 0) {
1987			ret = dt_link_error(dtp, NULL, -1, NULL,
1988			    "failed to add append the shstrtab section: %s",
1989			    elf_errmsg(elf_errno()));
1990			elf_end(e);
1991			close(efd);
1992			goto done;
1993		}
1994		elf_end(e);
1995		e = elf_begin(efd, ELF_C_RDWR, NULL);
1996#endif
1997		/*
1998		 * Construct the .SUNW_dof section.
1999		 */
2000		scn = elf_newscn(e);
2001		data = elf_newdata(scn);
2002		buf = mmap(NULL, dof->dofh_filesz, PROT_READ, MAP_SHARED,
2003		    fd, 0);
2004		if (buf == MAP_FAILED) {
2005			ret = dt_link_error(dtp, NULL, -1, NULL,
2006			    "failed to mmap buffer %s", strerror(errno));
2007			elf_end(e);
2008			close(efd);
2009			goto done;
2010		}
2011		data->d_buf = buf;
2012		data->d_align = 4;
2013		data->d_size = dof->dofh_filesz;
2014		data->d_version = EV_CURRENT;
2015		gelf_getshdr(scn, &shdr);
2016		shdr.sh_name = loc;
2017		shdr.sh_flags = SHF_ALLOC;
2018		/*
2019		 * Actually this should be SHT_SUNW_dof, but FreeBSD's ld(1)
2020		 * will remove this 'unknown' section when we try to create an
2021		 * executable using the object we are modifying, so we stop
2022		 * playing by the rules and use SHT_PROGBITS.
2023		 * Also, note that our drti has modifications to handle this.
2024		 */
2025		shdr.sh_type = SHT_PROGBITS;
2026		shdr.sh_addralign = 4;
2027		gelf_update_shdr(scn, &shdr);
2028		if (elf_update(e, ELF_C_WRITE) < 0) {
2029			ret = dt_link_error(dtp, NULL, -1, NULL,
2030			    "failed to add the SUNW_dof section: %s",
2031			    elf_errmsg(elf_errno()));
2032			munmap(buf, dof->dofh_filesz);
2033			elf_end(e);
2034			close(efd);
2035			goto done;
2036		}
2037		munmap(buf, dof->dofh_filesz);
2038		elf_end(e);
2039		close(efd);
2040#endif
2041		(void) close(fd); /* release temporary file */
2042	} else {
2043		(void) close(fd);
2044	}
2045
2046done:
2047	dtrace_dof_destroy(dtp, dof);
2048
2049#if !defined(sun)
2050	unlink(tfile);
2051#endif
2052	return (ret);
2053}
2054