/*-
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.
 * Copyright (c) 2005 Robert N. M. Watson
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_ktrace.c	8.2 (Berkeley) 9/23/93
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/10/sys/kern/kern_ktrace.c 315562 2017-03-19 15:56:06Z kib $");

#include "opt_ktrace.h"

#include <sys/param.h>
#include <sys/capsicum.h>
#include <sys/systm.h>
#include <sys/fcntl.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/namei.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/unistd.h>
#include <sys/vnode.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/ktrace.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
#include <sys/syslog.h>
#include <sys/sysproto.h>

#include <security/mac/mac_framework.h>

/*
 * The ktrace facility allows the tracing of certain key events in user space
 * processes, such as system calls, signal delivery, context switches, and
 * user generated events using utrace(2).  It works by streaming event
 * records and data to a vnode associated with the process using the
 * ktrace(2) system call.  In general, records can be written directly from
 * the context that generates the event.  One important exception to this is
 * during a context switch, where sleeping is not permitted.  To handle this
 * case, trace events are generated using in-kernel ktr_request records, and
 * then delivered to disk at a convenient moment -- either immediately, the
 * next traceable event, at system call return, or at process exit.
 *
 * When dealing with multiple threads or processes writing to the same event
 * log, ordering guarantees are weak: specifically, if an event has multiple
 * records (i.e., system call enter and return), they may be interlaced with
 * records from another event.  Process and thread ID information is provided
 * in the record, and user applications can de-interlace events if required.
 */
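
/*
 * Illustrative sketch only (not part of this file): how a userspace process
 * might drive this facility.  It enables syscall and user-event tracing on
 * itself with ktrace(2), emits a custom KTR_USER record with utrace(2), and
 * then disables tracing.  Error handling is elided.
 */
#if 0
#include <sys/param.h>
#include <sys/uio.h>
#include <sys/ktrace.h>
#include <unistd.h>

int
main(void)
{

	/* Stream syscall enter/return and user records to a file. */
	ktrace("ktrace.out", KTROP_SET,
	    KTRFAC_SYSCALL | KTRFAC_SYSRET | KTRFAC_USER, getpid());
	/* Generates a KTR_USER record, handled by sys_utrace() below. */
	utrace("hello", 5);
	ktrace("ktrace.out", KTROP_CLEAR,
	    KTRFAC_SYSCALL | KTRFAC_SYSRET | KTRFAC_USER, getpid());
	return (0);
}
#endif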

static MALLOC_DEFINE(M_KTRACE, "KTRACE", "KTRACE");

#ifdef KTRACE

FEATURE(ktrace, "Kernel support for system-call tracing");

#ifndef KTRACE_REQUEST_POOL
#define	KTRACE_REQUEST_POOL	100
#endif

struct ktr_request {
	struct	ktr_header ktr_header;
	void	*ktr_buffer;
	union {
		struct	ktr_proc_ctor ktr_proc_ctor;
		struct	ktr_cap_fail ktr_cap_fail;
		struct	ktr_syscall ktr_syscall;
		struct	ktr_sysret ktr_sysret;
		struct	ktr_genio ktr_genio;
		struct	ktr_psig ktr_psig;
		struct	ktr_csw ktr_csw;
		struct	ktr_fault ktr_fault;
		struct	ktr_faultend ktr_faultend;
	} ktr_data;
	STAILQ_ENTRY(ktr_request) ktr_list;
};

static int data_lengths[] = {
	[KTR_SYSCALL] = offsetof(struct ktr_syscall, ktr_args),
	[KTR_SYSRET] = sizeof(struct ktr_sysret),
	[KTR_NAMEI] = 0,
	[KTR_GENIO] = sizeof(struct ktr_genio),
	[KTR_PSIG] = sizeof(struct ktr_psig),
	[KTR_CSW] = sizeof(struct ktr_csw),
	[KTR_USER] = 0,
	[KTR_STRUCT] = 0,
	[KTR_SYSCTL] = 0,
	[KTR_PROCCTOR] = sizeof(struct ktr_proc_ctor),
	[KTR_PROCDTOR] = 0,
	[KTR_CAPFAIL] = sizeof(struct ktr_cap_fail),
	[KTR_FAULT] = sizeof(struct ktr_fault),
	[KTR_FAULTEND] = sizeof(struct ktr_faultend),
};
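
/*
 * Illustrative sketch only (not part of this file): on disk, each record is
 * a struct ktr_header followed by ktr_len bytes of payload (the fixed-size
 * entry from data_lengths[] above, then any variable buffer).  A consumer in
 * the spirit of kdump(1) might walk the trace file like this.
 */
#if 0
#include <sys/param.h>
#include <sys/ktrace.h>
#include <stdio.h>
#include <stdlib.h>

static void
walk_trace_file(FILE *fp)
{
	struct ktr_header kth;
	void *payload;

	while (fread(&kth, sizeof(kth), 1, fp) == 1) {
		/* KTR_DROP flags that earlier records were lost. */
		if (kth.ktr_type & KTR_DROP)
			fprintf(stderr, "records were dropped\n");
		payload = malloc(kth.ktr_len);
		if (fread(payload, 1, kth.ktr_len, fp) !=
		    (size_t)kth.ktr_len) {
			free(payload);
			break;
		}
		printf("pid %d tid %ld %s: type %d, %d payload bytes\n",
		    (int)kth.ktr_pid, (long)kth.ktr_tid, kth.ktr_comm,
		    kth.ktr_type & ~KTR_DROP, kth.ktr_len);
		free(payload);
	}
}
#endif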

static STAILQ_HEAD(, ktr_request) ktr_free;

static SYSCTL_NODE(_kern, OID_AUTO, ktrace, CTLFLAG_RD, 0, "KTRACE options");

static u_int ktr_requestpool = KTRACE_REQUEST_POOL;
TUNABLE_INT("kern.ktrace.request_pool", &ktr_requestpool);

u_int ktr_geniosize = PAGE_SIZE;
TUNABLE_INT("kern.ktrace.genio_size", &ktr_geniosize);
SYSCTL_UINT(_kern_ktrace, OID_AUTO, genio_size, CTLFLAG_RW, &ktr_geniosize,
    0, "Maximum size of genio event payload");

static int print_message = 1;
static struct mtx ktrace_mtx;
static struct sx ktrace_sx;

static void ktrace_init(void *dummy);
static int sysctl_kern_ktrace_request_pool(SYSCTL_HANDLER_ARGS);
static u_int ktrace_resize_pool(u_int oldsize, u_int newsize);
static struct ktr_request *ktr_getrequest_entered(struct thread *td, int type);
static struct ktr_request *ktr_getrequest(int type);
static void ktr_submitrequest(struct thread *td, struct ktr_request *req);
static void ktr_freeproc(struct proc *p, struct ucred **uc,
    struct vnode **vp);
static void ktr_freerequest(struct ktr_request *req);
static void ktr_freerequest_locked(struct ktr_request *req);
static void ktr_writerequest(struct thread *td, struct ktr_request *req);
static int ktrcanset(struct thread *, struct proc *);
static int ktrsetchildren(struct thread *, struct proc *, int, int,
    struct vnode *);
static int ktrops(struct thread *, struct proc *, int, int, struct vnode *);
static void ktrprocctor_entered(struct thread *, struct proc *);

/*
 * ktrace itself generates events, such as context switches, which we do not
 * wish to trace.  Maintain a flag, TDP_INKTRACE, on each thread to determine
 * whether or not it is in a region where tracing of events should be
 * suppressed.
 */
static void
ktrace_enter(struct thread *td)
{

	KASSERT(!(td->td_pflags & TDP_INKTRACE), ("ktrace_enter: flag set"));
	td->td_pflags |= TDP_INKTRACE;
}

static void
ktrace_exit(struct thread *td)
{

	KASSERT(td->td_pflags & TDP_INKTRACE, ("ktrace_exit: flag not set"));
	td->td_pflags &= ~TDP_INKTRACE;
}

static void
ktrace_assert(struct thread *td)
{

	KASSERT(td->td_pflags & TDP_INKTRACE, ("ktrace_assert: flag not set"));
}

static void
ktrace_init(void *dummy)
{
	struct ktr_request *req;
	int i;

	mtx_init(&ktrace_mtx, "ktrace", NULL, MTX_DEF | MTX_QUIET);
	sx_init(&ktrace_sx, "ktrace_sx");
	STAILQ_INIT(&ktr_free);
	for (i = 0; i < ktr_requestpool; i++) {
		req = malloc(sizeof(struct ktr_request), M_KTRACE, M_WAITOK);
		STAILQ_INSERT_HEAD(&ktr_free, req, ktr_list);
	}
}
SYSINIT(ktrace_init, SI_SUB_KTRACE, SI_ORDER_ANY, ktrace_init, NULL);

static int
sysctl_kern_ktrace_request_pool(SYSCTL_HANDLER_ARGS)
{
	struct thread *td;
	u_int newsize, oldsize, wantsize;
	int error;

	/* Handle easy read-only case first to avoid warnings from GCC. */
	if (!req->newptr) {
		oldsize = ktr_requestpool;
		return (SYSCTL_OUT(req, &oldsize, sizeof(u_int)));
	}

	error = SYSCTL_IN(req, &wantsize, sizeof(u_int));
	if (error)
		return (error);
	td = curthread;
	ktrace_enter(td);
	oldsize = ktr_requestpool;
	newsize = ktrace_resize_pool(oldsize, wantsize);
	ktrace_exit(td);
	error = SYSCTL_OUT(req, &oldsize, sizeof(u_int));
	if (error)
		return (error);
	if (wantsize > oldsize && newsize < wantsize)
		return (ENOSPC);
	return (0);
}
SYSCTL_PROC(_kern_ktrace, OID_AUTO, request_pool, CTLTYPE_UINT|CTLFLAG_RW,
    &ktr_requestpool, 0, sysctl_kern_ktrace_request_pool, "IU",
    "Pool buffer size for ktrace(1)");

static u_int
ktrace_resize_pool(u_int oldsize, u_int newsize)
{
	STAILQ_HEAD(, ktr_request) ktr_new;
	struct ktr_request *req;
	int bound;

	print_message = 1;
	bound = newsize - oldsize;
	if (bound == 0)
		return (ktr_requestpool);
	if (bound < 0) {
		mtx_lock(&ktrace_mtx);
		/* Shrink pool down to newsize if possible. */
		while (bound++ < 0) {
			req = STAILQ_FIRST(&ktr_free);
			if (req == NULL)
				break;
			STAILQ_REMOVE_HEAD(&ktr_free, ktr_list);
			ktr_requestpool--;
			free(req, M_KTRACE);
		}
	} else {
		/* Grow pool up to newsize. */
		STAILQ_INIT(&ktr_new);
		while (bound-- > 0) {
			req = malloc(sizeof(struct ktr_request), M_KTRACE,
			    M_WAITOK);
			STAILQ_INSERT_HEAD(&ktr_new, req, ktr_list);
		}
		mtx_lock(&ktrace_mtx);
		STAILQ_CONCAT(&ktr_free, &ktr_new);
		ktr_requestpool += (newsize - oldsize);
	}
	mtx_unlock(&ktrace_mtx);
	return (ktr_requestpool);
}
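
/*
 * Illustrative sketch only (not part of this file): the pool size above can
 * be tuned from userspace through the kern.ktrace.request_pool sysctl, e.g.
 * via sysctlbyname(3).
 */
#if 0
#include <sys/types.h>
#include <sys/sysctl.h>

static int
set_request_pool(u_int newsize)
{

	return (sysctlbyname("kern.ktrace.request_pool", NULL, NULL,
	    &newsize, sizeof(newsize)));
}
#endif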

/*
 * ktr_getrequest() assumes that ktr_comm[] is the same size as td_name[].
 */
CTASSERT(sizeof(((struct ktr_header *)NULL)->ktr_comm) ==
    sizeof(((struct thread *)NULL)->td_name));

static struct ktr_request *
ktr_getrequest_entered(struct thread *td, int type)
{
	struct ktr_request *req;
	struct proc *p = td->td_proc;
	int pm;

	mtx_lock(&ktrace_mtx);
	if (!KTRCHECK(td, type)) {
		mtx_unlock(&ktrace_mtx);
		return (NULL);
	}
	req = STAILQ_FIRST(&ktr_free);
	if (req != NULL) {
		STAILQ_REMOVE_HEAD(&ktr_free, ktr_list);
		req->ktr_header.ktr_type = type;
		if (p->p_traceflag & KTRFAC_DROP) {
			req->ktr_header.ktr_type |= KTR_DROP;
			p->p_traceflag &= ~KTRFAC_DROP;
		}
		mtx_unlock(&ktrace_mtx);
		microtime(&req->ktr_header.ktr_time);
		req->ktr_header.ktr_pid = p->p_pid;
		req->ktr_header.ktr_tid = td->td_tid;
		bcopy(td->td_name, req->ktr_header.ktr_comm,
		    sizeof(req->ktr_header.ktr_comm));
		req->ktr_buffer = NULL;
		req->ktr_header.ktr_len = 0;
	} else {
		p->p_traceflag |= KTRFAC_DROP;
		pm = print_message;
		print_message = 0;
		mtx_unlock(&ktrace_mtx);
		if (pm)
			printf("Out of ktrace request objects.\n");
	}
	return (req);
}

static struct ktr_request *
ktr_getrequest(int type)
{
	struct thread *td = curthread;
	struct ktr_request *req;

	ktrace_enter(td);
	req = ktr_getrequest_entered(td, type);
	if (req == NULL)
		ktrace_exit(td);

	return (req);
}

/*
 * Some trace generation environments don't permit direct access to VFS,
 * such as during a context switch where sleeping is not allowed.  Under
 * these circumstances, queue a request to the thread to be written
 * asynchronously later.
 */
static void
ktr_enqueuerequest(struct thread *td, struct ktr_request *req)
{

	mtx_lock(&ktrace_mtx);
	STAILQ_INSERT_TAIL(&td->td_proc->p_ktr, req, ktr_list);
	mtx_unlock(&ktrace_mtx);
}

/*
 * Drain any pending ktrace records from the per-thread queue to disk.
 * This is used both internally before committing other records, and also
 * on system call return.  We drain all the ones we can find at the time
 * when drain is requested, but don't keep draining after that as those
 * events may be approximately "after" the current event.
 */
static void
ktr_drain(struct thread *td)
{
	struct ktr_request *queued_req;
	STAILQ_HEAD(, ktr_request) local_queue;

	ktrace_assert(td);
	sx_assert(&ktrace_sx, SX_XLOCKED);

	STAILQ_INIT(&local_queue);

	if (!STAILQ_EMPTY(&td->td_proc->p_ktr)) {
		mtx_lock(&ktrace_mtx);
		STAILQ_CONCAT(&local_queue, &td->td_proc->p_ktr);
		mtx_unlock(&ktrace_mtx);

		while ((queued_req = STAILQ_FIRST(&local_queue))) {
			STAILQ_REMOVE_HEAD(&local_queue, ktr_list);
			ktr_writerequest(td, queued_req);
			ktr_freerequest(queued_req);
		}
	}
}

/*
 * Submit a trace record for immediate commit to disk -- to be used only
 * where entering VFS is OK.  First drain any pending records that may have
 * been cached in the thread.
 */
static void
ktr_submitrequest(struct thread *td, struct ktr_request *req)
{

	ktrace_assert(td);

	sx_xlock(&ktrace_sx);
	ktr_drain(td);
	ktr_writerequest(td, req);
	ktr_freerequest(req);
	sx_xunlock(&ktrace_sx);
	ktrace_exit(td);
}

static void
ktr_freerequest(struct ktr_request *req)
{

	mtx_lock(&ktrace_mtx);
	ktr_freerequest_locked(req);
	mtx_unlock(&ktrace_mtx);
}

static void
ktr_freerequest_locked(struct ktr_request *req)
{

	mtx_assert(&ktrace_mtx, MA_OWNED);
	if (req->ktr_buffer != NULL)
		free(req->ktr_buffer, M_KTRACE);
	STAILQ_INSERT_HEAD(&ktr_free, req, ktr_list);
}

/*
 * Disable tracing for a process and release all associated resources.
 * The caller is responsible for releasing a reference on the returned
 * vnode and credentials.
 */
static void
ktr_freeproc(struct proc *p, struct ucred **uc, struct vnode **vp)
{
	struct ktr_request *req;

	PROC_LOCK_ASSERT(p, MA_OWNED);
	mtx_assert(&ktrace_mtx, MA_OWNED);
	*uc = p->p_tracecred;
	p->p_tracecred = NULL;
	if (vp != NULL)
		*vp = p->p_tracevp;
	p->p_tracevp = NULL;
	p->p_traceflag = 0;
	while ((req = STAILQ_FIRST(&p->p_ktr)) != NULL) {
		STAILQ_REMOVE_HEAD(&p->p_ktr, ktr_list);
		ktr_freerequest_locked(req);
	}
}

void
ktrsyscall(int code, int narg, register_t args[])
{
	struct ktr_request *req;
	struct ktr_syscall *ktp;
	size_t buflen;
	char *buf = NULL;

	buflen = sizeof(register_t) * narg;
	if (buflen > 0) {
		buf = malloc(buflen, M_KTRACE, M_WAITOK);
		bcopy(args, buf, buflen);
	}
	req = ktr_getrequest(KTR_SYSCALL);
	if (req == NULL) {
		if (buf != NULL)
			free(buf, M_KTRACE);
		return;
	}
	ktp = &req->ktr_data.ktr_syscall;
	ktp->ktr_code = code;
	ktp->ktr_narg = narg;
	if (buflen > 0) {
		req->ktr_header.ktr_len = buflen;
		req->ktr_buffer = buf;
	}
	ktr_submitrequest(curthread, req);
}

void
ktrsysret(int code, int error, register_t retval)
{
	struct ktr_request *req;
	struct ktr_sysret *ktp;

	req = ktr_getrequest(KTR_SYSRET);
	if (req == NULL)
		return;
	ktp = &req->ktr_data.ktr_sysret;
	ktp->ktr_code = code;
	ktp->ktr_error = error;
	ktp->ktr_retval = ((error == 0) ? retval : 0);	/* what about val2 ? */
	ktr_submitrequest(curthread, req);
}
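
/*
 * Illustrative sketch only (not part of this file): a KTR_SYSCALL payload
 * written by ktrsyscall() above is a struct ktr_syscall immediately followed
 * by ktr_narg register_t argument words.  A consumer might decode it so:
 */
#if 0
#include <sys/types.h>
#include <sys/ktrace.h>
#include <stdint.h>
#include <stdio.h>

static void
print_syscall_record(const struct ktr_syscall *ktp)
{
	const register_t *args = ktp->ktr_args;
	int i;

	printf("syscall %d, %d args:", ktp->ktr_code, ktp->ktr_narg);
	for (i = 0; i < ktp->ktr_narg; i++)
		printf(" %#jx", (uintmax_t)args[i]);
	printf("\n");
}
#endif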

/*
 * When a setuid process execs, disable tracing.
 *
 * XXX: We toss any pending asynchronous records.
 */
void
ktrprocexec(struct proc *p, struct ucred **uc, struct vnode **vp)
{

	PROC_LOCK_ASSERT(p, MA_OWNED);
	mtx_lock(&ktrace_mtx);
	ktr_freeproc(p, uc, vp);
	mtx_unlock(&ktrace_mtx);
}

/*
 * When a process exits, drain per-process asynchronous trace records
 * and disable tracing.
 */
void
ktrprocexit(struct thread *td)
{
	struct ktr_request *req;
	struct proc *p;
	struct ucred *cred;
	struct vnode *vp;

	p = td->td_proc;
	if (p->p_traceflag == 0)
		return;

	ktrace_enter(td);
	req = ktr_getrequest_entered(td, KTR_PROCDTOR);
	if (req != NULL)
		ktr_enqueuerequest(td, req);
	sx_xlock(&ktrace_sx);
	ktr_drain(td);
	sx_xunlock(&ktrace_sx);
	PROC_LOCK(p);
	mtx_lock(&ktrace_mtx);
	ktr_freeproc(p, &cred, &vp);
	mtx_unlock(&ktrace_mtx);
	PROC_UNLOCK(p);
	if (vp != NULL)
		vrele(vp);
	if (cred != NULL)
		crfree(cred);
	ktrace_exit(td);
}

static void
ktrprocctor_entered(struct thread *td, struct proc *p)
{
	struct ktr_proc_ctor *ktp;
	struct ktr_request *req;
	struct thread *td2;

	ktrace_assert(td);
	td2 = FIRST_THREAD_IN_PROC(p);
	req = ktr_getrequest_entered(td2, KTR_PROCCTOR);
	if (req == NULL)
		return;
	ktp = &req->ktr_data.ktr_proc_ctor;
	ktp->sv_flags = p->p_sysent->sv_flags;
	ktr_enqueuerequest(td2, req);
}

void
ktrprocctor(struct proc *p)
{
	struct thread *td = curthread;

	if ((p->p_traceflag & KTRFAC_MASK) == 0)
		return;

	ktrace_enter(td);
	ktrprocctor_entered(td, p);
	ktrace_exit(td);
}

/*
 * When a process forks, enable tracing in the new process if needed.
 */
void
ktrprocfork(struct proc *p1, struct proc *p2)
{

	PROC_LOCK(p1);
	mtx_lock(&ktrace_mtx);
	KASSERT(p2->p_tracevp == NULL, ("new process has a ktrace vnode"));
	if (p1->p_traceflag & KTRFAC_INHERIT) {
		p2->p_traceflag = p1->p_traceflag;
		if ((p2->p_tracevp = p1->p_tracevp) != NULL) {
			VREF(p2->p_tracevp);
			KASSERT(p1->p_tracecred != NULL,
			    ("ktrace vnode with no cred"));
			p2->p_tracecred = crhold(p1->p_tracecred);
		}
	}
	mtx_unlock(&ktrace_mtx);
	PROC_UNLOCK(p1);

	ktrprocctor(p2);
}

/*
 * When a thread returns, drain any asynchronous records generated by the
 * system call.
 */
void
ktruserret(struct thread *td)
{

	ktrace_enter(td);
	sx_xlock(&ktrace_sx);
	ktr_drain(td);
	sx_xunlock(&ktrace_sx);
	ktrace_exit(td);
}

void
ktrnamei(char *path)
{
	struct ktr_request *req;
	int namelen;
	char *buf = NULL;

	namelen = strlen(path);
	if (namelen > 0) {
		buf = malloc(namelen, M_KTRACE, M_WAITOK);
		bcopy(path, buf, namelen);
	}
	req = ktr_getrequest(KTR_NAMEI);
	if (req == NULL) {
		if (buf != NULL)
			free(buf, M_KTRACE);
		return;
	}
	if (namelen > 0) {
		req->ktr_header.ktr_len = namelen;
		req->ktr_buffer = buf;
	}
	ktr_submitrequest(curthread, req);
}

void
ktrsysctl(int *name, u_int namelen)
{
	struct ktr_request *req;
	u_int mib[CTL_MAXNAME + 2];
	char *mibname;
	size_t mibnamelen;
	int error;

	/* Lookup name of mib via the sysctl {0, 1, <mib>} name node. */
	KASSERT(namelen <= CTL_MAXNAME, ("sysctl MIB too long"));
	mib[0] = 0;
	mib[1] = 1;
	bcopy(name, mib + 2, namelen * sizeof(*name));
	mibnamelen = 128;
	mibname = malloc(mibnamelen, M_KTRACE, M_WAITOK);
	error = kernel_sysctl(curthread, mib, namelen + 2, mibname,
	    &mibnamelen, NULL, 0, &mibnamelen, 0);
	if (error) {
		free(mibname, M_KTRACE);
		return;
	}
	req = ktr_getrequest(KTR_SYSCTL);
	if (req == NULL) {
		free(mibname, M_KTRACE);
		return;
	}
	req->ktr_header.ktr_len = mibnamelen;
	req->ktr_buffer = mibname;
	ktr_submitrequest(curthread, req);
}

void
ktrgenio(int fd, enum uio_rw rw, struct uio *uio, int error)
{
	struct ktr_request *req;
	struct ktr_genio *ktg;
	int datalen;
	char *buf;

	if (error) {
		free(uio, M_IOV);
		return;
	}
	uio->uio_offset = 0;
	uio->uio_rw = UIO_WRITE;
	datalen = MIN(uio->uio_resid, ktr_geniosize);
	buf = malloc(datalen, M_KTRACE, M_WAITOK);
	error = uiomove(buf, datalen, uio);
	free(uio, M_IOV);
	if (error) {
		free(buf, M_KTRACE);
		return;
	}
	req = ktr_getrequest(KTR_GENIO);
	if (req == NULL) {
		free(buf, M_KTRACE);
		return;
	}
	ktg = &req->ktr_data.ktr_genio;
	ktg->ktr_fd = fd;
	ktg->ktr_rw = rw;
	req->ktr_header.ktr_len = datalen;
	req->ktr_buffer = buf;
	ktr_submitrequest(curthread, req);
}

void
ktrpsig(int sig, sig_t action, sigset_t *mask, int code)
{
	struct thread *td = curthread;
	struct ktr_request *req;
	struct ktr_psig *kp;

	req = ktr_getrequest(KTR_PSIG);
	if (req == NULL)
		return;
	kp = &req->ktr_data.ktr_psig;
	kp->signo = (char)sig;
	kp->action = action;
	kp->mask = *mask;
	kp->code = code;
	ktr_enqueuerequest(td, req);
	ktrace_exit(td);
}

void
ktrcsw(int out, int user, const char *wmesg)
{
	struct thread *td = curthread;
	struct ktr_request *req;
	struct ktr_csw *kc;

	req = ktr_getrequest(KTR_CSW);
	if (req == NULL)
		return;
	kc = &req->ktr_data.ktr_csw;
	kc->out = out;
	kc->user = user;
	if (wmesg != NULL)
		strlcpy(kc->wmesg, wmesg, sizeof(kc->wmesg));
	else
		bzero(kc->wmesg, sizeof(kc->wmesg));
	ktr_enqueuerequest(td, req);
	ktrace_exit(td);
}

void
ktrstruct(const char *name, void *data, size_t datalen)
{
	struct ktr_request *req;
	char *buf = NULL;
	size_t buflen;

	if (!data)
		datalen = 0;
	buflen = strlen(name) + 1 + datalen;
	buf = malloc(buflen, M_KTRACE, M_WAITOK);
	strcpy(buf, name);
	bcopy(data, buf + strlen(name) + 1, datalen);
	if ((req = ktr_getrequest(KTR_STRUCT)) == NULL) {
		free(buf, M_KTRACE);
		return;
	}
	req->ktr_buffer = buf;
	req->ktr_header.ktr_len = buflen;
	ktr_submitrequest(curthread, req);
}
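
/*
 * Illustrative sketch only (not part of this file): ktrstruct() above packs
 * its payload as a NUL-terminated structure name followed by the raw
 * structure bytes.  A consumer might split the two halves like this.
 */
#if 0
#include <string.h>

static void
decode_struct_payload(const char *buf, size_t len, const char **name,
    const void **data, size_t *datalen)
{
	size_t namelen = strlen(buf);

	*name = buf;
	*data = buf + namelen + 1;
	*datalen = len - namelen - 1;
}
#endif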

void
ktrcapfail(enum ktr_cap_fail_type type, const cap_rights_t *needed,
    const cap_rights_t *held)
{
	struct thread *td = curthread;
	struct ktr_request *req;
	struct ktr_cap_fail *kcf;

	req = ktr_getrequest(KTR_CAPFAIL);
	if (req == NULL)
		return;
	kcf = &req->ktr_data.ktr_cap_fail;
	kcf->cap_type = type;
	if (needed != NULL)
		kcf->cap_needed = *needed;
	else
		cap_rights_init(&kcf->cap_needed);
	if (held != NULL)
		kcf->cap_held = *held;
	else
		cap_rights_init(&kcf->cap_held);
	ktr_enqueuerequest(td, req);
	ktrace_exit(td);
}

void
ktrfault(vm_offset_t vaddr, int type)
{
	struct thread *td = curthread;
	struct ktr_request *req;
	struct ktr_fault *kf;

	req = ktr_getrequest(KTR_FAULT);
	if (req == NULL)
		return;
	kf = &req->ktr_data.ktr_fault;
	kf->vaddr = vaddr;
	kf->type = type;
	ktr_enqueuerequest(td, req);
	ktrace_exit(td);
}

void
ktrfaultend(int result)
{
	struct thread *td = curthread;
	struct ktr_request *req;
	struct ktr_faultend *kf;

	req = ktr_getrequest(KTR_FAULTEND);
	if (req == NULL)
		return;
	kf = &req->ktr_data.ktr_faultend;
	kf->result = result;
	ktr_enqueuerequest(td, req);
	ktrace_exit(td);
}
#endif /* KTRACE */

/* Interface and common routines */

#ifndef _SYS_SYSPROTO_H_
struct ktrace_args {
	char	*fname;
	int	ops;
	int	facs;
	int	pid;
};
#endif
/* ARGSUSED */
int
sys_ktrace(struct thread *td, struct ktrace_args *uap)
{
#ifdef KTRACE
	struct vnode *vp = NULL;
	struct proc *p;
	struct pgrp *pg;
	int facs = uap->facs & ~KTRFAC_ROOT;
	int ops = KTROP(uap->ops);
	int descend = uap->ops & KTRFLAG_DESCEND;
	int nfound, ret = 0;
	int flags, error = 0;
	struct nameidata nd;
	struct ucred *cred;

	/*
	 * Need something to (un)trace.
	 */
	if (ops != KTROP_CLEARFILE && facs == 0)
		return (EINVAL);

	ktrace_enter(td);
	if (ops != KTROP_CLEAR) {
		/*
		 * An operation which requires a file argument.
		 */
		NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->fname, td);
		flags = FREAD | FWRITE | O_NOFOLLOW;
		error = vn_open(&nd, &flags, 0, NULL);
		if (error) {
			ktrace_exit(td);
			return (error);
		}
		NDFREE(&nd, NDF_ONLY_PNBUF);
		vp = nd.ni_vp;
		VOP_UNLOCK(vp, 0);
		if (vp->v_type != VREG) {
			(void) vn_close(vp, FREAD|FWRITE, td->td_ucred, td);
			ktrace_exit(td);
			return (EACCES);
		}
	}
	/*
	 * Clear all uses of the tracefile.
	 */
	if (ops == KTROP_CLEARFILE) {
		int vrele_count;

		vrele_count = 0;
		sx_slock(&allproc_lock);
		FOREACH_PROC_IN_SYSTEM(p) {
			PROC_LOCK(p);
			if (p->p_tracevp == vp) {
				if (ktrcanset(td, p)) {
					mtx_lock(&ktrace_mtx);
					ktr_freeproc(p, &cred, NULL);
					mtx_unlock(&ktrace_mtx);
					vrele_count++;
					crfree(cred);
				} else
					error = EPERM;
			}
			PROC_UNLOCK(p);
		}
		sx_sunlock(&allproc_lock);
		if (vrele_count > 0) {
			while (vrele_count-- > 0)
				vrele(vp);
		}
		goto done;
	}
	/*
	 * Do the actual (un)tracing.
	 */
	sx_slock(&proctree_lock);
	if (uap->pid < 0) {
		/*
		 * By process group.
		 */
		pg = pgfind(-uap->pid);
		if (pg == NULL) {
			sx_sunlock(&proctree_lock);
			error = ESRCH;
			goto done;
		}
		/*
		 * ktrops() may call vrele().  Lock pg_members
		 * by the proctree_lock rather than pg_mtx.
		 */
		PGRP_UNLOCK(pg);
		nfound = 0;
		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
			PROC_LOCK(p);
			if (p->p_state == PRS_NEW ||
			    p_cansee(td, p) != 0) {
				PROC_UNLOCK(p);
				continue;
			}
			nfound++;
			if (descend)
				ret |= ktrsetchildren(td, p, ops, facs, vp);
			else
				ret |= ktrops(td, p, ops, facs, vp);
		}
		if (nfound == 0) {
			sx_sunlock(&proctree_lock);
			error = ESRCH;
			goto done;
		}
	} else {
		/*
		 * By pid.
		 */
		p = pfind(uap->pid);
		if (p == NULL)
			error = ESRCH;
		else
			error = p_cansee(td, p);
		if (error) {
			if (p != NULL)
				PROC_UNLOCK(p);
			sx_sunlock(&proctree_lock);
			goto done;
		}
		if (descend)
			ret |= ktrsetchildren(td, p, ops, facs, vp);
		else
			ret |= ktrops(td, p, ops, facs, vp);
	}
	sx_sunlock(&proctree_lock);
	if (!ret)
		error = EPERM;
done:
	if (vp != NULL)
		(void) vn_close(vp, FWRITE, td->td_ucred, td);
	ktrace_exit(td);
	return (error);
#else /* !KTRACE */
	return (ENOSYS);
#endif /* KTRACE */
}

/* ARGSUSED */
int
sys_utrace(struct thread *td, struct utrace_args *uap)
{

#ifdef KTRACE
	struct ktr_request *req;
	void *cp;
	int error;

	if (!KTRPOINT(td, KTR_USER))
		return (0);
	if (uap->len > KTR_USER_MAXLEN)
		return (EINVAL);
	cp = malloc(uap->len, M_KTRACE, M_WAITOK);
	error = copyin(uap->addr, cp, uap->len);
	if (error) {
		free(cp, M_KTRACE);
		return (error);
	}
	req = ktr_getrequest(KTR_USER);
	if (req == NULL) {
		free(cp, M_KTRACE);
		return (ENOMEM);
	}
	req->ktr_buffer = cp;
	req->ktr_header.ktr_len = uap->len;
	ktr_submitrequest(td, req);
	return (0);
#else /* !KTRACE */
	return (ENOSYS);
#endif /* KTRACE */
}

#ifdef KTRACE
static int
ktrops(struct thread *td, struct proc *p, int ops, int facs,
    struct vnode *vp)
{
	struct vnode *tracevp = NULL;
	struct ucred *tracecred = NULL;

	PROC_LOCK_ASSERT(p, MA_OWNED);
	if (!ktrcanset(td, p)) {
		PROC_UNLOCK(p);
		return (0);
	}
	if (p->p_flag & P_WEXIT) {
		/* If the process is exiting, just ignore it. */
		PROC_UNLOCK(p);
		return (1);
	}
	mtx_lock(&ktrace_mtx);
	if (ops == KTROP_SET) {
		if (p->p_tracevp != vp) {
			/*
			 * If the trace file is already in use,
			 * relinquish it below.
			 */
			tracevp = p->p_tracevp;
			VREF(vp);
			p->p_tracevp = vp;
		}
		if (p->p_tracecred != td->td_ucred) {
			tracecred = p->p_tracecred;
			p->p_tracecred = crhold(td->td_ucred);
		}
		p->p_traceflag |= facs;
		if (priv_check(td, PRIV_KTRACE) == 0)
			p->p_traceflag |= KTRFAC_ROOT;
	} else {
		/* KTROP_CLEAR */
		if (((p->p_traceflag &= ~facs) & KTRFAC_MASK) == 0)
			/* no more tracing */
			ktr_freeproc(p, &tracecred, &tracevp);
	}
	mtx_unlock(&ktrace_mtx);
	if ((p->p_traceflag & KTRFAC_MASK) != 0)
		ktrprocctor_entered(td, p);
	PROC_UNLOCK(p);
	if (tracevp != NULL)
		vrele(tracevp);
	if (tracecred != NULL)
		crfree(tracecred);

	return (1);
}

static int
ktrsetchildren(struct thread *td, struct proc *top, int ops, int facs,
    struct vnode *vp)
{
	struct proc *p;
	int ret = 0;

	p = top;
	PROC_LOCK_ASSERT(p, MA_OWNED);
	sx_assert(&proctree_lock, SX_LOCKED);
	for (;;) {
		ret |= ktrops(td, p, ops, facs, vp);
		/*
		 * If this process has children, descend to them next,
		 * otherwise do any siblings, and if done with this level,
		 * follow back up the tree (but not past top).
		 */
		if (!LIST_EMPTY(&p->p_children))
			p = LIST_FIRST(&p->p_children);
		else for (;;) {
			if (p == top)
				return (ret);
			if (LIST_NEXT(p, p_sibling)) {
				p = LIST_NEXT(p, p_sibling);
				break;
			}
			p = p->p_pptr;
		}
		PROC_LOCK(p);
	}
	/*NOTREACHED*/
}

static void
ktr_writerequest(struct thread *td, struct ktr_request *req)
{
	struct ktr_header *kth;
	struct vnode *vp;
	struct proc *p;
	struct ucred *cred;
	struct uio auio;
	struct iovec aiov[3];
	struct mount *mp;
	int datalen, buflen, vrele_count;
	int error;

	/*
	 * We hold the vnode and credential for use in I/O in case ktrace is
	 * disabled on the process as we write out the request.
	 *
	 * XXXRW: This is not ideal: we could end up performing a write after
	 * the vnode has been closed.
	 */
	mtx_lock(&ktrace_mtx);
	vp = td->td_proc->p_tracevp;
	cred = td->td_proc->p_tracecred;

	/*
	 * If vp is NULL, the vp has been cleared out from under this
	 * request, so just drop it.  Make sure the credential and vnode are
	 * in sync: we should have both or neither.
	 */
	if (vp == NULL) {
		KASSERT(cred == NULL, ("ktr_writerequest: cred != NULL"));
		mtx_unlock(&ktrace_mtx);
		return;
	}
	VREF(vp);
	KASSERT(cred != NULL, ("ktr_writerequest: cred == NULL"));
	crhold(cred);
	mtx_unlock(&ktrace_mtx);

	kth = &req->ktr_header;
	KASSERT(((u_short)kth->ktr_type & ~KTR_DROP) <
	    sizeof(data_lengths) / sizeof(data_lengths[0]),
	    ("data_lengths array overflow"));
	datalen = data_lengths[(u_short)kth->ktr_type & ~KTR_DROP];
	buflen = kth->ktr_len;
	auio.uio_iov = &aiov[0];
	auio.uio_offset = 0;
	auio.uio_segflg = UIO_SYSSPACE;
	auio.uio_rw = UIO_WRITE;
	aiov[0].iov_base = (caddr_t)kth;
	aiov[0].iov_len = sizeof(struct ktr_header);
	auio.uio_resid = sizeof(struct ktr_header);
	auio.uio_iovcnt = 1;
	auio.uio_td = td;
	if (datalen != 0) {
		aiov[1].iov_base = (caddr_t)&req->ktr_data;
		aiov[1].iov_len = datalen;
		auio.uio_resid += datalen;
		auio.uio_iovcnt++;
		kth->ktr_len += datalen;
	}
	if (buflen != 0) {
		KASSERT(req->ktr_buffer != NULL, ("ktrace: nothing to write"));
		aiov[auio.uio_iovcnt].iov_base = req->ktr_buffer;
		aiov[auio.uio_iovcnt].iov_len = buflen;
		auio.uio_resid += buflen;
		auio.uio_iovcnt++;
	}

	vn_start_write(vp, &mp, V_WAIT);
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
#ifdef MAC
	error = mac_vnode_check_write(cred, NOCRED, vp);
	if (error == 0)
#endif
		error = VOP_WRITE(vp, &auio, IO_UNIT | IO_APPEND, cred);
	VOP_UNLOCK(vp, 0);
	vn_finished_write(mp);
	crfree(cred);
	if (!error) {
		vrele(vp);
		return;
	}

	/*
	 * If error encountered, give up tracing on this vnode.  We defer
	 * all the vrele()'s on the vnode until after we are finished walking
	 * the various lists to avoid needlessly holding locks.
	 * NB: at this point we still hold the vnode reference that must
	 * not go away as we need the valid vnode to compare with.  Thus let
	 * vrele_count start at 1 and the reference will be freed
	 * by the loop at the end after our last use of vp.
	 */
	log(LOG_NOTICE, "ktrace write failed, errno %d, tracing stopped\n",
	    error);
	vrele_count = 1;
	/*
	 * First, clear this vnode from being used by any processes in the
	 * system.
	 * XXX - If one process gets an EPERM writing to the vnode, should
	 *       we really do this?  Other processes might have suitable
	 *       credentials for the operation.
	 */
	cred = NULL;
	sx_slock(&allproc_lock);
	FOREACH_PROC_IN_SYSTEM(p) {
		PROC_LOCK(p);
		if (p->p_tracevp == vp) {
			mtx_lock(&ktrace_mtx);
			ktr_freeproc(p, &cred, NULL);
			mtx_unlock(&ktrace_mtx);
			vrele_count++;
		}
		PROC_UNLOCK(p);
		if (cred != NULL) {
			crfree(cred);
			cred = NULL;
		}
	}
	sx_sunlock(&allproc_lock);

	while (vrele_count-- > 0)
		vrele(vp);
}
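
/*
 * Illustrative sketch only (not part of this file): the iovec gather pattern
 * used by ktr_writerequest() above, expressed with writev(2) in userspace --
 * header, fixed-size data, and variable buffer submitted as a single write.
 */
#if 0
#include <sys/uio.h>

static ssize_t
gather_write(int fd, void *hdr, size_t hdrlen, void *data, size_t datalen,
    void *buf, size_t buflen)
{
	struct iovec iov[3];
	int iovcnt = 1;

	iov[0].iov_base = hdr;
	iov[0].iov_len = hdrlen;
	if (datalen != 0) {
		iov[iovcnt].iov_base = data;
		iov[iovcnt].iov_len = datalen;
		iovcnt++;
	}
	if (buflen != 0) {
		iov[iovcnt].iov_base = buf;
		iov[iovcnt].iov_len = buflen;
		iovcnt++;
	}
	return (writev(fd, iov, iovcnt));
}
#endif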

/*
 * Return true if caller has permission to set the ktracing state
 * of target.  Essentially, the target can't possess any
 * more permissions than the caller.  KTRFAC_ROOT signifies that
 * root previously set the tracing status on the target process, and
 * so, only root may further change it.
 */
static int
ktrcanset(struct thread *td, struct proc *targetp)
{

	PROC_LOCK_ASSERT(targetp, MA_OWNED);
	if (targetp->p_traceflag & KTRFAC_ROOT &&
	    priv_check(td, PRIV_KTRACE))
		return (0);

	if (p_candebug(td, targetp) != 0)
		return (0);

	return (1);
}

#endif /* KTRACE */