/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.
 * Copyright (c) 2005 Robert N. M. Watson
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_ktrace.c	8.2 (Berkeley) 9/23/93
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_ktrace.h"

#include <sys/param.h>
#include <sys/capsicum.h>
#include <sys/systm.h>
#include <sys/fcntl.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/namei.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/unistd.h>
#include <sys/vnode.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/ktrace.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
#include <sys/syslog.h>
#include <sys/sysproto.h>

#include <security/mac/mac_framework.h>

/*
 * The ktrace facility allows the tracing of certain key events in user space
 * processes, such as system calls, signal delivery, context switches, and
 * user generated events using utrace(2).  It works by streaming event
 * records and data to a vnode associated with the process using the
 * ktrace(2) system call.  In general, records can be written directly from
 * the context that generates the event.  One important exception to this is
 * during a context switch, where sleeping is not permitted.  To handle this
 * case, trace events are generated using in-kernel ktr_request records, and
 * then delivered to disk at a convenient moment -- either immediately, the
 * next traceable event, at system call return, or at process exit.
 *
 * When dealing with multiple threads or processes writing to the same event
 * log, ordering guarantees are weak: specifically, if an event has multiple
 * records (i.e., system call enter and return), they may be interlaced with
 * records from another event.  Process and thread ID information is provided
 * in the record, and user applications can de-interlace events if required.
 */
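/*
 * Illustrative userspace usage (a sketch, not part of this file): a tool
 * would enable tracing of its own process with something like
 *
 *	ktrace("ktrace.out", KTROP_SET,
 *	    KTRFAC_SYSCALL | KTRFAC_SYSRET, getpid());
 *
 * after which records stream to the vnode backing "ktrace.out" and can be
 * decoded with kdump(1).
 */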
static MALLOC_DEFINE(M_KTRACE, "KTRACE", "KTRACE");

#ifdef KTRACE

FEATURE(ktrace, "Kernel support for system-call tracing");

#ifndef KTRACE_REQUEST_POOL
#define	KTRACE_REQUEST_POOL	100
#endif

struct ktr_request {
	struct ktr_header ktr_header;
	void	*ktr_buffer;
	union {
		struct ktr_proc_ctor ktr_proc_ctor;
		struct ktr_cap_fail ktr_cap_fail;
		struct ktr_syscall ktr_syscall;
		struct ktr_sysret ktr_sysret;
		struct ktr_genio ktr_genio;
		struct ktr_psig ktr_psig;
		struct ktr_csw ktr_csw;
		struct ktr_fault ktr_fault;
		struct ktr_faultend ktr_faultend;
		struct ktr_struct_array ktr_struct_array;
	} ktr_data;
	STAILQ_ENTRY(ktr_request) ktr_list;
};

static int data_lengths[] = {
	[KTR_SYSCALL] = offsetof(struct ktr_syscall, ktr_args),
	[KTR_SYSRET] = sizeof(struct ktr_sysret),
	[KTR_NAMEI] = 0,
	[KTR_GENIO] = sizeof(struct ktr_genio),
	[KTR_PSIG] = sizeof(struct ktr_psig),
	[KTR_CSW] = sizeof(struct ktr_csw),
	[KTR_USER] = 0,
	[KTR_STRUCT] = 0,
	[KTR_SYSCTL] = 0,
	[KTR_PROCCTOR] = sizeof(struct ktr_proc_ctor),
	[KTR_PROCDTOR] = 0,
	[KTR_CAPFAIL] = sizeof(struct ktr_cap_fail),
	[KTR_FAULT] = sizeof(struct ktr_fault),
	[KTR_FAULTEND] = sizeof(struct ktr_faultend),
	[KTR_STRUCT_ARRAY] = sizeof(struct ktr_struct_array),
};

static STAILQ_HEAD(, ktr_request) ktr_free;

static SYSCTL_NODE(_kern, OID_AUTO, ktrace, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
    "KTRACE options");

static u_int ktr_requestpool = KTRACE_REQUEST_POOL;
TUNABLE_INT("kern.ktrace.request_pool", &ktr_requestpool);

u_int ktr_geniosize = PAGE_SIZE;
SYSCTL_UINT(_kern_ktrace, OID_AUTO, genio_size, CTLFLAG_RWTUN, &ktr_geniosize,
    0, "Maximum size of genio event payload");

/*
 * Allow suppressing the signal that would otherwise be sent to the traced
 * process, in whose context the ktr record is written.  The limit is
 * applied from the process that set up ktrace, so killing the traced
 * process is not completely fair.
 */
int ktr_filesize_limit_signal = 0;
SYSCTL_INT(_kern_ktrace, OID_AUTO, filesize_limit_signal, CTLFLAG_RWTUN,
    &ktr_filesize_limit_signal, 0,
    "Send SIGXFSZ to the traced process when the log size limit is exceeded");
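/*
 * Example (administrator tuning, a sketch): the knob above can be set at
 * runtime with sysctl(8), e.g.
 *
 *	sysctl kern.ktrace.filesize_limit_signal=1
 *
 * or preloaded from loader.conf, since it is marked CTLFLAG_RWTUN.
 */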
static int print_message = 1;
static struct mtx ktrace_mtx;
static struct sx ktrace_sx;

struct ktr_io_params {
	struct vnode	*vp;
	struct ucred	*cr;
	off_t		lim;
	u_int		refs;
};

static void ktrace_init(void *dummy);
static int sysctl_kern_ktrace_request_pool(SYSCTL_HANDLER_ARGS);
static u_int ktrace_resize_pool(u_int oldsize, u_int newsize);
static struct ktr_request *ktr_getrequest_entered(struct thread *td, int type);
static struct ktr_request *ktr_getrequest(int type);
static void ktr_submitrequest(struct thread *td, struct ktr_request *req);
static struct ktr_io_params *ktr_freeproc(struct proc *p);
static void ktr_freerequest(struct ktr_request *req);
static void ktr_freerequest_locked(struct ktr_request *req);
static void ktr_writerequest(struct thread *td, struct ktr_request *req);
static int ktrcanset(struct thread *, struct proc *);
static int ktrsetchildren(struct thread *, struct proc *, int, int,
    struct ktr_io_params *);
static int ktrops(struct thread *, struct proc *, int, int,
    struct ktr_io_params *);
static void ktrprocctor_entered(struct thread *, struct proc *);

/*
 * ktrace itself generates events, such as context switches, which we do not
 * wish to trace.  Maintain a flag, TDP_INKTRACE, on each thread to determine
 * whether or not it is in a region where tracing of events should be
 * suppressed.
 */
static void
ktrace_enter(struct thread *td)
{

	KASSERT(!(td->td_pflags & TDP_INKTRACE), ("ktrace_enter: flag set"));
	td->td_pflags |= TDP_INKTRACE;
}

static void
ktrace_exit(struct thread *td)
{

	KASSERT(td->td_pflags & TDP_INKTRACE, ("ktrace_exit: flag not set"));
	td->td_pflags &= ~TDP_INKTRACE;
}

static void
ktrace_assert(struct thread *td)
{

	KASSERT(td->td_pflags & TDP_INKTRACE, ("ktrace_assert: flag not set"));
}

static void
ktrace_init(void *dummy)
{
	struct ktr_request *req;
	int i;

	mtx_init(&ktrace_mtx, "ktrace", NULL, MTX_DEF | MTX_QUIET);
	sx_init(&ktrace_sx, "ktrace_sx");
	STAILQ_INIT(&ktr_free);
	for (i = 0; i < ktr_requestpool; i++) {
		req = malloc(sizeof(struct ktr_request), M_KTRACE, M_WAITOK);
		STAILQ_INSERT_HEAD(&ktr_free, req, ktr_list);
	}
}
SYSINIT(ktrace_init, SI_SUB_KTRACE, SI_ORDER_ANY, ktrace_init, NULL);
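/*
 * Example (a sketch): the pool initialized above can be inspected and
 * resized at runtime through the handler below, e.g.
 *
 *	sysctl kern.ktrace.request_pool=200
 *
 * The handler reports ENOSPC if the pool could not be grown all the way to
 * the requested size.
 */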
static int
sysctl_kern_ktrace_request_pool(SYSCTL_HANDLER_ARGS)
{
	struct thread *td;
	u_int newsize, oldsize, wantsize;
	int error;

	/* Handle easy read-only case first to avoid warnings from GCC. */
	if (!req->newptr) {
		oldsize = ktr_requestpool;
		return (SYSCTL_OUT(req, &oldsize, sizeof(u_int)));
	}

	error = SYSCTL_IN(req, &wantsize, sizeof(u_int));
	if (error)
		return (error);
	td = curthread;
	ktrace_enter(td);
	oldsize = ktr_requestpool;
	newsize = ktrace_resize_pool(oldsize, wantsize);
	ktrace_exit(td);
	error = SYSCTL_OUT(req, &oldsize, sizeof(u_int));
	if (error)
		return (error);
	if (wantsize > oldsize && newsize < wantsize)
		return (ENOSPC);
	return (0);
}
SYSCTL_PROC(_kern_ktrace, OID_AUTO, request_pool,
    CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, &ktr_requestpool, 0,
    sysctl_kern_ktrace_request_pool, "IU",
    "Pool buffer size for ktrace(1)");

static u_int
ktrace_resize_pool(u_int oldsize, u_int newsize)
{
	STAILQ_HEAD(, ktr_request) ktr_new;
	struct ktr_request *req;
	int bound;

	print_message = 1;
	bound = newsize - oldsize;
	if (bound == 0)
		return (ktr_requestpool);
	if (bound < 0) {
		mtx_lock(&ktrace_mtx);
		/* Shrink pool down to newsize if possible. */
		while (bound++ < 0) {
			req = STAILQ_FIRST(&ktr_free);
			if (req == NULL)
				break;
			STAILQ_REMOVE_HEAD(&ktr_free, ktr_list);
			ktr_requestpool--;
			free(req, M_KTRACE);
		}
	} else {
		/* Grow pool up to newsize. */
		STAILQ_INIT(&ktr_new);
		while (bound-- > 0) {
			req = malloc(sizeof(struct ktr_request), M_KTRACE,
			    M_WAITOK);
			STAILQ_INSERT_HEAD(&ktr_new, req, ktr_list);
		}
		mtx_lock(&ktrace_mtx);
		STAILQ_CONCAT(&ktr_free, &ktr_new);
		ktr_requestpool += (newsize - oldsize);
	}
	mtx_unlock(&ktrace_mtx);
	return (ktr_requestpool);
}

/* ktr_getrequest() assumes that ktr_comm[] is the same size as td_name[]. */
CTASSERT(sizeof(((struct ktr_header *)NULL)->ktr_comm) ==
    (sizeof((struct thread *)NULL)->td_name));

static struct ktr_request *
ktr_getrequest_entered(struct thread *td, int type)
{
	struct ktr_request *req;
	struct proc *p = td->td_proc;
	int pm;

	mtx_lock(&ktrace_mtx);
	if (!KTRCHECK(td, type)) {
		mtx_unlock(&ktrace_mtx);
		return (NULL);
	}
	req = STAILQ_FIRST(&ktr_free);
	if (req != NULL) {
		STAILQ_REMOVE_HEAD(&ktr_free, ktr_list);
		req->ktr_header.ktr_type = type;
		if (p->p_traceflag & KTRFAC_DROP) {
			req->ktr_header.ktr_type |= KTR_DROP;
			p->p_traceflag &= ~KTRFAC_DROP;
		}
		mtx_unlock(&ktrace_mtx);
		microtime(&req->ktr_header.ktr_time);
		req->ktr_header.ktr_pid = p->p_pid;
		req->ktr_header.ktr_tid = td->td_tid;
		bcopy(td->td_name, req->ktr_header.ktr_comm,
		    sizeof(req->ktr_header.ktr_comm));
		req->ktr_buffer = NULL;
		req->ktr_header.ktr_len = 0;
	} else {
		p->p_traceflag |= KTRFAC_DROP;
		pm = print_message;
		print_message = 0;
		mtx_unlock(&ktrace_mtx);
		if (pm)
			printf("Out of ktrace request objects.\n");
	}
	return (req);
}

static struct ktr_request *
ktr_getrequest(int type)
{
	struct thread *td = curthread;
	struct ktr_request *req;

	ktrace_enter(td);
	req = ktr_getrequest_entered(td, type);
	if (req == NULL)
		ktrace_exit(td);

	return (req);
}
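/*
 * Record lifecycle (summary of the functions above and below):
 * ktr_getrequest() allocates a request from ktr_free and stamps it with the
 * pid, tid, thread name and a timestamp; the caller fills in ktr_data and
 * may attach a malloc'ed ktr_buffer; the request is then either committed
 * synchronously with ktr_submitrequest() or, where sleeping is forbidden,
 * deferred with ktr_enqueuerequest() and flushed by a later ktr_drain().
 */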
/*
 * Some trace generation environments don't permit direct access to VFS,
 * such as during a context switch where sleeping is not allowed.  Under
 * these circumstances, queue a request to the thread to be written
 * asynchronously later.
 */
static void
ktr_enqueuerequest(struct thread *td, struct ktr_request *req)
{

	mtx_lock(&ktrace_mtx);
	STAILQ_INSERT_TAIL(&td->td_proc->p_ktr, req, ktr_list);
	mtx_unlock(&ktrace_mtx);
	thread_lock(td);
	td->td_flags |= TDF_ASTPENDING;
	thread_unlock(td);
}

/*
 * Drain any pending ktrace records from the per-thread queue to disk.  This
 * is used both internally before committing other records, and also on
 * system call return.  We drain all the ones we can find at the time when
 * drain is requested, but don't keep draining after that as those events
 * may be approximately "after" the current event.
 */
static void
ktr_drain(struct thread *td)
{
	struct ktr_request *queued_req;
	STAILQ_HEAD(, ktr_request) local_queue;

	ktrace_assert(td);
	sx_assert(&ktrace_sx, SX_XLOCKED);

	STAILQ_INIT(&local_queue);

	if (!STAILQ_EMPTY(&td->td_proc->p_ktr)) {
		mtx_lock(&ktrace_mtx);
		STAILQ_CONCAT(&local_queue, &td->td_proc->p_ktr);
		mtx_unlock(&ktrace_mtx);

		while ((queued_req = STAILQ_FIRST(&local_queue))) {
			STAILQ_REMOVE_HEAD(&local_queue, ktr_list);
			ktr_writerequest(td, queued_req);
			ktr_freerequest(queued_req);
		}
	}
}

/*
 * Submit a trace record for immediate commit to disk -- to be used only
 * where entering VFS is OK.  First drain any pending records that may have
 * been cached in the thread.
 */
static void
ktr_submitrequest(struct thread *td, struct ktr_request *req)
{

	ktrace_assert(td);

	sx_xlock(&ktrace_sx);
	ktr_drain(td);
	ktr_writerequest(td, req);
	ktr_freerequest(req);
	sx_xunlock(&ktrace_sx);
	ktrace_exit(td);
}

static void
ktr_freerequest(struct ktr_request *req)
{

	mtx_lock(&ktrace_mtx);
	ktr_freerequest_locked(req);
	mtx_unlock(&ktrace_mtx);
}

static void
ktr_freerequest_locked(struct ktr_request *req)
{

	mtx_assert(&ktrace_mtx, MA_OWNED);
	if (req->ktr_buffer != NULL)
		free(req->ktr_buffer, M_KTRACE);
	STAILQ_INSERT_HEAD(&ktr_free, req, ktr_list);
}

static void
ktr_io_params_ref(struct ktr_io_params *kiop)
{
	mtx_assert(&ktrace_mtx, MA_OWNED);
	kiop->refs++;
}

static struct ktr_io_params *
ktr_io_params_rele(struct ktr_io_params *kiop)
{
	mtx_assert(&ktrace_mtx, MA_OWNED);
	if (kiop == NULL)
		return (NULL);
	KASSERT(kiop->refs > 0, ("kiop ref == 0 %p", kiop));
	return (--(kiop->refs) == 0 ? kiop : NULL);
}

void
ktr_io_params_free(struct ktr_io_params *kiop)
{
	if (kiop == NULL)
		return;

	MPASS(kiop->refs == 0);
	vn_close(kiop->vp, FWRITE, kiop->cr, curthread);
	crfree(kiop->cr);
	free(kiop, M_KTRACE);
}

static struct ktr_io_params *
ktr_io_params_alloc(struct thread *td, struct vnode *vp)
{
	struct ktr_io_params *res;

	res = malloc(sizeof(struct ktr_io_params), M_KTRACE, M_WAITOK);
	res->vp = vp;
	res->cr = crhold(td->td_ucred);
	res->lim = lim_cur(td, RLIMIT_FSIZE);
	res->refs = 1;
	return (res);
}
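/*
 * Reference protocol (summary): ktr_io_params_rele() drops a reference
 * under ktrace_mtx and hands the object back only once the count reaches
 * zero; the caller must then invoke ktr_io_params_free() outside the mutex,
 * since vn_close() may sleep.  The canonical caller pattern, used by
 * sys_ktrace() below, is:
 *
 *	mtx_lock(&ktrace_mtx);
 *	kiop = ktr_io_params_rele(kiop);
 *	mtx_unlock(&ktrace_mtx);
 *	ktr_io_params_free(kiop);
 */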
/*
 * Disable tracing for a process and release all associated resources.
 * The caller is responsible for releasing a reference on the returned
 * vnode and credentials.
 */
static struct ktr_io_params *
ktr_freeproc(struct proc *p)
{
	struct ktr_io_params *kiop;
	struct ktr_request *req;

	PROC_LOCK_ASSERT(p, MA_OWNED);
	mtx_assert(&ktrace_mtx, MA_OWNED);
	kiop = ktr_io_params_rele(p->p_ktrioparms);
	p->p_ktrioparms = NULL;
	p->p_traceflag = 0;
	while ((req = STAILQ_FIRST(&p->p_ktr)) != NULL) {
		STAILQ_REMOVE_HEAD(&p->p_ktr, ktr_list);
		ktr_freerequest_locked(req);
	}
	return (kiop);
}

struct vnode *
ktr_get_tracevp(struct proc *p, bool ref)
{
	struct vnode *vp;

	PROC_LOCK_ASSERT(p, MA_OWNED);

	if (p->p_ktrioparms != NULL) {
		vp = p->p_ktrioparms->vp;
		if (ref)
			vrefact(vp);
	} else {
		vp = NULL;
	}
	return (vp);
}

void
ktrsyscall(int code, int narg, register_t args[])
{
	struct ktr_request *req;
	struct ktr_syscall *ktp;
	size_t buflen;
	char *buf = NULL;

	if (__predict_false(curthread->td_pflags & TDP_INKTRACE))
		return;

	buflen = sizeof(register_t) * narg;
	if (buflen > 0) {
		buf = malloc(buflen, M_KTRACE, M_WAITOK);
		bcopy(args, buf, buflen);
	}
	req = ktr_getrequest(KTR_SYSCALL);
	if (req == NULL) {
		if (buf != NULL)
			free(buf, M_KTRACE);
		return;
	}
	ktp = &req->ktr_data.ktr_syscall;
	ktp->ktr_code = code;
	ktp->ktr_narg = narg;
	if (buflen > 0) {
		req->ktr_header.ktr_len = buflen;
		req->ktr_buffer = buf;
	}
	ktr_submitrequest(curthread, req);
}

void
ktrsysret(int code, int error, register_t retval)
{
	struct ktr_request *req;
	struct ktr_sysret *ktp;

	if (__predict_false(curthread->td_pflags & TDP_INKTRACE))
		return;

	req = ktr_getrequest(KTR_SYSRET);
	if (req == NULL)
		return;
	ktp = &req->ktr_data.ktr_sysret;
	ktp->ktr_code = code;
	ktp->ktr_error = error;
	ktp->ktr_retval = ((error == 0) ? retval : 0);	/* what about val2 ? */
	ktr_submitrequest(curthread, req);
}

/*
 * When a setuid process execs, disable tracing.
 *
 * XXX: We toss any pending asynchronous records.
 */
struct ktr_io_params *
ktrprocexec(struct proc *p)
{
	struct ktr_io_params *kiop;

	PROC_LOCK_ASSERT(p, MA_OWNED);

	kiop = p->p_ktrioparms;
	if (kiop == NULL || priv_check_cred(kiop->cr, PRIV_DEBUG_DIFFCRED) == 0)
		return (NULL);

	mtx_lock(&ktrace_mtx);
	kiop = ktr_freeproc(p);
	mtx_unlock(&ktrace_mtx);
	return (kiop);
}
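/*
 * Note on the privilege test above: tracing survives only when the
 * credentials captured at ktrace(2) time hold PRIV_DEBUG_DIFFCRED
 * (priv_check_cred() returns 0); otherwise the trace point is torn down
 * here and the caller frees the returned parameter block once locks are
 * dropped.
 */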
/*
 * When a process exits, drain per-process asynchronous trace records
 * and disable tracing.
 */
void
ktrprocexit(struct thread *td)
{
	struct ktr_request *req;
	struct proc *p;
	struct ktr_io_params *kiop;

	p = td->td_proc;
	if (p->p_traceflag == 0)
		return;

	ktrace_enter(td);
	req = ktr_getrequest_entered(td, KTR_PROCDTOR);
	if (req != NULL)
		ktr_enqueuerequest(td, req);
	sx_xlock(&ktrace_sx);
	ktr_drain(td);
	sx_xunlock(&ktrace_sx);
	PROC_LOCK(p);
	mtx_lock(&ktrace_mtx);
	kiop = ktr_freeproc(p);
	mtx_unlock(&ktrace_mtx);
	PROC_UNLOCK(p);
	ktr_io_params_free(kiop);
	ktrace_exit(td);
}

static void
ktrprocctor_entered(struct thread *td, struct proc *p)
{
	struct ktr_proc_ctor *ktp;
	struct ktr_request *req;
	struct thread *td2;

	ktrace_assert(td);
	td2 = FIRST_THREAD_IN_PROC(p);
	req = ktr_getrequest_entered(td2, KTR_PROCCTOR);
	if (req == NULL)
		return;
	ktp = &req->ktr_data.ktr_proc_ctor;
	ktp->sv_flags = p->p_sysent->sv_flags;
	ktr_enqueuerequest(td2, req);
}

void
ktrprocctor(struct proc *p)
{
	struct thread *td = curthread;

	if ((p->p_traceflag & KTRFAC_MASK) == 0)
		return;

	ktrace_enter(td);
	ktrprocctor_entered(td, p);
	ktrace_exit(td);
}

/*
 * When a process forks, enable tracing in the new process if needed.
 */
void
ktrprocfork(struct proc *p1, struct proc *p2)
{

	MPASS(p2->p_ktrioparms == NULL);
	MPASS(p2->p_traceflag == 0);

	if (p1->p_traceflag == 0)
		return;

	PROC_LOCK(p1);
	mtx_lock(&ktrace_mtx);
	if (p1->p_traceflag & KTRFAC_INHERIT) {
		p2->p_traceflag = p1->p_traceflag;
		if ((p2->p_ktrioparms = p1->p_ktrioparms) != NULL)
			p1->p_ktrioparms->refs++;
	}
	mtx_unlock(&ktrace_mtx);
	PROC_UNLOCK(p1);

	ktrprocctor(p2);
}

/*
 * When a thread returns, drain any asynchronous records generated by the
 * system call.
 */
void
ktruserret(struct thread *td)
{

	ktrace_enter(td);
	sx_xlock(&ktrace_sx);
	ktr_drain(td);
	sx_xunlock(&ktrace_sx);
	ktrace_exit(td);
}

void
ktrnamei(char *path)
{
	struct ktr_request *req;
	int namelen;
	char *buf = NULL;

	namelen = strlen(path);
	if (namelen > 0) {
		buf = malloc(namelen, M_KTRACE, M_WAITOK);
		bcopy(path, buf, namelen);
	}
	req = ktr_getrequest(KTR_NAMEI);
	if (req == NULL) {
		if (buf != NULL)
			free(buf, M_KTRACE);
		return;
	}
	if (namelen > 0) {
		req->ktr_header.ktr_len = namelen;
		req->ktr_buffer = buf;
	}
	ktr_submitrequest(curthread, req);
}
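/*
 * The function below resolves a sysctl OID to its dotted name before
 * logging: internal MIB node 0.1 (the sysctl name-lookup node) takes the
 * OID as input and returns its string name, which then becomes the payload
 * of the KTR_SYSCTL record.
 */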
void
ktrsysctl(int *name, u_int namelen)
{
	struct ktr_request *req;
	u_int mib[CTL_MAXNAME + 2];
	char *mibname;
	size_t mibnamelen;
	int error;

	/* Lookup name of mib. */
	KASSERT(namelen <= CTL_MAXNAME, ("sysctl MIB too long"));
	mib[0] = 0;
	mib[1] = 1;
	bcopy(name, mib + 2, namelen * sizeof(*name));
	mibnamelen = 128;
	mibname = malloc(mibnamelen, M_KTRACE, M_WAITOK);
	error = kernel_sysctl(curthread, mib, namelen + 2, mibname, &mibnamelen,
	    NULL, 0, &mibnamelen, 0);
	if (error) {
		free(mibname, M_KTRACE);
		return;
	}
	req = ktr_getrequest(KTR_SYSCTL);
	if (req == NULL) {
		free(mibname, M_KTRACE);
		return;
	}
	req->ktr_header.ktr_len = mibnamelen;
	req->ktr_buffer = mibname;
	ktr_submitrequest(curthread, req);
}

void
ktrgenio(int fd, enum uio_rw rw, struct uio *uio, int error)
{
	struct ktr_request *req;
	struct ktr_genio *ktg;
	int datalen;
	char *buf;

	if (error) {
		free(uio, M_IOV);
		return;
	}
	uio->uio_offset = 0;
	uio->uio_rw = UIO_WRITE;
	datalen = MIN(uio->uio_resid, ktr_geniosize);
	buf = malloc(datalen, M_KTRACE, M_WAITOK);
	error = uiomove(buf, datalen, uio);
	free(uio, M_IOV);
	if (error) {
		free(buf, M_KTRACE);
		return;
	}
	req = ktr_getrequest(KTR_GENIO);
	if (req == NULL) {
		free(buf, M_KTRACE);
		return;
	}
	ktg = &req->ktr_data.ktr_genio;
	ktg->ktr_fd = fd;
	ktg->ktr_rw = rw;
	req->ktr_header.ktr_len = datalen;
	req->ktr_buffer = buf;
	ktr_submitrequest(curthread, req);
}

void
ktrpsig(int sig, sig_t action, sigset_t *mask, int code)
{
	struct thread *td = curthread;
	struct ktr_request *req;
	struct ktr_psig *kp;

	req = ktr_getrequest(KTR_PSIG);
	if (req == NULL)
		return;
	kp = &req->ktr_data.ktr_psig;
	kp->signo = (char)sig;
	kp->action = action;
	kp->mask = *mask;
	kp->code = code;
	ktr_enqueuerequest(td, req);
	ktrace_exit(td);
}

void
ktrcsw(int out, int user, const char *wmesg)
{
	struct thread *td = curthread;
	struct ktr_request *req;
	struct ktr_csw *kc;

	if (__predict_false(curthread->td_pflags & TDP_INKTRACE))
		return;

	req = ktr_getrequest(KTR_CSW);
	if (req == NULL)
		return;
	kc = &req->ktr_data.ktr_csw;
	kc->out = out;
	kc->user = user;
	if (wmesg != NULL)
		strlcpy(kc->wmesg, wmesg, sizeof(kc->wmesg));
	else
		bzero(kc->wmesg, sizeof(kc->wmesg));
	ktr_enqueuerequest(td, req);
	ktrace_exit(td);
}

void
ktrstruct(const char *name, const void *data, size_t datalen)
{
	struct ktr_request *req;
	char *buf;
	size_t buflen, namelen;

	if (__predict_false(curthread->td_pflags & TDP_INKTRACE))
		return;

	if (data == NULL)
		datalen = 0;
	namelen = strlen(name) + 1;
	buflen = namelen + datalen;
	buf = malloc(buflen, M_KTRACE, M_WAITOK);
	strcpy(buf, name);
	bcopy(data, buf + namelen, datalen);
	if ((req = ktr_getrequest(KTR_STRUCT)) == NULL) {
		free(buf, M_KTRACE);
		return;
	}
	req->ktr_buffer = buf;
	req->ktr_header.ktr_len = buflen;
	ktr_submitrequest(curthread, req);
}

void
ktrstruct_error(const char *name, const void *data, size_t datalen, int error)
{

	if (error == 0)
		ktrstruct(name, data, datalen);
}
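/*
 * Example caller of ktrstruct() (a sketch; sys/ktrace.h provides wrappers
 * such as ktrsockaddr()):
 *
 *	ktrstruct("sockaddr", sa, sa->sa_len);
 *
 * emits a KTR_STRUCT record whose payload is the NUL-terminated name
 * followed by the raw structure bytes, which kdump(1) decodes by name.
 */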
void
ktrstructarray(const char *name, enum uio_seg seg, const void *data,
    int num_items, size_t struct_size)
{
	struct ktr_request *req;
	struct ktr_struct_array *ksa;
	char *buf;
	size_t buflen, datalen, namelen;
	int max_items;

	if (__predict_false(curthread->td_pflags & TDP_INKTRACE))
		return;
	if (num_items < 0)
		return;

	/* Trim array length to genio size. */
	max_items = ktr_geniosize / struct_size;
	if (num_items > max_items) {
		if (max_items == 0)
			num_items = 1;
		else
			num_items = max_items;
	}
	datalen = num_items * struct_size;

	if (data == NULL)
		datalen = 0;

	namelen = strlen(name) + 1;
	buflen = namelen + datalen;
	buf = malloc(buflen, M_KTRACE, M_WAITOK);
	strcpy(buf, name);
	if (seg == UIO_SYSSPACE)
		bcopy(data, buf + namelen, datalen);
	else {
		if (copyin(data, buf + namelen, datalen) != 0) {
			free(buf, M_KTRACE);
			return;
		}
	}
	if ((req = ktr_getrequest(KTR_STRUCT_ARRAY)) == NULL) {
		free(buf, M_KTRACE);
		return;
	}
	ksa = &req->ktr_data.ktr_struct_array;
	ksa->struct_size = struct_size;
	req->ktr_buffer = buf;
	req->ktr_header.ktr_len = buflen;
	ktr_submitrequest(curthread, req);
}

void
ktrcapfail(enum ktr_cap_fail_type type, const cap_rights_t *needed,
    const cap_rights_t *held)
{
	struct thread *td = curthread;
	struct ktr_request *req;
	struct ktr_cap_fail *kcf;

	if (__predict_false(curthread->td_pflags & TDP_INKTRACE))
		return;

	req = ktr_getrequest(KTR_CAPFAIL);
	if (req == NULL)
		return;
	kcf = &req->ktr_data.ktr_cap_fail;
	kcf->cap_type = type;
	if (needed != NULL)
		kcf->cap_needed = *needed;
	else
		cap_rights_init(&kcf->cap_needed);
	if (held != NULL)
		kcf->cap_held = *held;
	else
		cap_rights_init(&kcf->cap_held);
	ktr_enqueuerequest(td, req);
	ktrace_exit(td);
}

void
ktrfault(vm_offset_t vaddr, int type)
{
	struct thread *td = curthread;
	struct ktr_request *req;
	struct ktr_fault *kf;

	if (__predict_false(curthread->td_pflags & TDP_INKTRACE))
		return;

	req = ktr_getrequest(KTR_FAULT);
	if (req == NULL)
		return;
	kf = &req->ktr_data.ktr_fault;
	kf->vaddr = vaddr;
	kf->type = type;
	ktr_enqueuerequest(td, req);
	ktrace_exit(td);
}

void
ktrfaultend(int result)
{
	struct thread *td = curthread;
	struct ktr_request *req;
	struct ktr_faultend *kf;

	if (__predict_false(curthread->td_pflags & TDP_INKTRACE))
		return;

	req = ktr_getrequest(KTR_FAULTEND);
	if (req == NULL)
		return;
	kf = &req->ktr_data.ktr_faultend;
	kf->result = result;
	ktr_enqueuerequest(td, req);
	ktrace_exit(td);
}
#endif /* KTRACE */

/* Interface and common routines */

#ifndef _SYS_SYSPROTO_H_
struct ktrace_args {
	char	*fname;
	int	ops;
	int	facs;
	int	pid;
};
#endif
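/*
 * In the handler below, uap->ops packs one operation (KTROP_SET,
 * KTROP_CLEAR or KTROP_CLEARFILE, extracted with KTROP()) together with the
 * KTRFLAG_DESCEND modifier, while uap->facs is the mask of trace points to
 * enable or disable.
 */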
/* ARGSUSED */
int
sys_ktrace(struct thread *td, struct ktrace_args *uap)
{
#ifdef KTRACE
	struct vnode *vp = NULL;
	struct proc *p;
	struct pgrp *pg;
	int facs = uap->facs & ~KTRFAC_ROOT;
	int ops = KTROP(uap->ops);
	int descend = uap->ops & KTRFLAG_DESCEND;
	int nfound, ret = 0;
	int flags, error = 0;
	struct nameidata nd;
	struct ktr_io_params *kiop, *old_kiop;

	/*
	 * Need something to (un)trace.
	 */
	if (ops != KTROP_CLEARFILE && facs == 0)
		return (EINVAL);

	kiop = NULL;
	ktrace_enter(td);
	if (ops != KTROP_CLEAR) {
		/*
		 * an operation which requires a file argument.
		 */
		NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->fname, td);
		flags = FREAD | FWRITE | O_NOFOLLOW;
		error = vn_open(&nd, &flags, 0, NULL);
		if (error) {
			ktrace_exit(td);
			return (error);
		}
		NDFREE(&nd, NDF_ONLY_PNBUF);
		vp = nd.ni_vp;
		VOP_UNLOCK(vp);
		if (vp->v_type != VREG) {
			(void)vn_close(vp, FREAD|FWRITE, td->td_ucred, td);
			ktrace_exit(td);
			return (EACCES);
		}
		kiop = ktr_io_params_alloc(td, vp);
	}
	/*
	 * Clear all uses of the tracefile.
	 */
	if (ops == KTROP_CLEARFILE) {
restart:
		sx_slock(&allproc_lock);
		FOREACH_PROC_IN_SYSTEM(p) {
			old_kiop = NULL;
			PROC_LOCK(p);
			if (p->p_ktrioparms != NULL &&
			    p->p_ktrioparms->vp == vp) {
				if (ktrcanset(td, p)) {
					mtx_lock(&ktrace_mtx);
					old_kiop = ktr_freeproc(p);
					mtx_unlock(&ktrace_mtx);
				} else
					error = EPERM;
			}
			PROC_UNLOCK(p);
			if (old_kiop != NULL) {
				sx_sunlock(&allproc_lock);
				ktr_io_params_free(old_kiop);
				goto restart;
			}
		}
		sx_sunlock(&allproc_lock);
		goto done;
	}
	/*
	 * do it
	 */
	sx_slock(&proctree_lock);
	if (uap->pid < 0) {
		/*
		 * by process group
		 */
		pg = pgfind(-uap->pid);
		if (pg == NULL) {
			sx_sunlock(&proctree_lock);
			error = ESRCH;
			goto done;
		}
		/*
		 * ktrops() may call vrele().  Lock pg_members
		 * by the proctree_lock rather than pg_mtx.
		 */
		PGRP_UNLOCK(pg);
		nfound = 0;
		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
			PROC_LOCK(p);
			if (p->p_state == PRS_NEW ||
			    p_cansee(td, p) != 0) {
				PROC_UNLOCK(p);
				continue;
			}
			nfound++;
			if (descend)
				ret |= ktrsetchildren(td, p, ops, facs, kiop);
			else
				ret |= ktrops(td, p, ops, facs, kiop);
		}
		if (nfound == 0) {
			sx_sunlock(&proctree_lock);
			error = ESRCH;
			goto done;
		}
	} else {
		/*
		 * by pid
		 */
		p = pfind(uap->pid);
		if (p == NULL)
			error = ESRCH;
		else
			error = p_cansee(td, p);
		if (error) {
			if (p != NULL)
				PROC_UNLOCK(p);
			sx_sunlock(&proctree_lock);
			goto done;
		}
		if (descend)
			ret |= ktrsetchildren(td, p, ops, facs, kiop);
		else
			ret |= ktrops(td, p, ops, facs, kiop);
	}
	sx_sunlock(&proctree_lock);
	if (!ret)
		error = EPERM;
done:
	if (kiop != NULL) {
		mtx_lock(&ktrace_mtx);
		kiop = ktr_io_params_rele(kiop);
		mtx_unlock(&ktrace_mtx);
		ktr_io_params_free(kiop);
	}
	ktrace_exit(td);
	return (error);
#else /* !KTRACE */
	return (ENOSYS);
#endif /* KTRACE */
}

/* ARGSUSED */
int
sys_utrace(struct thread *td, struct utrace_args *uap)
{

#ifdef KTRACE
	struct ktr_request *req;
	void *cp;
	int error;

	if (!KTRPOINT(td, KTR_USER))
		return (0);
	if (uap->len > KTR_USER_MAXLEN)
		return (EINVAL);
	cp = malloc(uap->len, M_KTRACE, M_WAITOK);
	error = copyin(uap->addr, cp, uap->len);
	if (error) {
		free(cp, M_KTRACE);
		return (error);
	}
	req = ktr_getrequest(KTR_USER);
	if (req == NULL) {
		free(cp, M_KTRACE);
		return (ENOMEM);
	}
	req->ktr_buffer = cp;
	req->ktr_header.ktr_len = uap->len;
	ktr_submitrequest(td, req);
	return (0);
#else /* !KTRACE */
	return (ENOSYS);
#endif /* KTRACE */
}
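/*
 * Illustrative userspace counterpart to sys_utrace() above (a sketch): a
 * traced process can inject its own records with
 *
 *	struct my_event ev = { ... };	(hypothetical structure)
 *	utrace(&ev, sizeof(ev));
 *
 * which arrives as a KTR_USER record of at most KTR_USER_MAXLEN bytes.
 */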
#ifdef KTRACE
static int
ktrops(struct thread *td, struct proc *p, int ops, int facs,
    struct ktr_io_params *new_kiop)
{
	struct ktr_io_params *old_kiop;

	PROC_LOCK_ASSERT(p, MA_OWNED);
	if (!ktrcanset(td, p)) {
		PROC_UNLOCK(p);
		return (0);
	}
	if (p->p_flag & P_WEXIT) {
		/* If the process is exiting, just ignore it. */
		PROC_UNLOCK(p);
		return (1);
	}
	old_kiop = NULL;
	mtx_lock(&ktrace_mtx);
	if (ops == KTROP_SET) {
		if (p->p_ktrioparms != NULL &&
		    p->p_ktrioparms->vp != new_kiop->vp) {
			/* if trace file already in use, relinquish below */
			old_kiop = ktr_io_params_rele(p->p_ktrioparms);
			p->p_ktrioparms = NULL;
		}
		if (p->p_ktrioparms == NULL) {
			p->p_ktrioparms = new_kiop;
			ktr_io_params_ref(new_kiop);
		}
		p->p_traceflag |= facs;
		if (priv_check(td, PRIV_KTRACE) == 0)
			p->p_traceflag |= KTRFAC_ROOT;
	} else {
		/* KTROP_CLEAR */
		if (((p->p_traceflag &= ~facs) & KTRFAC_MASK) == 0)
			/* no more tracing */
			old_kiop = ktr_freeproc(p);
	}
	mtx_unlock(&ktrace_mtx);
	if ((p->p_traceflag & KTRFAC_MASK) != 0)
		ktrprocctor_entered(td, p);
	PROC_UNLOCK(p);
	ktr_io_params_free(old_kiop);

	return (1);
}

static int
ktrsetchildren(struct thread *td, struct proc *top, int ops, int facs,
    struct ktr_io_params *new_kiop)
{
	struct proc *p;
	int ret = 0;

	p = top;
	PROC_LOCK_ASSERT(p, MA_OWNED);
	sx_assert(&proctree_lock, SX_LOCKED);
	for (;;) {
		ret |= ktrops(td, p, ops, facs, new_kiop);
		/*
		 * If this process has children, descend to them next,
		 * otherwise do any siblings, and if done with this level,
		 * follow back up the tree (but not past top).
		 */
		if (!LIST_EMPTY(&p->p_children))
			p = LIST_FIRST(&p->p_children);
		else for (;;) {
			if (p == top)
				return (ret);
			if (LIST_NEXT(p, p_sibling)) {
				p = LIST_NEXT(p, p_sibling);
				break;
			}
			p = p->p_pptr;
		}
		PROC_LOCK(p);
	}
	/*NOTREACHED*/
}
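/*
 * Note on ktrsetchildren() above: the iterative preorder walk relies on
 * proctree_lock being held across the whole traversal, so the
 * p_children/p_sibling links cannot change underneath it; only the
 * per-process lock is dropped and reacquired as each process is visited.
 */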
static void
ktr_writerequest(struct thread *td, struct ktr_request *req)
{
	struct ktr_io_params *kiop, *kiop1;
	struct ktr_header *kth;
	struct vnode *vp;
	struct proc *p;
	struct ucred *cred;
	struct uio auio;
	struct iovec aiov[3];
	struct mount *mp;
	off_t lim;
	int datalen, buflen;
	int error;

	p = td->td_proc;

	/*
	 * We reference the kiop for use in I/O in case ktrace is
	 * disabled on the process as we write out the request.
	 */
	mtx_lock(&ktrace_mtx);
	kiop = p->p_ktrioparms;

	/*
	 * If kiop is NULL, it has been cleared out from under this
	 * request, so just drop it.
	 */
	if (kiop == NULL) {
		mtx_unlock(&ktrace_mtx);
		return;
	}

	ktr_io_params_ref(kiop);
	vp = kiop->vp;
	cred = kiop->cr;
	lim = kiop->lim;

	KASSERT(cred != NULL, ("ktr_writerequest: cred == NULL"));
	mtx_unlock(&ktrace_mtx);

	kth = &req->ktr_header;
	KASSERT(((u_short)kth->ktr_type & ~KTR_DROP) < nitems(data_lengths),
	    ("data_lengths array overflow"));
	datalen = data_lengths[(u_short)kth->ktr_type & ~KTR_DROP];
	buflen = kth->ktr_len;
	auio.uio_iov = &aiov[0];
	auio.uio_offset = 0;
	auio.uio_segflg = UIO_SYSSPACE;
	auio.uio_rw = UIO_WRITE;
	aiov[0].iov_base = (caddr_t)kth;
	aiov[0].iov_len = sizeof(struct ktr_header);
	auio.uio_resid = sizeof(struct ktr_header);
	auio.uio_iovcnt = 1;
	auio.uio_td = td;
	if (datalen != 0) {
		aiov[1].iov_base = (caddr_t)&req->ktr_data;
		aiov[1].iov_len = datalen;
		auio.uio_resid += datalen;
		auio.uio_iovcnt++;
		kth->ktr_len += datalen;
	}
	if (buflen != 0) {
		KASSERT(req->ktr_buffer != NULL, ("ktrace: nothing to write"));
		aiov[auio.uio_iovcnt].iov_base = req->ktr_buffer;
		aiov[auio.uio_iovcnt].iov_len = buflen;
		auio.uio_resid += buflen;
		auio.uio_iovcnt++;
	}

	vn_start_write(vp, &mp, V_WAIT);
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	td->td_ktr_io_lim = lim;
#ifdef MAC
	error = mac_vnode_check_write(cred, NOCRED, vp);
	if (error == 0)
#endif
		error = VOP_WRITE(vp, &auio, IO_UNIT | IO_APPEND, cred);
	VOP_UNLOCK(vp);
	vn_finished_write(mp);
	if (error == 0) {
		mtx_lock(&ktrace_mtx);
		kiop = ktr_io_params_rele(kiop);
		mtx_unlock(&ktrace_mtx);
		ktr_io_params_free(kiop);
		return;
	}

	/*
	 * If error encountered, give up tracing on this vnode on this
	 * process.  Other processes might still be suitable for
	 * writes to this vnode.
	 */
	log(LOG_NOTICE,
	    "ktrace write failed, errno %d, tracing stopped for pid %d\n",
	    error, p->p_pid);

	kiop1 = NULL;
	PROC_LOCK(p);
	mtx_lock(&ktrace_mtx);
	if (p->p_ktrioparms != NULL && p->p_ktrioparms->vp == vp)
		kiop1 = ktr_freeproc(p);
	kiop = ktr_io_params_rele(kiop);
	mtx_unlock(&ktrace_mtx);
	PROC_UNLOCK(p);
	ktr_io_params_free(kiop1);
	ktr_io_params_free(kiop);
}

/*
 * Return true if caller has permission to set the ktracing state
 * of target.  Essentially, the target can't possess any
 * more permissions than the caller.  KTRFAC_ROOT signifies that
 * root previously set the tracing status on the target process, and
 * so, only root may further change it.
 */
static int
ktrcanset(struct thread *td, struct proc *targetp)
{

	PROC_LOCK_ASSERT(targetp, MA_OWNED);
	if (targetp->p_traceflag & KTRFAC_ROOT &&
	    priv_check(td, PRIV_KTRACE))
		return (0);

	if (p_candebug(td, targetp) != 0)
		return (0);

	return (1);
}

#endif /* KTRACE */