15969Ssimonis/*- 210295Sclanger * SPDX-License-Identifier: BSD-4-Clause 310295Sclanger * 45969Ssimonis * Copyright (C) 1994, David Greenman 55969Ssimonis * Copyright (c) 1990, 1993 65969Ssimonis * The Regents of the University of California. All rights reserved. 75969Ssimonis * Copyright (C) 2010 Konstantin Belousov <kib@freebsd.org> 85969Ssimonis * 95969Ssimonis * This code is derived from software contributed to Berkeley by 105969Ssimonis * the University of Utah, and William Jolitz. 115969Ssimonis * 125969Ssimonis * Redistribution and use in source and binary forms, with or without 135969Ssimonis * modification, are permitted provided that the following conditions 145969Ssimonis * are met: 155969Ssimonis * 1. Redistributions of source code must retain the above copyright 165969Ssimonis * notice, this list of conditions and the following disclaimer. 175969Ssimonis * 2. Redistributions in binary form must reproduce the above copyright 185969Ssimonis * notice, this list of conditions and the following disclaimer in the 195969Ssimonis * documentation and/or other materials provided with the distribution. 205969Ssimonis * 3. All advertising materials mentioning features or use of this software 215969Ssimonis * must display the following acknowledgement: 225969Ssimonis * This product includes software developed by the University of 235969Ssimonis * California, Berkeley and its contributors. 245969Ssimonis * 4. Neither the name of the University nor the names of its contributors 255969Ssimonis * may be used to endorse or promote products derived from this software 265969Ssimonis * without specific prior written permission. 275969Ssimonis * 286683Sgoetz * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 295969Ssimonis * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 305969Ssimonis * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 315969Ssimonis * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 325969Ssimonis * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 335969Ssimonis * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 345969Ssimonis * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 355969Ssimonis * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 365969Ssimonis * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 375969Ssimonis * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 385969Ssimonis * SUCH DAMAGE. 395969Ssimonis */ 405969Ssimonis 415969Ssimonis#include "opt_capsicum.h" 425969Ssimonis#include "opt_ktrace.h" 439846Sstuefe#include <sys/capsicum.h> 445969Ssimonis#include <sys/ktr.h> 455969Ssimonis#include <sys/vmmeter.h> 465969Ssimonis#ifdef KTRACE 475969Ssimonis#include <sys/uio.h> 485969Ssimonis#include <sys/ktrace.h> 495969Ssimonis#endif 505969Ssimonis#include <security/audit/audit.h> 515969Ssimonis 525969Ssimonisstatic inline void 535969Ssimonissyscallenter(struct thread *td) 545969Ssimonis{ 555969Ssimonis struct proc *p; 565969Ssimonis struct syscall_args *sa; 575969Ssimonis struct sysent *se; 585969Ssimonis int error, traced; 595969Ssimonis bool sy_thr_static; 605969Ssimonis 615969Ssimonis VM_CNT_INC(v_syscall); 625969Ssimonis p = td->td_proc; 635969Ssimonis sa = &td->td_sa; 645969Ssimonis 655969Ssimonis td->td_pticks = 0; 665969Ssimonis if (__predict_false(td->td_cowgen != atomic_load_int(&p->p_cowgen))) 675969Ssimonis thread_cow_update(td); 685969Ssimonis traced = (p->p_flag & P_TRACED) != 0; 695969Ssimonis if (__predict_false(traced || td->td_dbgflags & TDB_USERWR)) { 705969Ssimonis PROC_LOCK(p); 715969Ssimonis MPASS((td->td_dbgflags & TDB_BOUNDARY) == 0); 725969Ssimonis td->td_dbgflags &= ~TDB_USERWR; 735969Ssimonis if (traced) 745969Ssimonis td->td_dbgflags |= TDB_SCE; 755969Ssimonis PROC_UNLOCK(p); 765969Ssimonis } 775969Ssimonis error = (p->p_sysent->sv_fetch_syscall_args)(td); 785969Ssimonis se = sa->callp; 795969Ssimonis#ifdef KTRACE 805969Ssimonis if (KTRPOINT(td, KTR_SYSCALL)) 815969Ssimonis ktrsyscall(sa->code, se->sy_narg, sa->args); 825969Ssimonis#endif 835969Ssimonis KTR_START4(KTR_SYSC, "syscall", syscallname(p, sa->code), 845969Ssimonis (uintptr_t)td, "pid:%d", td->td_proc->p_pid, "arg0:%p", sa->args[0], 855969Ssimonis "arg1:%p", sa->args[1], "arg2:%p", sa->args[2]); 865969Ssimonis 875969Ssimonis if (__predict_false(error != 0)) { 885969Ssimonis td->td_errno = error; 895969Ssimonis goto retval; 905969Ssimonis } 915969Ssimonis 925969Ssimonis if (__predict_false(traced)) { 935969Ssimonis PROC_LOCK(p); 945969Ssimonis if (p->p_ptevents & PTRACE_SCE) 955969Ssimonis ptracestop((td), SIGTRAP, NULL); 965969Ssimonis PROC_UNLOCK(p); 975969Ssimonis 985969Ssimonis if ((td->td_dbgflags & TDB_USERWR) != 0) { 995969Ssimonis /* 1005969Ssimonis * Reread syscall number and arguments if debugger 1015969Ssimonis * modified registers or memory. 1025969Ssimonis */ 1035969Ssimonis error = (p->p_sysent->sv_fetch_syscall_args)(td); 1045969Ssimonis se = sa->callp; 1055969Ssimonis#ifdef KTRACE 1065969Ssimonis if (KTRPOINT(td, KTR_SYSCALL)) 1075969Ssimonis ktrsyscall(sa->code, se->sy_narg, sa->args); 1085969Ssimonis#endif 1095969Ssimonis if (error != 0) { 1105969Ssimonis td->td_errno = error; 1115969Ssimonis goto retval; 1125969Ssimonis } 1135969Ssimonis } 1145969Ssimonis } 1155969Ssimonis 1165969Ssimonis#ifdef CAPABILITY_MODE 1175969Ssimonis /* 1185969Ssimonis * In capability mode, we only allow access to system calls 1195969Ssimonis * flagged with SYF_CAPENABLED. 1205969Ssimonis */ 1215969Ssimonis if ((se->sy_flags & SYF_CAPENABLED) == 0) { 1225969Ssimonis if (CAP_TRACING(td)) 1235969Ssimonis ktrcapfail(CAPFAIL_SYSCALL, NULL); 1245969Ssimonis if (IN_CAPABILITY_MODE(td)) { 1255969Ssimonis td->td_errno = error = ECAPMODE; 1265969Ssimonis goto retval; 1275969Ssimonis } 1285969Ssimonis } 1295969Ssimonis#endif 1305969Ssimonis 1315969Ssimonis /* 1325969Ssimonis * Fetch fast sigblock value at the time of syscall entry to 1335969Ssimonis * handle sleepqueue primitives which might call cursig(). 1345969Ssimonis */ 1355969Ssimonis if (__predict_false(sigfastblock_fetch_always)) 1365969Ssimonis (void)sigfastblock_fetch(td); 1375969Ssimonis 1385969Ssimonis /* Let system calls set td_errno directly. */ 1395969Ssimonis KASSERT((td->td_pflags & TDP_NERRNO) == 0, 1405969Ssimonis ("%s: TDP_NERRNO set", __func__)); 1415969Ssimonis 1425969Ssimonis sy_thr_static = (se->sy_thrcnt & SY_THR_STATIC) != 0; 1435969Ssimonis 1445969Ssimonis if (__predict_false(SYSTRACE_ENABLED() || 1455969Ssimonis AUDIT_SYSCALL_ENTER(sa->code, td) || 1465969Ssimonis !sy_thr_static)) { 1478183Sgoetz if (!sy_thr_static) { 1488183Sgoetz error = syscall_thread_enter(td, &se); 1498183Sgoetz sy_thr_static = (se->sy_thrcnt & SY_THR_STATIC) != 0; 1508183Sgoetz if (error != 0) { 1515969Ssimonis td->td_errno = error; 1525969Ssimonis goto retval; 1535969Ssimonis } 1545969Ssimonis } 1555969Ssimonis 1565969Ssimonis#ifdef KDTRACE_HOOKS 1575969Ssimonis /* Give the syscall:::entry DTrace probe a chance to fire. */ 1585969Ssimonis if (__predict_false(se->sy_entry != 0)) 1595969Ssimonis (*systrace_probe_func)(sa, SYSTRACE_ENTRY, 0); 1605969Ssimonis#endif 1615969Ssimonis error = (se->sy_call)(td, sa->args); 1625969Ssimonis /* Save the latest error return value. */ 1635969Ssimonis if (__predict_false((td->td_pflags & TDP_NERRNO) != 0)) 1645969Ssimonis td->td_pflags &= ~TDP_NERRNO; 1655969Ssimonis else 1665969Ssimonis td->td_errno = error; 1675969Ssimonis 1685969Ssimonis /* 1695969Ssimonis * Note that some syscall implementations (e.g., sys_execve) 1705969Ssimonis * will commit the audit record just before their final return. 1715969Ssimonis * These were done under the assumption that nothing of interest 1725969Ssimonis * would happen between their return and here, where we would 1735969Ssimonis * normally commit the audit record. These assumptions will 1745969Ssimonis * need to be revisited should any substantial logic be added 1755969Ssimonis * above. 1765969Ssimonis */ 1775969Ssimonis AUDIT_SYSCALL_EXIT(error, td); 1785969Ssimonis 1795969Ssimonis#ifdef KDTRACE_HOOKS 1805969Ssimonis /* Give the syscall:::return DTrace probe a chance to fire. */ 1815969Ssimonis if (__predict_false(se->sy_return != 0)) 1825969Ssimonis (*systrace_probe_func)(sa, SYSTRACE_RETURN, 1835969Ssimonis error ? -1 : td->td_retval[0]); 1845969Ssimonis#endif 1855969Ssimonis 1865969Ssimonis if (!sy_thr_static) 1875969Ssimonis syscall_thread_exit(td, se); 1885969Ssimonis } else { 1895969Ssimonis error = (se->sy_call)(td, sa->args); 1905969Ssimonis /* Save the latest error return value. */ 1915969Ssimonis if (__predict_false((td->td_pflags & TDP_NERRNO) != 0)) 1925969Ssimonis td->td_pflags &= ~TDP_NERRNO; 1935969Ssimonis else 1945969Ssimonis td->td_errno = error; 1955969Ssimonis } 1965969Ssimonis 1975969Ssimonis retval: 1985969Ssimonis KTR_STOP4(KTR_SYSC, "syscall", syscallname(p, sa->code), 1995969Ssimonis (uintptr_t)td, "pid:%d", td->td_proc->p_pid, "error:%d", error, 2005969Ssimonis "retval0:%#lx", td->td_retval[0], "retval1:%#lx", 2015969Ssimonis td->td_retval[1]); 2025969Ssimonis if (__predict_false(traced)) { 2035969Ssimonis PROC_LOCK(p); 2045969Ssimonis td->td_dbgflags &= ~(TDB_SCE | TDB_BOUNDARY); 2055969Ssimonis PROC_UNLOCK(p); 2065969Ssimonis } 2075969Ssimonis (p->p_sysent->sv_set_syscall_retval)(td, error); 2085969Ssimonis} 2095969Ssimonis 2105969Ssimonisstatic inline void 2115969Ssimonissyscallret(struct thread *td) 2125969Ssimonis{ 2135969Ssimonis struct proc *p; 2145969Ssimonis struct syscall_args *sa; 2155969Ssimonis ksiginfo_t ksi; 2165969Ssimonis int traced; 2175969Ssimonis 2185969Ssimonis KASSERT(td->td_errno != ERELOOKUP, 2195969Ssimonis ("ERELOOKUP not consumed syscall %d", td->td_sa.code)); 2205969Ssimonis 2218183Sgoetz p = td->td_proc; 2225969Ssimonis sa = &td->td_sa; 2235969Ssimonis if (__predict_false(td->td_errno == ENOTCAPABLE || 2245969Ssimonis td->td_errno == ECAPMODE)) { 2255969Ssimonis if ((trap_enotcap || 2265969Ssimonis (p->p_flag2 & P2_TRAPCAP) != 0) && IN_CAPABILITY_MODE(td)) { 2275969Ssimonis ksiginfo_init_trap(&ksi); 22810295Sclanger ksi.ksi_signo = SIGTRAP; 2295969Ssimonis ksi.ksi_errno = td->td_errno; 2305969Ssimonis ksi.ksi_code = TRAP_CAP; 2315969Ssimonis ksi.ksi_info.si_syscall = sa->original_code; 2325969Ssimonis trapsignal(td, &ksi); 2335969Ssimonis } 2345969Ssimonis } 2355969Ssimonis 2365969Ssimonis /* 2375969Ssimonis * Handle reschedule and other end-of-syscall issues 2385969Ssimonis */ 2395969Ssimonis userret(td, td->td_frame); 2405969Ssimonis 24110295Sclanger#ifdef KTRACE 2425969Ssimonis if (KTRPOINT(td, KTR_SYSRET)) { 2435969Ssimonis ktrsysret(sa->code, td->td_errno, td->td_retval[0]); 2445969Ssimonis } 2455969Ssimonis#endif 2465969Ssimonis 2475969Ssimonis traced = 0; 2485969Ssimonis if (__predict_false(p->p_flag & P_TRACED)) { 2495969Ssimonis traced = 1; 2505969Ssimonis PROC_LOCK(p); 2515969Ssimonis td->td_dbgflags |= TDB_SCX; 2525969Ssimonis PROC_UNLOCK(p); 2535969Ssimonis } 2545969Ssimonis if (__predict_false(traced || 2555969Ssimonis (td->td_dbgflags & (TDB_EXEC | TDB_FORK)) != 0)) { 2565969Ssimonis PROC_LOCK(p); 2575969Ssimonis /* 2585969Ssimonis * Linux debuggers expect an additional stop for exec, 2595969Ssimonis * between the usual syscall entry and exit. Raise 2605969Ssimonis * the exec event now and then clear TDB_EXEC so that 2615969Ssimonis * the next stop is reported as a syscall exit by 2625969Ssimonis * linux_ptrace_status(). 2635969Ssimonis * 2645969Ssimonis * We are accessing p->p_pptr without any additional 2655969Ssimonis * locks here: it cannot change while p is kept locked; 2665969Ssimonis * while the debugger could in theory change its ABI 2675969Ssimonis * while tracing another process, the outcome of such 2685969Ssimonis * a race wouln't be deterministic anyway. 2695969Ssimonis */ 2705969Ssimonis if (traced && (td->td_dbgflags & TDB_EXEC) != 0 && 2715969Ssimonis SV_PROC_ABI(p->p_pptr) == SV_ABI_LINUX) { 2725969Ssimonis ptracestop(td, SIGTRAP, NULL); 2735969Ssimonis td->td_dbgflags &= ~TDB_EXEC; 2745969Ssimonis } 2755969Ssimonis /* 2765969Ssimonis * If tracing the execed process, trap to the debugger 2775969Ssimonis * so that breakpoints can be set before the program 2785969Ssimonis * executes. If debugger requested tracing of syscall 2795969Ssimonis * returns, do it now too. 2805969Ssimonis */ 2815969Ssimonis if (traced && 2825969Ssimonis ((td->td_dbgflags & (TDB_FORK | TDB_EXEC)) != 0 || 2835969Ssimonis (p->p_ptevents & PTRACE_SCX) != 0)) { 2845969Ssimonis MPASS((td->td_dbgflags & TDB_BOUNDARY) == 0); 2855969Ssimonis td->td_dbgflags |= TDB_BOUNDARY; 2865969Ssimonis ptracestop(td, SIGTRAP, NULL); 2875969Ssimonis } 2885969Ssimonis td->td_dbgflags &= ~(TDB_SCX | TDB_EXEC | TDB_FORK | 2895969Ssimonis TDB_BOUNDARY); 2905969Ssimonis PROC_UNLOCK(p); 2915969Ssimonis } 2925969Ssimonis} 2935969Ssimonis