/*
 * Minimal portability layer for system call differences between
 * Capsicum OSes.
 *
 * Each supported OS section provides the same set of trailing-underscore
 * names (getdents_, sendfile_, mq_open_, ptrace_, ...) either as macros that
 * alias the native function or as small static inline wrappers, plus a set
 * of HAVE_* feature macros describing which optional calls exist.
 */
#ifndef __SYSCALLS_H__
#define __SYSCALLS_H__

/************************************************************
 * FreeBSD
 ************************************************************/
#ifdef __FreeBSD__

/* Map umount2 (Linux) syscall to unmount (FreeBSD) syscall */
#define umount2(T, F) unmount(T, F)

/* Map sighandler_t (Linux) to sig_t (FreeBSD) */
#define sighandler_t sig_t

/* profil(2) has a first argument of char* */
#define profil_arg1_t char

/* FreeBSD has getdents(2) available */
#include <sys/types.h>
#include <dirent.h>
/* Read directory entries from fd into dirp (at most count bytes). */
static inline int getdents_(unsigned int fd, void *dirp, unsigned int count) {
  return getdents(fd, (char*)dirp, count);
}
#include <sys/mman.h>
/* mincore(2) taking the Linux-style unsigned char residency vector. */
static inline int mincore_(void *addr, size_t length, unsigned char *vec) {
  return mincore(addr, length, (char*)vec);
}
#define getpid_ getpid

/* Map Linux-style sendfile to FreeBSD sendfile */
#include <sys/socket.h>
#include <sys/uio.h>
/*
 * Note the argument order swap: Linux takes (out, in), FreeBSD takes (in, out).
 * The caller's offset is dereferenced unconditionally, so it must not be NULL.
 * NOTE(review): the same pointer is passed as FreeBSD's "bytes sent" output
 * and the native status is returned directly, so the result/offset semantics
 * probably differ from Linux sendfile(2) -- confirm before relying on them.
 */
static inline ssize_t sendfile_(int out_fd, int in_fd, off_t *offset, size_t count) {
  return sendfile(in_fd, out_fd, *offset, count, NULL, offset, 0);
}

/* A sample mount(2) call */
#include <sys/param.h>
#include <sys/mount.h>
static inline int bogus_mount_(void) {
  return mount("procfs", "/not_mounted", 0, NULL);
}

/* Mappings for extended attribute functions */
#include <sys/extattr.h>
#include <errno.h>
/*
 * Strip a leading "user." from a Linux-style attribute name.
 * Returns a pointer to the character after the prefix, or NULL with errno
 * set to EINVAL when the name does not start with "user.".  The chained
 * comparisons short-circuit, so a short name is never read past its NUL.
 */
static inline const char *fbsd_extattr_skip_prefix(const char *p) {
  if (*p++ == 'u' && *p++ == 's' && *p++ == 'e' && *p++ == 'r' && *p++ == '.')
    return p;
  errno = EINVAL;
  return NULL;
}
/* List attributes of fd in the user namespace. */
static inline ssize_t flistxattr_(int fd, char *list, size_t size) {
  return extattr_list_fd(fd, EXTATTR_NAMESPACE_USER, list, size);
}
/* Get attribute "user.<name>" of fd; returns -1/EINVAL for other prefixes. */
static inline ssize_t fgetxattr_(int fd, const char *name, void *value, size_t size) {
  if (!(name = fbsd_extattr_skip_prefix(name)))
    return -1;
  return extattr_get_fd(fd, EXTATTR_NAMESPACE_USER, name, value, size);
}
/* Set attribute "user.<name>" of fd; the trailing Linux flags argument is ignored. */
static inline int fsetxattr_(int fd, const char *name, const void *value, size_t size, int) {
  if (!(name = fbsd_extattr_skip_prefix(name)))
    return -1;
  return extattr_set_fd(fd, EXTATTR_NAMESPACE_USER, name, value, size);
}
/* Remove attribute "user.<name>" of fd; returns -1/EINVAL for other prefixes. */
static inline int fremovexattr_(int fd, const char *name) {
  if (!(name = fbsd_extattr_skip_prefix(name)))
    return -1;
  return extattr_delete_fd(fd, EXTATTR_NAMESPACE_USER, name);
}

/* mq_* functions are wrappers in FreeBSD so go through to underlying syscalls */
#include <sys/syscall.h>
extern "C" {
extern int __sys_kmq_notify(int, const struct sigevent *);
extern int __sys_kmq_open(const char *, int, mode_t, const struct mq_attr *);
extern int __sys_kmq_setattr(int, const struct mq_attr *__restrict, struct mq_attr *__restrict);
extern ssize_t __sys_kmq_timedreceive(int, char *__restrict, size_t,
                                      unsigned *__restrict, const struct timespec *__restrict);
extern int __sys_kmq_timedsend(int, const char *, size_t, unsigned,
                               const struct timespec *);
extern int __sys_kmq_unlink(const char *);
}
#define mq_notify_ __sys_kmq_notify
#define mq_open_ __sys_kmq_open
#define mq_setattr_ __sys_kmq_setattr
/* getattr is expressed as setattr with no new attributes, reading back into B */
#define mq_getattr_(A, B) __sys_kmq_setattr(A, NULL, B)
#define mq_timedreceive_ __sys_kmq_timedreceive
#define mq_timedsend_ __sys_kmq_timedsend
#define mq_unlink_ __sys_kmq_unlink
#define mq_close_ close
#include <sys/ptrace.h>
/* Linux-shaped ptrace: the data pointer is narrowed to FreeBSD's int argument. */
static inline long ptrace_(int request, pid_t pid, void *addr, void *data) {
  return ptrace(request, pid, (caddr_t)addr, static_cast<int>((long)data));
}
#define PTRACE_PEEKDATA_ PT_READ_D
#define getegid_ getegid
#define getgid_ getgid
#define geteuid_ geteuid
#define getuid_ getuid
#define getgroups_ getgroups
#define getrlimit_ getrlimit
#define bind_ bind
#define connect_ connect

/* Features available */
#if __FreeBSD_version >= 1000000
#define HAVE_CHFLAGSAT
#define HAVE_BINDAT
#define HAVE_CONNECTAT
#endif
#define HAVE_CHFLAGS
#define HAVE_GETFSSTAT
#define HAVE_REVOKE
#define HAVE_GETLOGIN
#define HAVE_MKFIFOAT
#define HAVE_SYSARCH
#include <machine/sysarch.h>
#define HAVE_STAT_BIRTHTIME
#define HAVE_SYSCTL
#define HAVE_FPATHCONF
#define HAVE_F_DUP2FD
#define HAVE_PSELECT
#define HAVE_SCTP

/* FreeBSD only allows root to call mlock[all]/munlock[all] */
#define MLOCK_REQUIRES_ROOT 1
/* FreeBSD effectively only allows root to call sched_setscheduler */
#define SCHED_SETSCHEDULER_REQUIRES_ROOT 1

#endif  /* FreeBSD */

/************************************************************
 * Linux
 ************************************************************/
#ifdef __linux__
#include <fcntl.h>
#include <stdint.h>  /* for intptr_t in the socketcall fallbacks */
#include <unistd.h>
#include <sys/prctl.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <sys/wait.h>
#include <sys/sendfile.h>
#include <sys/socket.h>  /* for struct sockaddr / socklen_t */
#include <sys/statfs.h>
#include <sys/xattr.h>
#include <sys/mount.h>
#include <linux/net.h>

/* profil(2) has a first argument of unsigned short* */
#define profil_arg1_t unsigned short

/* Read directory entries from fd into dirp via the raw getdents syscall. */
static inline int getdents_(unsigned int fd, void *dirp, unsigned int count) {
  return syscall(__NR_getdents, fd, dirp, count);
}
/* A sample mount(2) call */
static inline int bogus_mount_(void) {
  return mount("/dev/bogus", "/bogus", "debugfs", MS_RDONLY, "");
}

/* libc's getpid() wrapper caches the pid value, and doesn't invalidate
 * the cached value on pdfork(), so directly syscall. */
static inline pid_t getpid_(void) {
  return syscall(__NR_getpid);
}

/*
 * glibc 2.34 and later declare execveat() themselves in <unistd.h> when
 * _GNU_SOURCE is in effect; defining a static function with the same name
 * would then be a conflicting redeclaration.  Only supply the raw-syscall
 * version when libc has not already declared one.
 */
#if defined(__GLIBC__) && defined(__GLIBC_PREREQ) && defined(_GNU_SOURCE)
#if __GLIBC_PREREQ(2, 34)
#define SYSCALLS_LIBC_HAS_EXECVEAT_ 1
#endif
#endif
#ifndef SYSCALLS_LIBC_HAS_EXECVEAT_
static inline int execveat(int fd, const char *path,
                           char *const argv[], char *const envp[], int flags) {
  return syscall(__NR_execveat, fd, path, argv, envp, flags);
}
#endif

/*
 * Linux glibc includes an fexecve() function, implemented via the /proc
 * filesystem.  Bypass this and go directly to the execveat(2) syscall.
 */
static inline int fexecve_(int fd, char *const argv[], char *const envp[]) {
  return execveat(fd, "", argv, envp, AT_EMPTY_PATH);
}
/*
 * Linux glibc attempts to be clever and intercepts various uid/gid functions.
 * Bypass by calling the syscalls directly.
 */
static inline gid_t getegid_(void) { return syscall(__NR_getegid); }
static inline gid_t getgid_(void) { return syscall(__NR_getgid); }
static inline uid_t geteuid_(void) { return syscall(__NR_geteuid); }
static inline uid_t getuid_(void) { return syscall(__NR_getuid); }
static inline int getgroups_(int size, gid_t list[]) { return syscall(__NR_getgroups, size, list); }
static inline int getrlimit_(int resource, struct rlimit *rlim) {
  return syscall(__NR_getrlimit, resource, rlim);
}

/*
 * Linux glibc for i386 consumes the errno returned from the raw socketcall(2) operation,
 * so use the raw syscall for those operations that are disallowed in capability mode.
 * Architectures that have dedicated bind/connect syscall numbers just use libc.
 */
#ifdef __NR_bind
#define bind_ bind
#else
static inline int bind_(int sockfd, const struct sockaddr *addr, socklen_t addrlen) {
  /* socketcall(2) takes its arguments packed into an array of longs */
  unsigned long args[3] = {(unsigned long)sockfd, (unsigned long)(intptr_t)addr, (unsigned long)addrlen};
  return syscall(__NR_socketcall, SYS_BIND, args);
}
#endif
#ifdef __NR_connect
#define connect_ connect
#else
static inline int connect_(int sockfd, const struct sockaddr *addr, socklen_t addrlen) {
  /* socketcall(2) takes its arguments packed into an array of longs */
  unsigned long args[3] = {(unsigned long)sockfd, (unsigned long)(intptr_t)addr, (unsigned long)addrlen};
  return syscall(__NR_socketcall, SYS_CONNECT, args);
}
#endif

/* The remaining portable names map straight onto the native Linux calls */
#define mincore_ mincore
#define sendfile_ sendfile
#define flistxattr_ flistxattr
#define fgetxattr_ fgetxattr
#define fsetxattr_ fsetxattr
#define fremovexattr_ fremovexattr
#define mq_notify_ mq_notify
#define mq_open_ mq_open
#define mq_setattr_ mq_setattr
#define mq_getattr_ mq_getattr
#define mq_timedreceive_ mq_timedreceive
#define mq_timedsend_ mq_timedsend
#define mq_unlink_ mq_unlink
#define mq_close_ mq_close
#define ptrace_ ptrace
#define PTRACE_PEEKDATA_ PTRACE_PEEKDATA

/* Features available */
#define HAVE_DUP3
#define HAVE_PIPE2
#include <sys/fsuid.h>  /* for setfsgid()/setfsuid() */
#define HAVE_SETFSUID
#define HAVE_SETFSGID
#define HAVE_READAHEAD
#define HAVE_SEND_RECV_MMSG
#define HAVE_SYNCFS
#define HAVE_SYNC_FILE_RANGE
#include <sys/uio.h>  /* for vmsplice */
#define HAVE_TEE
#define HAVE_SPLICE
#define HAVE_VMSPLICE
#define HAVE_PSELECT
#define HAVE_PPOLL
#define HAVE_EXECVEAT
#define HAVE_SYSCALL
#define HAVE_MKNOD_REG
#define HAVE_MKNOD_SOCKET
/*
 * O_BENEATH is arch-specific, via <asm/fcntl.h>; however we cannot include both that file
 * and the normal <fcntl.h> as they have some clashing definitions.  Bypass by directly
 * defining O_BENEATH, using the current proposed x86 value.  (This will therefore not
 * work for non-x86, and may need changing in future if a different value gets merged.)
 */
#ifndef O_BENEATH
#define O_BENEATH 040000000  /* no / or .. in openat path */
#endif


/* Linux allows anyone to call mlock[all]/munlock[all] */
#define MLOCK_REQUIRES_ROOT 0
/*
 * Treated as root-only on Linux as well.
 * NOTE(review): switching to a real-time policy normally needs CAP_SYS_NICE
 * or a non-zero RLIMIT_RTPRIO, which matches the value of 1 here -- confirm.
 */
#define SCHED_SETSCHEDULER_REQUIRES_ROOT 1

#endif  /* Linux */

#endif /*__SYSCALLS_H__*/