/*
 * Copyright (c) 2006 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 *
 */

#include <kern/sched_prim.h>
#include <kern/kalloc.h>
#include <kern/assert.h>
#include <kern/debug.h>
#include <kern/lock.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/host.h>
#include <libkern/libkern.h>
#include <mach/mach_time.h>
#include <mach/task.h>
#include <mach/host_priv.h>
#include <mach/mach_host.h>
#include <pexpert/pexpert.h>
#include <sys/kern_event.h>
#include <sys/proc.h>
#include <sys/proc_info.h>
#include <sys/signal.h>
#include <sys/signalvar.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/wait.h>
#include <sys/tree.h>
#include <sys/priv.h>
#include <vm/vm_pageout.h>
#include <vm/vm_protos.h>

#if CONFIG_FREEZE
#include <vm/vm_map.h>
#endif /* CONFIG_FREEZE */

#include <sys/kern_memorystatus.h>

/* These are very verbose printfs(), enable with
 * MEMORYSTATUS_DEBUG_LOG
 */
#if MEMORYSTATUS_DEBUG_LOG
#define MEMORYSTATUS_DEBUG(cond, format, ...)        \
do {                                                 \
	if (cond) { printf(format, ##__VA_ARGS__); } \
} while(0)
#else
#define MEMORYSTATUS_DEBUG(cond, format, ...)
#endif

/* General tunables */

unsigned long delta_percentage = 5;
unsigned long critical_threshold_percentage = 5;
unsigned long idle_offset_percentage = 5;
unsigned long pressure_threshold_percentage = 15;
unsigned long freeze_threshold_percentage = 50;

/* General memorystatus stuff */

struct klist memorystatus_klist;
static lck_mtx_t memorystatus_klist_mutex;

static void memorystatus_klist_lock(void);
static void memorystatus_klist_unlock(void);

static uint64_t memorystatus_idle_delay_time = 0;

/*
 * Memorystatus kevents
 */

static int filt_memorystatusattach(struct knote *kn);
static void filt_memorystatusdetach(struct knote *kn);
static int filt_memorystatus(struct knote *kn, long hint);

struct filterops memorystatus_filtops = {
	.f_attach = filt_memorystatusattach,
	.f_detach = filt_memorystatusdetach,
	.f_event = filt_memorystatus,
};

enum {
	kMemorystatusNoPressure = 1,
	kMemorystatusPressure = 2
};

/* Idle guard handling */

static int32_t memorystatus_scheduled_idle_demotions = 0;

static thread_call_t memorystatus_idle_demotion_call;

static void memorystatus_perform_idle_demotion(__unused void *spare1, __unused void *spare2);
static void memorystatus_schedule_idle_demotion_locked(proc_t p, boolean_t set_state);
static void memorystatus_invalidate_idle_demotion_locked(proc_t p, boolean_t clean_state);
static void memorystatus_reschedule_idle_demotion_locked(void);

static void memorystatus_update_priority_locked(proc_t p, int priority);

int memorystatus_wakeup = 0;

unsigned int memorystatus_level = 0;

static int memorystatus_list_count = 0;

#define MEMSTAT_BUCKET_COUNT (JETSAM_PRIORITY_MAX + 1)

typedef struct memstat_bucket {
	TAILQ_HEAD(, proc) list;
	int count;
} memstat_bucket_t;

memstat_bucket_t memstat_bucket[MEMSTAT_BUCKET_COUNT];

uint64_t memstat_idle_demotion_deadline = 0;

static unsigned int memorystatus_dirty_count = 0;

#if !CONFIG_JETSAM
static boolean_t kill_idle_exit = FALSE;
#endif


int
memorystatus_get_level(__unused struct proc *p, struct memorystatus_get_level_args *args, __unused int *ret)
{
	user_addr_t level = 0;

	level = args->level;

	if (copyout(&memorystatus_level, level, sizeof(memorystatus_level)) != 0) {
		return EFAULT;
	}

	return 0;
}

static proc_t memorystatus_get_first_proc_locked(unsigned int *bucket_index, boolean_t search);
static proc_t memorystatus_get_next_proc_locked(unsigned int *bucket_index, proc_t p, boolean_t search);

static void memorystatus_thread(void *param __unused, wait_result_t wr __unused);

/* Jetsam */

#if CONFIG_JETSAM

/* Kill processes exceeding their limit either under memory pressure (1), or as soon as possible (0) */
#define LEGACY_HIWATER 1

static int memorystatus_highwater_enabled = 1;

extern unsigned int vm_page_free_count;
extern unsigned int vm_page_active_count;
extern unsigned int vm_page_inactive_count;
extern unsigned int vm_page_throttled_count;
extern unsigned int vm_page_purgeable_count;
extern unsigned int vm_page_wire_count;

unsigned int memorystatus_delta = 0;

static unsigned int memorystatus_available_pages = (unsigned int)-1;
static unsigned int memorystatus_available_pages_pressure = 0;
static unsigned int memorystatus_available_pages_critical = 0;
static unsigned int memorystatus_available_pages_critical_base = 0;
static unsigned int memorystatus_last_foreground_pressure_pages = (unsigned int)-1;
#if !LATENCY_JETSAM
static unsigned int memorystatus_available_pages_critical_idle_offset = 0;
#endif

#if DEVELOPMENT || DEBUG
static unsigned int memorystatus_jetsam_panic_debug = 0;

static unsigned int memorystatus_jetsam_policy = kPolicyDefault;
static unsigned int memorystatus_jetsam_policy_offset_pages_diagnostic = 0;
#endif

static boolean_t kill_under_pressure = FALSE;

static memorystatus_jetsam_snapshot_t *memorystatus_jetsam_snapshot;
#define memorystatus_jetsam_snapshot_list memorystatus_jetsam_snapshot->entries

static unsigned int memorystatus_jetsam_snapshot_count = 0;
static unsigned int memorystatus_jetsam_snapshot_max = 0;

static void memorystatus_clear_errors(void);
static void memorystatus_get_task_page_counts(task_t task, uint32_t *footprint, uint32_t *max_footprint);
static int memorystatus_send_note(int event_code, void *data, size_t data_length);
static uint32_t memorystatus_build_state(proc_t p);
static void memorystatus_update_levels_locked(boolean_t critical_only);
static boolean_t memorystatus_issue_pressure_kevent(boolean_t pressured);

static boolean_t memorystatus_kill_specific_process(pid_t victim_pid, uint32_t cause);
static boolean_t memorystatus_kill_top_process(boolean_t any, uint32_t cause, int32_t *priority, uint32_t *errors);
#if LEGACY_HIWATER
static boolean_t memorystatus_kill_hiwat_proc(uint32_t *errors);
#endif

static boolean_t memorystatus_kill_process_async(pid_t victim_pid, uint32_t cause);
static boolean_t memorystatus_kill_process_sync(pid_t victim_pid, uint32_t cause);

#endif /* CONFIG_JETSAM */

/* VM pressure */

#if VM_PRESSURE_EVENTS

#include "vm_pressure.h"

extern boolean_t memorystatus_warn_process(pid_t pid);

vm_pressure_level_t memorystatus_vm_pressure_level = kVMPressureNormal;

#endif /* VM_PRESSURE_EVENTS */

/* Freeze */

#if CONFIG_FREEZE

boolean_t memorystatus_freeze_enabled = FALSE;
int memorystatus_freeze_wakeup = 0;

static inline boolean_t memorystatus_can_freeze_processes(void);
static boolean_t memorystatus_can_freeze(boolean_t *memorystatus_freeze_swap_low);

static void memorystatus_freeze_thread(void *param __unused, wait_result_t wr __unused);

/* Thresholds */
static unsigned int memorystatus_freeze_threshold = 0;

static unsigned int memorystatus_freeze_pages_min = FREEZE_PAGES_MIN;
static unsigned int memorystatus_freeze_pages_max = FREEZE_PAGES_MAX;

static unsigned int memorystatus_frozen_count = 0;

static unsigned int memorystatus_freeze_suspended_threshold = FREEZE_SUSPENDED_THRESHOLD_DEFAULT;

/* Stats */
static uint64_t memorystatus_freeze_count = 0;
static uint64_t memorystatus_freeze_pageouts = 0;

/* Throttling */
static throttle_interval_t throttle_intervals[] = {
	{      60, 8, 0, 0, { 0, 0 }, FALSE }, /* 1 hour intermediate interval, 8x burst */
	{ 24 * 60, 1, 0, 0, { 0, 0 }, FALSE }, /* 24 hour long interval, no burst */
};

static uint64_t memorystatus_freeze_throttle_count = 0;

static unsigned int memorystatus_suspended_count = 0;
static unsigned int memorystatus_suspended_footprint_total = 0;

#endif /* CONFIG_FREEZE */

/* Debug */

#if DEVELOPMENT || DEBUG
#if CONFIG_JETSAM

/* Debug aid to help determine the limit */

static int
sysctl_memorystatus_highwater_enable SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg2)
	proc_t p;
	unsigned int b = 0;
	int error, enable = 0;
	int32_t memlimit;

	error = SYSCTL_OUT(req, arg1, sizeof(int));
	if (error || !req->newptr) {
		return (error);
	}

	error = SYSCTL_IN(req, &enable, sizeof(int));
	if (error || !req->newptr) {
		return (error);
	}

	if (!(enable == 0 || enable == 1)) {
		return EINVAL;
	}

	proc_list_lock();

	p = memorystatus_get_first_proc_locked(&b, TRUE);
	while (p) {
		if (enable) {
			if ((p->p_memstat_state & P_MEMSTAT_MEMLIMIT_BACKGROUND) && (p->p_memstat_effectivepriority >= JETSAM_PRIORITY_FOREGROUND)) {
				memlimit = -1;
			} else {
				memlimit = p->p_memstat_memlimit;
			}
		} else {
			memlimit = -1;
		}
		task_set_phys_footprint_limit_internal(p->task, (memlimit > 0) ? memlimit : -1, NULL, TRUE);

		p = memorystatus_get_next_proc_locked(&b, p, TRUE);
	}

	memorystatus_highwater_enabled = enable;

	proc_list_unlock();

	return 0;
}

SYSCTL_PROC(_kern, OID_AUTO, memorystatus_highwater_enabled, CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_highwater_enabled, 0, sysctl_memorystatus_highwater_enable, "I", "");

SYSCTL_UINT(_kern, OID_AUTO, memorystatus_available_pages, CTLFLAG_RD|CTLFLAG_LOCKED, &memorystatus_available_pages, 0, "");
SYSCTL_UINT(_kern, OID_AUTO, memorystatus_available_pages_critical, CTLFLAG_RD|CTLFLAG_LOCKED, &memorystatus_available_pages_critical, 0, "");
SYSCTL_UINT(_kern, OID_AUTO, memorystatus_available_pages_critical_base, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_available_pages_critical_base, 0, "");
#if !LATENCY_JETSAM
SYSCTL_UINT(_kern, OID_AUTO, memorystatus_available_pages_critical_idle_offset, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_available_pages_critical_idle_offset, 0, "");
#endif

/* Diagnostic code */

enum {
	kJetsamDiagnosticModeNone =              0,
	kJetsamDiagnosticModeAll =               1,
	kJetsamDiagnosticModeStopAtFirstActive = 2,
	kJetsamDiagnosticModeCount
} jetsam_diagnostic_mode = kJetsamDiagnosticModeNone;

static int jetsam_diagnostic_suspended_one_active_proc = 0;

static int
sysctl_jetsam_diagnostic_mode SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)

	const char *diagnosticStrings[] = {
		"jetsam: diagnostic mode: resetting critical level.",
		"jetsam: diagnostic mode: will examine all processes",
		"jetsam: diagnostic mode: will stop at first active process"
	};

	int error, val = jetsam_diagnostic_mode;
	boolean_t changed = FALSE;

	error = sysctl_handle_int(oidp, &val, 0, req);
	if (error || !req->newptr)
		return (error);
	if ((val < 0) || (val >= kJetsamDiagnosticModeCount)) {
		printf("jetsam: diagnostic mode: invalid value - %d\n", val);
		return EINVAL;
	}

	proc_list_lock();

	if ((unsigned int) val != jetsam_diagnostic_mode) {
		jetsam_diagnostic_mode = val;

		memorystatus_jetsam_policy &= ~kPolicyDiagnoseActive;

		switch (jetsam_diagnostic_mode) {
		case kJetsamDiagnosticModeNone:
			/* Already cleared */
			break;
		case kJetsamDiagnosticModeAll:
			memorystatus_jetsam_policy |= kPolicyDiagnoseAll;
			break;
		case kJetsamDiagnosticModeStopAtFirstActive:
			memorystatus_jetsam_policy |= kPolicyDiagnoseFirst;
			break;
		default:
			/* Already validated */
			break;
		}

		memorystatus_update_levels_locked(FALSE);
		changed = TRUE;
	}

	proc_list_unlock();

	if (changed) {
		printf("%s\n", diagnosticStrings[val]);
	}

	return (0);
}

SYSCTL_PROC(_debug, OID_AUTO, jetsam_diagnostic_mode, CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_LOCKED|CTLFLAG_ANYBODY,
	&jetsam_diagnostic_mode, 0, sysctl_jetsam_diagnostic_mode, "I", "Jetsam Diagnostic Mode");

SYSCTL_UINT(_kern, OID_AUTO, memorystatus_jetsam_policy_offset_pages_diagnostic, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_jetsam_policy_offset_pages_diagnostic, 0, "");

#if VM_PRESSURE_EVENTS

SYSCTL_UINT(_kern, OID_AUTO, memorystatus_available_pages_pressure, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_available_pages_pressure, 0, "");

static int
sysctl_memorystatus_vm_pressure_level SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2, oidp)
	int error = 0;

	error = priv_check_cred(kauth_cred_get(), PRIV_VM_PRESSURE, 0);
	if (error)
		return (error);

	return SYSCTL_OUT(req, &memorystatus_vm_pressure_level, sizeof(memorystatus_vm_pressure_level));
}

SYSCTL_PROC(_kern, OID_AUTO, memorystatus_vm_pressure_level, CTLTYPE_INT|CTLFLAG_RD|CTLFLAG_LOCKED|CTLFLAG_MASKED,
	0, 0, &sysctl_memorystatus_vm_pressure_level, "I", "");

static int
sysctl_memorystatus_vm_pressure_send SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)

	int error, pid = 0;

	error = sysctl_handle_int(oidp, &pid, 0, req);
	if (error || !req->newptr)
		return (error);

	return vm_dispatch_pressure_note_to_pid(pid, FALSE);
}

SYSCTL_PROC(_kern, OID_AUTO, memorystatus_vm_pressure_send, CTLTYPE_INT|CTLFLAG_WR|CTLFLAG_LOCKED|CTLFLAG_MASKED,
	0, 0, &sysctl_memorystatus_vm_pressure_send, "I", "");

#endif /* VM_PRESSURE_EVENTS */

#endif /* CONFIG_JETSAM */

#endif /* DEVELOPMENT || DEBUG */

#if CONFIG_FREEZE

SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_threshold, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_freeze_threshold, 0, "");

SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_pages_min, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_freeze_pages_min, 0, "");
SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_pages_max, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_freeze_pages_max, 0, "");

SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freeze_count, CTLFLAG_RD|CTLFLAG_LOCKED, &memorystatus_freeze_count, "");
SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freeze_pageouts, CTLFLAG_RD|CTLFLAG_LOCKED, &memorystatus_freeze_pageouts, "");
SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freeze_throttle_count, CTLFLAG_RD|CTLFLAG_LOCKED, &memorystatus_freeze_throttle_count, "");
SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_min_processes, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_freeze_suspended_threshold, 0, "");

boolean_t memorystatus_freeze_throttle_enabled = TRUE;
SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_throttle_enabled, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_freeze_throttle_enabled, 0, "");

/*
 * Enabled via: <rdar://problem/13248767> Enable the sysctl_memorystatus_freeze/thaw sysctls on Release KC
 *
 * TODO: Manual trigger of freeze and thaw for dev / debug kernels only.
 * <rdar://problem/13248795> Disable/restrict the sysctl_memorystatus_freeze/thaw sysctls on Release KC
 */
static int
sysctl_memorystatus_freeze SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)

	int error, pid = 0;
	proc_t p;

	error = sysctl_handle_int(oidp, &pid, 0, req);
	if (error || !req->newptr)
		return (error);

	p = proc_find(pid);
	if (p != NULL) {
		uint32_t purgeable, wired, clean, dirty;
		boolean_t shared;
		uint32_t max_pages = 0;

		if (DEFAULT_FREEZER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
			max_pages = MIN(default_pager_swap_pages_free(), memorystatus_freeze_pages_max);
		} else {
			max_pages = UINT32_MAX - 1;
		}
		error = task_freeze(p->task, &purgeable, &wired, &clean, &dirty, max_pages, &shared, FALSE);
		proc_rele(p);

		if (error)
			error = EIO;
		return error;
	}
	return EINVAL;
}

SYSCTL_PROC(_kern, OID_AUTO, memorystatus_freeze, CTLTYPE_INT|CTLFLAG_WR|CTLFLAG_LOCKED|CTLFLAG_MASKED,
	0, 0, &sysctl_memorystatus_freeze, "I", "");

static int
sysctl_memorystatus_available_pages_thaw SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)

	int error, pid = 0;
	proc_t p;

	error = sysctl_handle_int(oidp, &pid, 0, req);
	if (error || !req->newptr)
		return (error);

	p = proc_find(pid);
	if (p != NULL) {
		error = task_thaw(p->task);
		proc_rele(p);

		if (error)
			error = EIO;
		return error;
	}

	return EINVAL;
}

SYSCTL_PROC(_kern, OID_AUTO, memorystatus_thaw, CTLTYPE_INT|CTLFLAG_WR|CTLFLAG_LOCKED|CTLFLAG_MASKED,
	0, 0, &sysctl_memorystatus_available_pages_thaw, "I", "");

#endif /* CONFIG_FREEZE */
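/*
 * Example (userspace, illustrative only): on kernels where the freeze/thaw
 * sysctls above are compiled in, a test harness might drive a freeze/thaw
 * cycle for a given pid via sysctlbyname(3). A minimal sketch; availability
 * of these sysctls on the running kernel is an assumption:
 *
 *	#include <sys/sysctl.h>
 *
 *	static int
 *	freeze_then_thaw(pid_t pid)
 *	{
 *		int val = (int)pid;
 *		if (sysctlbyname("kern.memorystatus_freeze", NULL, NULL, &val, sizeof(val)) != 0)
 *			return -1;
 *		return sysctlbyname("kern.memorystatus_thaw", NULL, NULL, &val, sizeof(val));
 *	}
 */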
extern kern_return_t kernel_thread_start_priority(thread_continue_t continuation,
						  void *parameter,
						  integer_t priority,
						  thread_t *new_thread);

static proc_t memorystatus_get_first_proc_locked(unsigned int *bucket_index, boolean_t search) {
	memstat_bucket_t *current_bucket;
	proc_t next_p;

	if ((*bucket_index) >= MEMSTAT_BUCKET_COUNT) {
		return NULL;
	}

	current_bucket = &memstat_bucket[*bucket_index];
	next_p = TAILQ_FIRST(&current_bucket->list);
	if (!next_p && search) {
		while (!next_p && (++(*bucket_index) < MEMSTAT_BUCKET_COUNT)) {
			current_bucket = &memstat_bucket[*bucket_index];
			next_p = TAILQ_FIRST(&current_bucket->list);
		}
	}

	return next_p;
}

static proc_t memorystatus_get_next_proc_locked(unsigned int *bucket_index, proc_t p, boolean_t search) {
	memstat_bucket_t *current_bucket;
	proc_t next_p;

	if (!p || ((*bucket_index) >= MEMSTAT_BUCKET_COUNT)) {
		return NULL;
	}

	next_p = TAILQ_NEXT(p, p_memstat_list);
	while (!next_p && search && (++(*bucket_index) < MEMSTAT_BUCKET_COUNT)) {
		current_bucket = &memstat_bucket[*bucket_index];
		next_p = TAILQ_FIRST(&current_bucket->list);
	}

	return next_p;
}

__private_extern__ void
memorystatus_init(void)
{
	thread_t thread = THREAD_NULL;
	kern_return_t result;
	int i;

	nanoseconds_to_absolutetime((uint64_t)DEFERRED_IDLE_EXIT_TIME_SECS * NSEC_PER_SEC, &memorystatus_idle_delay_time);

	/* Init buckets */
	for (i = 0; i < MEMSTAT_BUCKET_COUNT; i++) {
		TAILQ_INIT(&memstat_bucket[i].list);
		memstat_bucket[i].count = 0;
	}

	memorystatus_idle_demotion_call = thread_call_allocate((thread_call_func_t)memorystatus_perform_idle_demotion, NULL);

	/* Apply overrides */
	PE_get_default("kern.jetsam_delta", &delta_percentage, sizeof(delta_percentage));
	assert(delta_percentage < 100);
	PE_get_default("kern.jetsam_critical_threshold", &critical_threshold_percentage, sizeof(critical_threshold_percentage));
	assert(critical_threshold_percentage < 100);
	PE_get_default("kern.jetsam_idle_offset", &idle_offset_percentage, sizeof(idle_offset_percentage));
	assert(idle_offset_percentage < 100);
	PE_get_default("kern.jetsam_pressure_threshold", &pressure_threshold_percentage, sizeof(pressure_threshold_percentage));
	assert(pressure_threshold_percentage < 100);
	PE_get_default("kern.jetsam_freeze_threshold", &freeze_threshold_percentage, sizeof(freeze_threshold_percentage));
	assert(freeze_threshold_percentage < 100);

#if CONFIG_JETSAM
	memorystatus_delta = delta_percentage * atop_64(max_mem) / 100;
#if !LATENCY_JETSAM
	memorystatus_available_pages_critical_idle_offset = idle_offset_percentage * atop_64(max_mem) / 100;
#endif

	memorystatus_available_pages_critical_base = (critical_threshold_percentage / delta_percentage) * memorystatus_delta;

	memorystatus_jetsam_snapshot_max = maxproc;
	memorystatus_jetsam_snapshot =
		(memorystatus_jetsam_snapshot_t*)kalloc(sizeof(memorystatus_jetsam_snapshot_t) +
		sizeof(memorystatus_jetsam_snapshot_entry_t) * memorystatus_jetsam_snapshot_max);
	if (!memorystatus_jetsam_snapshot) {
		panic("Could not allocate memorystatus_jetsam_snapshot");
	}

	/* No contention at this point */
	memorystatus_update_levels_locked(FALSE);
#endif

#if CONFIG_FREEZE
	memorystatus_freeze_threshold = (freeze_threshold_percentage / delta_percentage) * memorystatus_delta;
#endif

	result = kernel_thread_start_priority(memorystatus_thread, NULL, 95 /* MAXPRI_KERNEL */, &thread);
	if (result == KERN_SUCCESS) {
		thread_deallocate(thread);
	} else {
		panic("Could not create memorystatus_thread");
	}
}
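/*
 * Worked example of the threshold arithmetic above (illustrative only,
 * assuming a 1 GB device with 4 KB pages, i.e. atop_64(max_mem) == 262144,
 * and the default percentages):
 *
 *	memorystatus_delta = 5 * 262144 / 100              = 13107 pages
 *	memorystatus_available_pages_critical_base
 *	    = (5 / 5) * memorystatus_delta                 = 13107 pages
 *	memorystatus_freeze_threshold
 *	    = (50 / 5) * memorystatus_delta                = 131070 pages
 *
 * Note that (critical_threshold_percentage / delta_percentage) is integer
 * division over unsigned longs, so the ratio truncates before it scales
 * memorystatus_delta.
 */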
/* Centralised for the purposes of allowing panic-on-jetsam */
extern void
vm_wake_compactor_swapper(void);

static boolean_t
memorystatus_do_kill(proc_t p, uint32_t cause) {

	int retval = 0;

#if CONFIG_JETSAM && (DEVELOPMENT || DEBUG)
	if (memorystatus_jetsam_panic_debug & (1 << cause)) {
		panic("memorystatus_do_kill(): jetsam debug panic (cause: %d)", cause);
	}
#else
#pragma unused(cause)
#endif
	int jetsam_flags = P_LTERM_JETSAM;
	switch (cause) {
		case kMemorystatusKilledHiwat:           jetsam_flags |= P_JETSAM_HIWAT; break;
		case kMemorystatusKilledVnodes:          jetsam_flags |= P_JETSAM_VNODE; break;
		case kMemorystatusKilledVMPageShortage:  jetsam_flags |= P_JETSAM_VMPAGESHORTAGE; break;
		case kMemorystatusKilledVMThrashing:     jetsam_flags |= P_JETSAM_VMTHRASHING; break;
		case kMemorystatusKilledPerProcessLimit: jetsam_flags |= P_JETSAM_PID; break;
		case kMemorystatusKilledIdleExit:        jetsam_flags |= P_JETSAM_IDLEEXIT; break;
	}
	retval = exit1_internal(p, W_EXITCODE(0, SIGKILL), (int *)NULL, FALSE, FALSE, jetsam_flags);

	if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
		vm_wake_compactor_swapper();
	}

	return (retval == 0);
}

/*
 * Node manipulation
 */

static void
memorystatus_check_levels_locked(void) {
#if CONFIG_JETSAM
	/* Update levels */
	memorystatus_update_levels_locked(TRUE);
#endif
}

static void
memorystatus_perform_idle_demotion(__unused void *spare1, __unused void *spare2)
{
	proc_t p;
	uint64_t current_time;
	memstat_bucket_t *demotion_bucket;

	MEMORYSTATUS_DEBUG(1, "memorystatus_perform_idle_demotion()\n");

	KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_IDLE_DEMOTE) | DBG_FUNC_START, 0, 0, 0, 0, 0);

	current_time = mach_absolute_time();

	proc_list_lock();

	demotion_bucket = &memstat_bucket[JETSAM_PRIORITY_IDLE_DEFERRED];
	p = TAILQ_FIRST(&demotion_bucket->list);

	while (p) {
		MEMORYSTATUS_DEBUG(1, "memorystatus_perform_idle_demotion() found %d\n", p->p_pid);

		assert(p->p_memstat_idledeadline);
		assert(p->p_memstat_dirty & P_DIRTY_DEFER_IN_PROGRESS);
		assert((p->p_memstat_dirty & (P_DIRTY_IDLE_EXIT_ENABLED|P_DIRTY_IS_DIRTY)) == P_DIRTY_IDLE_EXIT_ENABLED);

		if (current_time >= p->p_memstat_idledeadline) {
#if DEBUG || DEVELOPMENT
			if (!(p->p_memstat_dirty & P_DIRTY_MARKED)) {
				printf("memorystatus_perform_idle_demotion: moving process %d to idle band, but never dirtied (0x%x)!\n", p->p_pid, p->p_memstat_dirty);
			}
#endif
			memorystatus_invalidate_idle_demotion_locked(p, TRUE);
			memorystatus_update_priority_locked(p, JETSAM_PRIORITY_IDLE);

			// The prior process has moved out of the demotion bucket, so grab the new head and continue
			p = TAILQ_FIRST(&demotion_bucket->list);
			continue;
		}

		// No further candidates
		break;
	}

	memorystatus_reschedule_idle_demotion_locked();

	proc_list_unlock();

	KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_IDLE_DEMOTE) | DBG_FUNC_END, 0, 0, 0, 0, 0);
}

static void
memorystatus_schedule_idle_demotion_locked(proc_t p, boolean_t set_state)
{
	MEMORYSTATUS_DEBUG(1, "memorystatus_schedule_idle_demotion_locked: scheduling demotion to idle band for process %d (dirty:0x%x, set_state %d, demotions %d).\n",
		p->p_pid, p->p_memstat_dirty, set_state, memorystatus_scheduled_idle_demotions);

	assert((p->p_memstat_dirty & (P_DIRTY_IDLE_EXIT_ENABLED|P_DIRTY_DEFER_IN_PROGRESS)) == (P_DIRTY_IDLE_EXIT_ENABLED|P_DIRTY_DEFER_IN_PROGRESS));

	if (set_state) {
		assert(p->p_memstat_idledeadline == 0);
		p->p_memstat_idledeadline = mach_absolute_time() + memorystatus_idle_delay_time;
	}

	assert(p->p_memstat_idledeadline);

	memorystatus_scheduled_idle_demotions++;
}

static void
memorystatus_invalidate_idle_demotion_locked(proc_t p, boolean_t clear_state)
{
	MEMORYSTATUS_DEBUG(1, "memorystatus_invalidate_idle_demotion(): invalidating demotion to idle band for process %d (clear_state %d, demotions %d).\n",
		p->p_pid, clear_state, memorystatus_scheduled_idle_demotions);

	assert(p->p_memstat_idledeadline);

	if (clear_state) {
		p->p_memstat_idledeadline = 0;
		p->p_memstat_dirty &= ~P_DIRTY_DEFER_IN_PROGRESS;
	}

	memorystatus_scheduled_idle_demotions--;
	assert(memorystatus_scheduled_idle_demotions >= 0);
}

static void
memorystatus_reschedule_idle_demotion_locked(void) {
	if (0 == memorystatus_scheduled_idle_demotions) {
		if (memstat_idle_demotion_deadline) {
			/* Transitioned 1->0, so cancel next call */
			thread_call_cancel(memorystatus_idle_demotion_call);
			memstat_idle_demotion_deadline = 0;
		}
	} else {
		memstat_bucket_t *demotion_bucket;
		proc_t p;
		demotion_bucket = &memstat_bucket[JETSAM_PRIORITY_IDLE_DEFERRED];
		p = TAILQ_FIRST(&demotion_bucket->list);
		assert(p && p->p_memstat_idledeadline);

		if (memstat_idle_demotion_deadline != p->p_memstat_idledeadline){
			thread_call_enter_delayed(memorystatus_idle_demotion_call, p->p_memstat_idledeadline);
			memstat_idle_demotion_deadline = p->p_memstat_idledeadline;
		}
	}
}
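/*
 * Illustrative timeline for the demotion machinery above (sketch only;
 * the deferral window is DEFERRED_IDLE_EXIT_TIME_SECS, converted to
 * memorystatus_idle_delay_time in memorystatus_init()):
 *
 *	t0: a tracked process goes clean with deferral in effect
 *	    -> memorystatus_schedule_idle_demotion_locked() stamps
 *	       p_memstat_idledeadline = t0 + memorystatus_idle_delay_time
 *	       and the process sits in JETSAM_PRIORITY_IDLE_DEFERRED
 *	t0 + n (before the deadline): the process dirties itself
 *	    -> memorystatus_invalidate_idle_demotion_locked() backs out the
 *	       pending demotion, and the thread call is rescheduled
 *	t0 + memorystatus_idle_delay_time: the thread call fires
 *	    -> memorystatus_perform_idle_demotion() moves any still-clean
 *	       process down to JETSAM_PRIORITY_IDLE
 */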
/*
 * List manipulation
 */

int
memorystatus_add(proc_t p, boolean_t locked)
{
	memstat_bucket_t *bucket;

	MEMORYSTATUS_DEBUG(1, "memorystatus_list_add(): adding process %d with priority %d.\n", p->p_pid, p->p_memstat_effectivepriority);

	if (!locked) {
		proc_list_lock();
	}

	/* Processes marked internal do not have priority tracked */
	if (p->p_memstat_state & P_MEMSTAT_INTERNAL) {
		goto exit;
	}

	bucket = &memstat_bucket[p->p_memstat_effectivepriority];

	TAILQ_INSERT_TAIL(&bucket->list, p, p_memstat_list);
	bucket->count++;

	memorystatus_list_count++;

	memorystatus_check_levels_locked();

exit:
	if (!locked) {
		proc_list_unlock();
	}

	return 0;
}

static void
memorystatus_update_priority_locked(proc_t p, int priority)
{
	memstat_bucket_t *old_bucket, *new_bucket;

	assert(priority < MEMSTAT_BUCKET_COUNT);

	/* Ensure that exit isn't underway, leaving the proc retained but removed from its bucket */
	if ((p->p_listflag & P_LIST_EXITED) != 0) {
		return;
	}

	MEMORYSTATUS_DEBUG(1, "memorystatus_update_priority_locked(): setting process %d to priority %d\n", p->p_pid, priority);

	old_bucket = &memstat_bucket[p->p_memstat_effectivepriority];
	TAILQ_REMOVE(&old_bucket->list, p, p_memstat_list);
	old_bucket->count--;

	new_bucket = &memstat_bucket[priority];
	TAILQ_INSERT_TAIL(&new_bucket->list, p, p_memstat_list);
	new_bucket->count++;

#if CONFIG_JETSAM
	if (memorystatus_highwater_enabled && (p->p_memstat_state & P_MEMSTAT_MEMLIMIT_BACKGROUND)) {
		if (((priority >= JETSAM_PRIORITY_FOREGROUND) && (p->p_memstat_effectivepriority < JETSAM_PRIORITY_FOREGROUND)) ||
		    ((priority < JETSAM_PRIORITY_FOREGROUND) && (p->p_memstat_effectivepriority >= JETSAM_PRIORITY_FOREGROUND))) {
			int32_t memlimit = (priority >= JETSAM_PRIORITY_FOREGROUND) ? -1 : p->p_memstat_memlimit;
			task_set_phys_footprint_limit_internal(p->task, (memlimit > 0) ? memlimit : -1, NULL, TRUE);
		}
	}
#endif

	p->p_memstat_effectivepriority = priority;

	memorystatus_check_levels_locked();
}

int
memorystatus_update(proc_t p, int priority, uint64_t user_data, boolean_t effective, boolean_t update_memlimit, int32_t memlimit, boolean_t memlimit_background)
{
	int ret;

#if !CONFIG_JETSAM
#pragma unused(update_memlimit, memlimit, memlimit_background)
#endif

	MEMORYSTATUS_DEBUG(1, "memorystatus_update: changing process %d: priority %d, user_data 0x%llx\n", p->p_pid, priority, user_data);

	KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_UPDATE) | DBG_FUNC_START, p->p_pid, priority, user_data, effective, 0);

	if (priority == -1) {
		/* Use as shorthand for default priority */
		priority = JETSAM_PRIORITY_DEFAULT;
	} else if (priority == JETSAM_PRIORITY_IDLE_DEFERRED) {
		/* JETSAM_PRIORITY_IDLE_DEFERRED is reserved for internal use; if requested, adjust to JETSAM_PRIORITY_IDLE. */
		priority = JETSAM_PRIORITY_IDLE;
	} else if ((priority < 0) || (priority >= MEMSTAT_BUCKET_COUNT)) {
		/* Sanity check */
		ret = EINVAL;
		goto out;
	}

	proc_list_lock();

	assert(!(p->p_memstat_state & P_MEMSTAT_INTERNAL));

	if (effective && (p->p_memstat_state & P_MEMSTAT_PRIORITYUPDATED)) {
		ret = EALREADY;
		proc_list_unlock();
		MEMORYSTATUS_DEBUG(1, "memorystatus_update: effective change specified for pid %d, but change already occurred.\n", p->p_pid);
		goto out;
	}

	p->p_memstat_state |= P_MEMSTAT_PRIORITYUPDATED;
	p->p_memstat_userdata = user_data;
	p->p_memstat_requestedpriority = priority;

#if CONFIG_JETSAM
	if (update_memlimit) {
		p->p_memstat_memlimit = memlimit;
		if (memlimit_background) {
			/* Will be set as priority is updated */
			p->p_memstat_state |= P_MEMSTAT_MEMLIMIT_BACKGROUND;
		} else {
			/* Otherwise, apply now */
			if (memorystatus_highwater_enabled) {
				task_set_phys_footprint_limit_internal(p->task, (memlimit > 0) ? memlimit : -1, NULL, TRUE);
			}
		}
	}
#endif

	memorystatus_update_priority_locked(p, priority);

	proc_list_unlock();
	ret = 0;

out:
	KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_UPDATE) | DBG_FUNC_END, ret, 0, 0, 0, 0);

	return ret;
}

int
memorystatus_remove(proc_t p, boolean_t locked)
{
	int ret;
	memstat_bucket_t *bucket;

	MEMORYSTATUS_DEBUG(1, "memorystatus_list_remove: removing process %d\n", p->p_pid);

	if (!locked) {
		proc_list_lock();
	}

	assert(!(p->p_memstat_state & P_MEMSTAT_INTERNAL));

	bucket = &memstat_bucket[p->p_memstat_effectivepriority];
	TAILQ_REMOVE(&bucket->list, p, p_memstat_list);
	bucket->count--;

	memorystatus_list_count--;

	/* If awaiting demotion to the idle band, clean up */
	if (p->p_memstat_effectivepriority == JETSAM_PRIORITY_IDLE_DEFERRED) {
		memorystatus_invalidate_idle_demotion_locked(p, TRUE);
		memorystatus_reschedule_idle_demotion_locked();
	}

	memorystatus_check_levels_locked();

#if CONFIG_FREEZE
	if (p->p_memstat_state & (P_MEMSTAT_FROZEN)) {
		memorystatus_frozen_count--;
	}

	if (p->p_memstat_state & P_MEMSTAT_SUSPENDED) {
		memorystatus_suspended_footprint_total -= p->p_memstat_suspendedfootprint;
		memorystatus_suspended_count--;
	}
#endif

	if (!locked) {
		proc_list_unlock();
	}

	if (p) {
		ret = 0;
	} else {
		ret = ESRCH;
	}

	return ret;
}

static boolean_t
memorystatus_validate_track_flags(struct proc *target_p, uint32_t pcontrol) {
	/* See that the process isn't marked for termination */
	if (target_p->p_memstat_dirty & P_DIRTY_TERMINATED) {
		return FALSE;
	}

	/* Idle exit requires that process be tracked */
	if ((pcontrol & PROC_DIRTY_ALLOW_IDLE_EXIT) &&
	    !(pcontrol & PROC_DIRTY_TRACK)) {
		return FALSE;
	}

	/* Deferral is only relevant if idle exit is specified */
	if ((pcontrol & PROC_DIRTY_DEFER) &&
	    !(pcontrol & PROC_DIRTY_ALLOWS_IDLE_EXIT)) {
		return FALSE;
	}

	return TRUE;
}

static void
memorystatus_update_idle_priority_locked(proc_t p) {
	int32_t priority;

	MEMORYSTATUS_DEBUG(1, "memorystatus_update_idle_priority_locked(): pid %d dirty 0x%X\n", p->p_pid, p->p_memstat_dirty);

	if ((p->p_memstat_dirty & (P_DIRTY_IDLE_EXIT_ENABLED|P_DIRTY_IS_DIRTY)) == P_DIRTY_IDLE_EXIT_ENABLED) {
		priority = (p->p_memstat_dirty & P_DIRTY_DEFER_IN_PROGRESS) ? JETSAM_PRIORITY_IDLE_DEFERRED : JETSAM_PRIORITY_IDLE;
	} else {
		priority = p->p_memstat_requestedpriority;
	}

	memorystatus_update_priority_locked(p, priority);
}

/*
 * Processes can opt to have their state tracked by the kernel, indicating when they are busy (dirty) or idle
 * (clean). They may also indicate that they support termination when idle, with the result that they are promoted
 * to their desired, higher, jetsam priority when dirty (and are therefore killed later), and demoted to the low
 * priority idle band when clean (and killed earlier, protecting higher priority processes).
 *
 * If the deferral flag is set, then newly tracked processes will be protected for an initial period (as determined by
 * memorystatus_idle_delay_time); if they go clean during this time, then they will be moved to a deferred-idle band
 * with a slightly higher priority, guarding against immediate termination under memory pressure and being unable to
 * make forward progress. Finally, when the guard expires, they will be moved to the standard, lowest-priority, idle
 * band. The deferral can be cleared early by clearing the appropriate flag.
 *
 * The deferral timer is active only for the duration that the process is marked as guarded and clean; if the process
 * is marked dirty, the timer will be cancelled. Upon being subsequently marked clean, the deferment will either be
 * re-enabled or the guard state cleared, depending on whether the guard deadline has passed.
 */
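/*
 * Illustrative userspace flow for the tracking described above (sketch only;
 * proc_track_dirty() and proc_set_dirty() are private libproc interfaces, and
 * their names and availability are assumptions here). A daemon that supports
 * idle exit might do:
 *
 *	proc_track_dirty(getpid(),
 *	    PROC_DIRTY_TRACK | PROC_DIRTY_ALLOW_IDLE_EXIT | PROC_DIRTY_DEFER);
 *	...
 *	proc_set_dirty(getpid(), true);   // while servicing work
 *	...
 *	proc_set_dirty(getpid(), false);  // idle again; eligible for idle exit
 *					  // once the deferral window lapses
 */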
int
memorystatus_dirty_track(proc_t p, uint32_t pcontrol) {
	unsigned int old_dirty;
	boolean_t reschedule = FALSE;
	int ret;

	proc_list_lock();

	if (p->p_memstat_state & P_MEMSTAT_INTERNAL) {
		ret = EPERM;
		goto exit;
	}

	if (!memorystatus_validate_track_flags(p, pcontrol)) {
		ret = EINVAL;
		goto exit;
	}

	old_dirty = p->p_memstat_dirty;

	/* These bits are cumulative, as per <rdar://problem/11159924> */
	if (pcontrol & PROC_DIRTY_TRACK) {
		p->p_memstat_dirty |= P_DIRTY_TRACK;
	}

	if (pcontrol & PROC_DIRTY_ALLOW_IDLE_EXIT) {
		p->p_memstat_dirty |= P_DIRTY_ALLOW_IDLE_EXIT;
	}

	/* This can be set and cleared exactly once. */
	if ((pcontrol & PROC_DIRTY_DEFER) && !(old_dirty & P_DIRTY_DEFER)) {
		p->p_memstat_dirty |= (P_DIRTY_DEFER|P_DIRTY_DEFER_IN_PROGRESS);
	} else {
		p->p_memstat_dirty &= ~P_DIRTY_DEFER_IN_PROGRESS;
	}

	MEMORYSTATUS_DEBUG(1, "memorystatus_on_track_dirty(): set idle-exit %s / deferred %s / dirty %s for process %d\n",
		((p->p_memstat_dirty & P_DIRTY_IDLE_EXIT_ENABLED) == P_DIRTY_IDLE_EXIT_ENABLED) ? "Y" : "N",
		p->p_memstat_dirty & P_DIRTY_DEFER_IN_PROGRESS ? "Y" : "N",
		p->p_memstat_dirty & P_DIRTY ? "Y" : "N",
		p->p_pid);

	/* Kick off or invalidate the idle exit deferment if there's a state transition. */
	if (!(p->p_memstat_dirty & P_DIRTY_IS_DIRTY)) {
		if (((p->p_memstat_dirty & P_DIRTY_IDLE_EXIT_ENABLED) == P_DIRTY_IDLE_EXIT_ENABLED) &&
		    (p->p_memstat_dirty & P_DIRTY_DEFER_IN_PROGRESS) && !(old_dirty & P_DIRTY_DEFER_IN_PROGRESS)) {
			memorystatus_schedule_idle_demotion_locked(p, TRUE);
			reschedule = TRUE;
		} else if (!(p->p_memstat_dirty & P_DIRTY_DEFER_IN_PROGRESS) && (old_dirty & P_DIRTY_DEFER_IN_PROGRESS)) {
			memorystatus_invalidate_idle_demotion_locked(p, TRUE);
			reschedule = TRUE;
		}
	}

	memorystatus_update_idle_priority_locked(p);

	if (reschedule) {
		memorystatus_reschedule_idle_demotion_locked();
	}

	ret = 0;

exit:
	proc_list_unlock();

	return ret;
}

int
memorystatus_dirty_set(proc_t p, boolean_t self, uint32_t pcontrol) {
	int ret;
	boolean_t kill = false;
	boolean_t reschedule = FALSE;
	boolean_t was_dirty = FALSE;
	boolean_t now_dirty = FALSE;

	MEMORYSTATUS_DEBUG(1, "memorystatus_dirty_set(): %d %d 0x%x 0x%x\n", self, p->p_pid, pcontrol, p->p_memstat_dirty);

	proc_list_lock();

	if (p->p_memstat_state & P_MEMSTAT_INTERNAL) {
		ret = EPERM;
		goto exit;
	}

	if (p->p_memstat_dirty & P_DIRTY_IS_DIRTY)
		was_dirty = TRUE;

	if (!(p->p_memstat_dirty & P_DIRTY_TRACK)) {
		/* Dirty tracking not enabled */
		ret = EINVAL;
	} else if (pcontrol && (p->p_memstat_dirty & P_DIRTY_TERMINATED)) {
		/*
		 * Process is set to be terminated and we're attempting to mark it dirty.
		 * Set for termination and marking as clean is OK - see <rdar://problem/10594349>.
		 */
		ret = EBUSY;
	} else {
		int flag = (self == TRUE) ? P_DIRTY : P_DIRTY_SHUTDOWN;
		if (pcontrol && !(p->p_memstat_dirty & flag)) {
			/* Mark the process as having been dirtied at some point */
			p->p_memstat_dirty |= (flag | P_DIRTY_MARKED);
			memorystatus_dirty_count++;
			ret = 0;
		} else if ((pcontrol == 0) && (p->p_memstat_dirty & flag)) {
			if ((flag == P_DIRTY_SHUTDOWN) && !(p->p_memstat_dirty & P_DIRTY)) {
				/* Clearing the dirty shutdown flag, and the process is otherwise clean - kill */
				p->p_memstat_dirty |= P_DIRTY_TERMINATED;
				kill = true;
			} else if ((flag == P_DIRTY) && (p->p_memstat_dirty & P_DIRTY_TERMINATED)) {
				/* Kill previously terminated processes if set clean */
				kill = true;
			}
			p->p_memstat_dirty &= ~flag;
			memorystatus_dirty_count--;
			ret = 0;
		} else {
			/* Already set */
			ret = EALREADY;
		}
	}

	if (ret != 0) {
		goto exit;
	}

	if (p->p_memstat_dirty & P_DIRTY_IS_DIRTY)
		now_dirty = TRUE;

	if ((was_dirty == TRUE && now_dirty == FALSE) ||
	    (was_dirty == FALSE && now_dirty == TRUE)) {

		/* Manage idle exit deferral, if applied */
		if ((p->p_memstat_dirty & (P_DIRTY_IDLE_EXIT_ENABLED|P_DIRTY_DEFER_IN_PROGRESS)) ==
		    (P_DIRTY_IDLE_EXIT_ENABLED|P_DIRTY_DEFER_IN_PROGRESS)) {
			if (p->p_memstat_dirty & P_DIRTY_IS_DIRTY) {
				memorystatus_invalidate_idle_demotion_locked(p, FALSE);
				reschedule = TRUE;
			} else {
				/* We evaluate lazily, so reset the idle-deadline if it's expired by the time the process becomes clean. */
				if (mach_absolute_time() >= p->p_memstat_idledeadline) {
					p->p_memstat_idledeadline = 0;
					p->p_memstat_dirty &= ~P_DIRTY_DEFER_IN_PROGRESS;
				} else {
					memorystatus_schedule_idle_demotion_locked(p, FALSE);
					reschedule = TRUE;
				}
			}
		}

		memorystatus_update_idle_priority_locked(p);

		/* If the deferral state changed, reschedule the demotion timer */
		if (reschedule) {
			memorystatus_reschedule_idle_demotion_locked();
		}
	}

	if (kill) {
		psignal(p, SIGKILL);
	}

exit:
	proc_list_unlock();

	return ret;
}

int
memorystatus_dirty_get(proc_t p) {
	int ret = 0;

	proc_list_lock();

	if (p->p_memstat_dirty & P_DIRTY_TRACK) {
		ret |= PROC_DIRTY_TRACKED;
		if (p->p_memstat_dirty & P_DIRTY_ALLOW_IDLE_EXIT) {
			ret |= PROC_DIRTY_ALLOWS_IDLE_EXIT;
		}
		if (p->p_memstat_dirty & P_DIRTY) {
			ret |= PROC_DIRTY_IS_DIRTY;
		}
	}

	proc_list_unlock();

	return ret;
}

int
memorystatus_on_terminate(proc_t p) {
	int sig;

	proc_list_lock();

	p->p_memstat_dirty |= P_DIRTY_TERMINATED;

	if ((p->p_memstat_dirty & (P_DIRTY_TRACK|P_DIRTY_IS_DIRTY)) == P_DIRTY_TRACK) {
		/* Clean; mark as terminated and issue SIGKILL */
		sig = SIGKILL;
	} else {
		/* Dirty, terminated, or state tracking is unsupported; issue SIGTERM to allow cleanup */
		sig = SIGTERM;
	}

	proc_list_unlock();

	return sig;
}

void
memorystatus_on_suspend(proc_t p)
{
#if CONFIG_FREEZE
	uint32_t pages;
	memorystatus_get_task_page_counts(p->task, &pages, NULL);
#endif
	proc_list_lock();
#if CONFIG_FREEZE
	p->p_memstat_suspendedfootprint = pages;
	memorystatus_suspended_footprint_total += pages;
	memorystatus_suspended_count++;
#endif
	p->p_memstat_state |= P_MEMSTAT_SUSPENDED;
	proc_list_unlock();
}

void
memorystatus_on_resume(proc_t p)
{
#if CONFIG_FREEZE
	boolean_t frozen;
	pid_t pid;
#endif

	proc_list_lock();

#if CONFIG_FREEZE
	frozen = (p->p_memstat_state & P_MEMSTAT_FROZEN);
	if (frozen) {
		memorystatus_frozen_count--;
		p->p_memstat_state |= P_MEMSTAT_PRIOR_THAW;
	}

	memorystatus_suspended_footprint_total -= p->p_memstat_suspendedfootprint;
	memorystatus_suspended_count--;

	pid = p->p_pid;
#endif

	p->p_memstat_state &= ~(P_MEMSTAT_SUSPENDED | P_MEMSTAT_FROZEN);

	proc_list_unlock();

#if CONFIG_FREEZE
	if (frozen) {
		memorystatus_freeze_entry_t data = { pid, FALSE, 0 };
		memorystatus_send_note(kMemorystatusFreezeNote, &data, sizeof(data));
	}
#endif
}

void
memorystatus_on_inactivity(proc_t p)
{
#pragma unused(p)
#if CONFIG_FREEZE
	/* Wake the freeze thread */
	thread_wakeup((event_t)&memorystatus_freeze_wakeup);
#endif
}

static uint32_t
memorystatus_build_state(proc_t p) {
	uint32_t snapshot_state = 0;

	/* General */
	if (p->p_memstat_state & P_MEMSTAT_SUSPENDED) {
		snapshot_state |= kMemorystatusSuspended;
	}
	if (p->p_memstat_state & P_MEMSTAT_FROZEN) {
		snapshot_state |= kMemorystatusFrozen;
	}
	if (p->p_memstat_state & P_MEMSTAT_PRIOR_THAW) {
		snapshot_state |= kMemorystatusWasThawed;
	}

	/* Tracking */
	if (p->p_memstat_dirty & P_DIRTY_TRACK) {
		snapshot_state |= kMemorystatusTracked;
	}
	if ((p->p_memstat_dirty & P_DIRTY_IDLE_EXIT_ENABLED) == P_DIRTY_IDLE_EXIT_ENABLED) {
		snapshot_state |= kMemorystatusSupportsIdleExit;
	}
	if (p->p_memstat_dirty & P_DIRTY_IS_DIRTY) {
		snapshot_state |= kMemorystatusDirty;
	}

	return snapshot_state;
}

#if !CONFIG_JETSAM

static boolean_t
kill_idle_exit_proc(void)
{
	proc_t p, victim_p = PROC_NULL;
	uint64_t current_time;
	boolean_t killed = FALSE;
	unsigned int i = 0;

	/* Pick next idle exit victim. */
	current_time = mach_absolute_time();

	proc_list_lock();

	p = memorystatus_get_first_proc_locked(&i, FALSE);
	while (p) {
		/* No need to look beyond the idle band */
		if (p->p_memstat_effectivepriority != JETSAM_PRIORITY_IDLE) {
			break;
		}

		if ((p->p_memstat_dirty & (P_DIRTY_ALLOW_IDLE_EXIT|P_DIRTY_IS_DIRTY|P_DIRTY_TERMINATED)) == (P_DIRTY_ALLOW_IDLE_EXIT)) {
			if (current_time >= p->p_memstat_idledeadline) {
				p->p_memstat_dirty |= P_DIRTY_TERMINATED;
				victim_p = proc_ref_locked(p);
				break;
			}
		}

		p = memorystatus_get_next_proc_locked(&i, p, FALSE);
	}

	proc_list_unlock();

	if (victim_p) {
		printf("memorystatus_thread: idle exiting pid %d [%s]\n", victim_p->p_pid, (victim_p->p_comm ? victim_p->p_comm : "(unknown)"));
		killed = memorystatus_do_kill(victim_p, kMemorystatusKilledIdleExit);
		proc_rele(victim_p);
	}

	return killed;
}
#endif

static void
memorystatus_thread_wake(void) {
	thread_wakeup((event_t)&memorystatus_wakeup);
}

static int
memorystatus_thread_block(uint32_t interval_ms, thread_continue_t continuation)
{
	if (interval_ms) {
		assert_wait_timeout(&memorystatus_wakeup, THREAD_UNINT, interval_ms, 1000 * NSEC_PER_USEC);
	} else {
		assert_wait(&memorystatus_wakeup, THREAD_UNINT);
	}

	return thread_block(continuation);
}

extern boolean_t vm_compressor_thrashing_detected;
extern uint64_t vm_compressor_total_compressions(void);

static void
memorystatus_thread(void *param __unused, wait_result_t wr __unused)
{
	static boolean_t is_vm_privileged = FALSE;
#if CONFIG_JETSAM
	boolean_t post_snapshot = FALSE;
	uint32_t errors = 0;
#endif

	if (is_vm_privileged == FALSE) {
		/*
		 * It's the first time the thread has run, so just mark the thread as privileged and block.
		 * This avoids a spurious pass with unset variables, as set out in <rdar://problem/9609402>.
		 */
		thread_wire(host_priv_self(), current_thread(), TRUE);
		is_vm_privileged = TRUE;

		memorystatus_thread_block(0, memorystatus_thread);
	}

#if CONFIG_JETSAM

	KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_SCAN) | DBG_FUNC_START,
		memorystatus_available_pages, 0, 0, 0, 0);

	uint32_t cause = vm_compressor_thrashing_detected ? kMemorystatusKilledVMThrashing : kMemorystatusKilledVMPageShortage;

	/* Jetsam aware version.
	 *
	 * If woken under pressure, go down the path of killing:
	 *
	 * - processes exceeding their highwater mark if no clean victims available
	 * - the least recently used process if no highwater mark victims available
	 */
#if !LATENCY_JETSAM
	while (vm_compressor_thrashing_detected || memorystatus_available_pages <= memorystatus_available_pages_critical) {
#else
	while (kill_under_pressure) {
		const uint32_t SNAPSHOT_WAIT_TIMEOUT_MS = 100;
		wait_result_t wait_result;
#endif
		boolean_t killed;
		int32_t priority;

#if LEGACY_HIWATER
		/* Highwater */
		killed = memorystatus_kill_hiwat_proc(&errors);
		if (killed) {
			post_snapshot = TRUE;
			goto done;
		}
#endif

		/* LRU */
		killed = memorystatus_kill_top_process(TRUE, cause, &priority, &errors);
		if (killed) {
			if (!kill_under_pressure && (priority != JETSAM_PRIORITY_IDLE)) {
				/* Don't generate logs for steady-state idle-exit kills */
				post_snapshot = TRUE;
			}
			goto done;
		}

		/* Under pressure and unable to kill a process - panic */
		panic("memorystatus_jetsam_thread: no victim! available pages:%d\n", memorystatus_available_pages);

done:
		kill_under_pressure = FALSE;
		vm_compressor_thrashing_detected = FALSE;

#if LATENCY_JETSAM
		KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_LATENCY_COALESCE) | DBG_FUNC_START,
			memorystatus_available_pages, 0, 0, 0, 0);
		thread_wakeup((event_t)&latency_jetsam_wakeup);
		/*
		 * Coalesce snapshot reports in the face of repeated jetsams by blocking here with a timeout.
		 * If the wait expires, issue the note.
		 */
		wait_result = memorystatus_thread_block(SNAPSHOT_WAIT_TIMEOUT_MS, THREAD_CONTINUE_NULL);
		KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_LATENCY_COALESCE) | DBG_FUNC_END,
			memorystatus_available_pages, 0, 0, 0, 0);
		if (wait_result != THREAD_AWAKENED) {
			/* Catch-all */
			break;
		}
#endif
	}

	if (errors) {
		memorystatus_clear_errors();
	}

#if VM_PRESSURE_EVENTS
	memorystatus_update_vm_pressure(TRUE);
#endif

	if (post_snapshot) {
		size_t snapshot_size = sizeof(memorystatus_jetsam_snapshot_t) +
			sizeof(memorystatus_jetsam_snapshot_entry_t) * (memorystatus_jetsam_snapshot_count);
		memorystatus_jetsam_snapshot->notification_time = mach_absolute_time();
		memorystatus_send_note(kMemorystatusSnapshotNote, &snapshot_size, sizeof(snapshot_size));
	}

	KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_SCAN) | DBG_FUNC_END,
		memorystatus_available_pages, 0, 0, 0, 0);

#else /* CONFIG_JETSAM */

	/* Simple version.
	 *
	 * Jetsam not enabled, so just kill the first suitable clean process
	 * and sleep.
	 */

	if (kill_idle_exit) {
		kill_idle_exit_proc();
		kill_idle_exit = FALSE;
	}

#endif /* CONFIG_JETSAM */

	memorystatus_thread_block(0, memorystatus_thread);
}

#if !CONFIG_JETSAM
boolean_t memorystatus_idle_exit_from_VM(void) {
	kill_idle_exit = TRUE;
	memorystatus_thread_wake();
	return TRUE;
}
#endif

#if CONFIG_JETSAM

/*
 * Callback invoked when allowable physical memory footprint exceeded
 * (dirty pages + IOKit mappings)
 *
 * This is invoked for both advisory, non-fatal per-task high watermarks,
 * as well as the fatal system-wide task memory limit.
 */
void
memorystatus_on_ledger_footprint_exceeded(boolean_t warning, const int max_footprint_mb)
{
	proc_t p = current_proc();

	printf("process %d (%s) %s physical memory footprint limit of %d MB\n",
		p->p_pid, p->p_comm,
		warning ? "approaching" : "exceeded",
		max_footprint_mb);

#if VM_PRESSURE_EVENTS
	if (warning == TRUE) {
		if (memorystatus_warn_process(p->p_pid) != TRUE) {
			/* Print warning, since it's possible that task has not registered for pressure notifications */
			printf("task_exceeded_footprint: failed to warn the current task (exiting?).\n");
		}
		return;
	}
#endif /* VM_PRESSURE_EVENTS */

	if (p->p_memstat_memlimit <= 0) {
		/*
		 * If this process has no high watermark, then we have been invoked because the task
		 * has violated the system-wide per-task memory limit.
		 */
		if (memorystatus_kill_process_sync(p->p_pid, kMemorystatusKilledPerProcessLimit) != TRUE) {
			printf("task_exceeded_footprint: failed to kill the current task (exiting?).\n");
		}
	}
}

static void
memorystatus_get_task_page_counts(task_t task, uint32_t *footprint, uint32_t *max_footprint)
{
	assert(task);
	assert(footprint);

	*footprint = (uint32_t)(get_task_phys_footprint(task) / PAGE_SIZE_64);
	if (max_footprint) {
		*max_footprint = (uint32_t)(get_task_phys_footprint_max(task) / PAGE_SIZE_64);
	}
}

static int
memorystatus_send_note(int event_code, void *data, size_t data_length) {
	int ret;
	struct kev_msg ev_msg;

	ev_msg.vendor_code  = KEV_VENDOR_APPLE;
	ev_msg.kev_class    = KEV_SYSTEM_CLASS;
	ev_msg.kev_subclass = KEV_MEMORYSTATUS_SUBCLASS;

	ev_msg.event_code   = event_code;

	ev_msg.dv[0].data_length = data_length;
	ev_msg.dv[0].data_ptr    = data;
	ev_msg.dv[1].data_length = 0;

	ret = kev_post_msg(&ev_msg);
	if (ret) {
		printf("%s: kev_post_msg() failed, err %d\n", __func__, ret);
	}

	return ret;
}
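/*
 * Example consumer (userspace, sketch only): notes posted by
 * memorystatus_send_note() ride the kernel event socket, so an interested
 * client can filter on KEV_MEMORYSTATUS_SUBCLASS and read kern_event_msg
 * records. Assumes the standard <sys/kern_event.h> interface:
 *
 *	int fd = socket(PF_SYSTEM, SOCK_RAW, SYSPROTO_EVENT);
 *	struct kev_request req = {
 *		.vendor_code  = KEV_VENDOR_APPLE,
 *		.kev_class    = KEV_SYSTEM_CLASS,
 *		.kev_subclass = KEV_MEMORYSTATUS_SUBCLASS,
 *	};
 *	ioctl(fd, SIOCSKEVFILT, &req);
 *	for (;;) {
 *		char buf[1024];
 *		struct kern_event_msg *msg = (struct kern_event_msg *)buf;
 *		if (recv(fd, buf, sizeof(buf), 0) <= 0)
 *			break;
 *		// msg->event_code is e.g. kMemorystatusSnapshotNote
 *	}
 */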
static void
memorystatus_update_snapshot_locked(proc_t p, uint32_t kill_cause)
{
	unsigned int i;

	for (i = 0; i < memorystatus_jetsam_snapshot_count; i++) {
		if (memorystatus_jetsam_snapshot_list[i].pid == p->p_pid) {
			/* Update if the priority has changed since the snapshot was taken */
			if (memorystatus_jetsam_snapshot_list[i].priority != p->p_memstat_effectivepriority) {
				memorystatus_jetsam_snapshot_list[i].priority = p->p_memstat_effectivepriority;
				strlcpy(memorystatus_jetsam_snapshot_list[i].name, p->p_comm, MAXCOMLEN+1);
				memorystatus_jetsam_snapshot_list[i].state = memorystatus_build_state(p);
				memorystatus_jetsam_snapshot_list[i].user_data = p->p_memstat_userdata;
				memorystatus_jetsam_snapshot_list[i].fds = p->p_fd->fd_nfiles;
			}
			memorystatus_jetsam_snapshot_list[i].killed = kill_cause;
			return;
		}
	}
}

void memorystatus_pages_update(unsigned int pages_avail)
{
	boolean_t critical, delta;

	if (!memorystatus_delta) {
		return;
	}

	critical = (pages_avail < memorystatus_available_pages_critical) ? TRUE : FALSE;
	delta = ((pages_avail >= (memorystatus_available_pages + memorystatus_delta))
		|| (memorystatus_available_pages >= (pages_avail + memorystatus_delta))) ? TRUE : FALSE;

	if (critical || delta) {
		memorystatus_available_pages = pages_avail;
		memorystatus_level = memorystatus_available_pages * 100 / atop_64(max_mem);

#if LATENCY_JETSAM
		/* Bail early to avoid excessive wake-ups */
		if (critical) {
			return;
		}
#endif

		memorystatus_thread_wake();
	}
}
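/*
 * Worked example for the wake-up filter above (illustrative only): with
 * memorystatus_delta = 13107 pages and memorystatus_available_pages = 100000,
 * a new reading of pages_avail = 105000 is ignored (the change is smaller
 * than memorystatus_delta and the level is not critical), whereas 113107 or
 * 86893 updates the counters and wakes the memorystatus thread, as does any
 * reading below memorystatus_available_pages_critical.
 */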

static boolean_t
memorystatus_get_snapshot_properties_for_proc_locked(proc_t p, memorystatus_jetsam_snapshot_entry_t *entry)
{
    memset(entry, 0, sizeof(memorystatus_jetsam_snapshot_entry_t));

    entry->pid = p->p_pid;
    strlcpy(&entry->name[0], p->p_comm, MAXCOMLEN+1);
    entry->priority = p->p_memstat_effectivepriority;
    memorystatus_get_task_page_counts(p->task, &entry->pages, &entry->max_pages);
    entry->state = memorystatus_build_state(p);
    entry->user_data = p->p_memstat_userdata;
    memcpy(&entry->uuid[0], &p->p_uuid[0], sizeof(p->p_uuid));

    return TRUE;
}

static void
memorystatus_jetsam_snapshot_procs_locked(void)
{
    proc_t p, next_p;
    unsigned int b = 0, i = 0;
    kern_return_t kr = KERN_SUCCESS;

    mach_msg_type_number_t count = HOST_VM_INFO64_COUNT;
    vm_statistics64_data_t vm_stat;

    if ((kr = host_statistics64(host_self(), HOST_VM_INFO64, (host_info64_t)&vm_stat, &count)) != KERN_SUCCESS) {
        printf("memorystatus_jetsam_snapshot_procs_locked: host_statistics64 failed with %d\n", kr);
        memset(&memorystatus_jetsam_snapshot->stats, 0, sizeof(memorystatus_jetsam_snapshot->stats));
    } else {
        memorystatus_jetsam_snapshot->stats.free_pages = vm_stat.free_count;
        memorystatus_jetsam_snapshot->stats.active_pages = vm_stat.active_count;
        memorystatus_jetsam_snapshot->stats.inactive_pages = vm_stat.inactive_count;
        memorystatus_jetsam_snapshot->stats.throttled_pages = vm_stat.throttled_count;
        memorystatus_jetsam_snapshot->stats.purgeable_pages = vm_stat.purgeable_count;
        memorystatus_jetsam_snapshot->stats.wired_pages = vm_stat.wire_count;

        memorystatus_jetsam_snapshot->stats.speculative_pages = vm_stat.speculative_count;
        memorystatus_jetsam_snapshot->stats.filebacked_pages = vm_stat.external_page_count;
        memorystatus_jetsam_snapshot->stats.anonymous_pages = vm_stat.internal_page_count;
        memorystatus_jetsam_snapshot->stats.compressions = vm_stat.compressions;
        memorystatus_jetsam_snapshot->stats.decompressions = vm_stat.decompressions;
        memorystatus_jetsam_snapshot->stats.compressor_pages = vm_stat.compressor_page_count;
        memorystatus_jetsam_snapshot->stats.total_uncompressed_pages_in_compressor = vm_stat.total_uncompressed_pages_in_compressor;
    }

    next_p = memorystatus_get_first_proc_locked(&b, TRUE);
    while (next_p) {
        p = next_p;
        next_p = memorystatus_get_next_proc_locked(&b, p, TRUE);

        if (FALSE == memorystatus_get_snapshot_properties_for_proc_locked(p, &memorystatus_jetsam_snapshot_list[i])) {
            continue;
        }

        MEMORYSTATUS_DEBUG(0, "jetsam snapshot pid = %d, uuid = %02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x\n",
            p->p_pid,
            p->p_uuid[0], p->p_uuid[1], p->p_uuid[2], p->p_uuid[3], p->p_uuid[4], p->p_uuid[5], p->p_uuid[6], p->p_uuid[7],
            p->p_uuid[8], p->p_uuid[9], p->p_uuid[10], p->p_uuid[11], p->p_uuid[12], p->p_uuid[13], p->p_uuid[14], p->p_uuid[15]);

        if (++i == memorystatus_jetsam_snapshot_max) {
            break;
        }
    }

    memorystatus_jetsam_snapshot->snapshot_time = mach_absolute_time();
    memorystatus_jetsam_snapshot->entry_count = memorystatus_jetsam_snapshot_count = i;
}
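
/*
 * Snapshot lifecycle: the snapshot is (re)captured only when
 * memorystatus_jetsam_snapshot_count is zero. It is reset either when a
 * freshly captured snapshot finds no kill target (see the kill routines
 * below), or when userspace consumes it via
 * MEMORYSTATUS_CMD_GET_JETSAM_SNAPSHOT, which zeroes entry_count after a
 * successful copyout.
 */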

#if DEVELOPMENT || DEBUG

static int
memorystatus_cmd_set_panic_bits(user_addr_t buffer, uint32_t buffer_size) {
    int ret;
    memorystatus_jetsam_panic_options_t debug;

    if (buffer_size != sizeof(memorystatus_jetsam_panic_options_t)) {
        return EINVAL;
    }

    ret = copyin(buffer, &debug, buffer_size);
    if (ret) {
        return ret;
    }

    /* Panic bits match kMemorystatusKilled* enum */
    memorystatus_jetsam_panic_debug = (memorystatus_jetsam_panic_debug & ~debug.mask) | (debug.data & debug.mask);

    /* Copyout new value */
    debug.data = memorystatus_jetsam_panic_debug;
    ret = copyout(&debug, buffer, sizeof(memorystatus_jetsam_panic_options_t));

    return ret;
}

#endif /* DEVELOPMENT || DEBUG */

/*
 * Jetsam a specific process.
 */
static boolean_t
memorystatus_kill_specific_process(pid_t victim_pid, uint32_t cause) {
    boolean_t killed;
    proc_t p;

    /* TODO - add a victim queue and push this into the main jetsam thread */

    p = proc_find(victim_pid);
    if (!p) {
        return FALSE;
    }

    printf("memorystatus: specifically killing pid %d [%s] - memorystatus_available_pages: %d\n",
        victim_pid, (p->p_comm ? p->p_comm : "(unknown)"), memorystatus_available_pages);

    proc_list_lock();

    if (memorystatus_jetsam_snapshot_count == 0) {
        memorystatus_jetsam_snapshot_procs_locked();
    }

    memorystatus_update_snapshot_locked(p, cause);
    proc_list_unlock();

    killed = memorystatus_do_kill(p, cause);
    proc_rele(p);

    return killed;
}
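
/*
 * "First" below means first in bucket order: memorystatus_get_first_proc_locked()
 * starts scanning at bucket index 0, the idle band, so lower-priority
 * processes are always considered for the kill before higher-priority ones.
 */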

/*
 * Jetsam the first process in the queue.
 */
static boolean_t
memorystatus_kill_top_process(boolean_t any, uint32_t cause, int32_t *priority, uint32_t *errors)
{
    pid_t aPid;
    proc_t p = PROC_NULL, next_p = PROC_NULL;
    boolean_t new_snapshot = FALSE, killed = FALSE;
    unsigned int i = 0;

#ifndef CONFIG_FREEZE
#pragma unused(any)
#endif

    KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_JETSAM) | DBG_FUNC_START,
        memorystatus_available_pages, 0, 0, 0, 0);

    proc_list_lock();

    next_p = memorystatus_get_first_proc_locked(&i, TRUE);
    while (next_p) {
#if DEVELOPMENT || DEBUG
        int activeProcess;
        int procSuspendedForDiagnosis;
#endif /* DEVELOPMENT || DEBUG */

        p = next_p;
        next_p = memorystatus_get_next_proc_locked(&i, p, TRUE);

#if DEVELOPMENT || DEBUG
        activeProcess = p->p_memstat_state & P_MEMSTAT_FOREGROUND;
        procSuspendedForDiagnosis = p->p_memstat_state & P_MEMSTAT_DIAG_SUSPENDED;
#endif /* DEVELOPMENT || DEBUG */

        aPid = p->p_pid;

        if (p->p_memstat_state & (P_MEMSTAT_ERROR | P_MEMSTAT_TERMINATED)) {
            continue;
        }

#if DEVELOPMENT || DEBUG
        if ((memorystatus_jetsam_policy & kPolicyDiagnoseActive) && procSuspendedForDiagnosis) {
            printf("jetsam: continuing after ignoring proc suspended already for diagnosis - %d\n", aPid);
            continue;
        }
#endif /* DEVELOPMENT || DEBUG */

#if CONFIG_FREEZE
        boolean_t skip;
        boolean_t reclaim_proc = !(p->p_memstat_state & (P_MEMSTAT_LOCKED | P_MEMSTAT_NORECLAIM));
        if (any || reclaim_proc) {
            skip = FALSE;
        } else {
            skip = TRUE;
        }

        if (skip) {
            continue;
        } else
#endif
        {
            if (priority) {
                *priority = p->p_memstat_effectivepriority;
            }

            /*
             * Capture a snapshot if none exists and:
             * - priority was not requested (this is something other than an ambient kill)
             * - the priority was requested *and* the targeted process is not at idle priority
             */
            if ((memorystatus_jetsam_snapshot_count == 0) &&
                ((!priority) || (priority && (*priority != JETSAM_PRIORITY_IDLE)))) {
                memorystatus_jetsam_snapshot_procs_locked();
                new_snapshot = TRUE;
            }

            /*
             * Mark as terminated so that if exit1() indicates success, but the process (for example)
             * is blocked in task_exception_notify(), it'll be skipped if encountered again - see
             * <rdar://problem/13553476>. This is cheaper than examining P_LEXIT, which requires the
             * acquisition of the proc lock.
             */
            p->p_memstat_state |= P_MEMSTAT_TERMINATED;

#if DEVELOPMENT || DEBUG
            if ((memorystatus_jetsam_policy & kPolicyDiagnoseActive) && activeProcess) {
                MEMORYSTATUS_DEBUG(1, "jetsam: suspending pid %d [%s] (active) for diagnosis - memory_status_level: %d\n",
                    aPid, (p->p_comm ? p->p_comm : "(unknown)"), memorystatus_level);
                memorystatus_update_snapshot_locked(p, kMemorystatusKilledDiagnostic);
                p->p_memstat_state |= P_MEMSTAT_DIAG_SUSPENDED;
                if (memorystatus_jetsam_policy & kPolicyDiagnoseFirst) {
                    jetsam_diagnostic_suspended_one_active_proc = 1;
                    printf("jetsam: returning after suspending first active proc - %d\n", aPid);
                }

                p = proc_ref_locked(p);
                proc_list_unlock();
                if (p) {
                    task_suspend(p->task);
                    proc_rele(p);
                    killed = TRUE;
                }

                goto exit;
            } else
#endif /* DEVELOPMENT || DEBUG */
            {
                /* Shift queue, update stats */
                memorystatus_update_snapshot_locked(p, cause);

                p = proc_ref_locked(p);
                proc_list_unlock();
                if (p) {
                    printf("memorystatus: jetsam killing pid %d [%s] - memorystatus_available_pages: %d\n",
                        aPid, (p->p_comm ? p->p_comm : "(unknown)"), memorystatus_available_pages);
                    killed = memorystatus_do_kill(p, cause);
                }

                /* Success? */
                if (killed) {
                    proc_rele(p);
                    goto exit;
                }

                /* Failure - unwind and restart. */
                proc_list_lock();
                proc_rele_locked(p);
                p->p_memstat_state &= ~P_MEMSTAT_TERMINATED;
                p->p_memstat_state |= P_MEMSTAT_ERROR;
                *errors += 1;
                i = 0;
                next_p = memorystatus_get_first_proc_locked(&i, TRUE);
            }
        }
    }

    proc_list_unlock();

exit:
    /* Clear snapshot if freshly captured and no target was found */
    if (new_snapshot && !killed) {
        memorystatus_jetsam_snapshot->entry_count = memorystatus_jetsam_snapshot_count = 0;
    }

    KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_JETSAM) | DBG_FUNC_END,
        memorystatus_available_pages, killed ? aPid : 0, 0, 0, 0);

    return killed;
}
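
/*
 * High watermark kills: unlike memorystatus_kill_top_process(), which selects
 * purely by bucket order, memorystatus_kill_hiwat_proc() only targets
 * processes whose resident footprint exceeds their per-process
 * p_memstat_memlimit, skipping processes with no limit set and processes
 * whose background-only limit does not apply at their current (foreground)
 * priority.
 */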

#if LEGACY_HIWATER

static boolean_t
memorystatus_kill_hiwat_proc(uint32_t *errors)
{
    pid_t aPid = 0;
    proc_t p = PROC_NULL, next_p = PROC_NULL;
    boolean_t new_snapshot = FALSE, killed = FALSE;
    unsigned int i = 0;

    KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_JETSAM_HIWAT) | DBG_FUNC_START,
        memorystatus_available_pages, 0, 0, 0, 0);

    proc_list_lock();

    next_p = memorystatus_get_first_proc_locked(&i, TRUE);
    while (next_p) {
        uint32_t footprint;
        boolean_t skip;

        p = next_p;
        next_p = memorystatus_get_next_proc_locked(&i, p, TRUE);

        aPid = p->p_pid;

        if (p->p_memstat_state & (P_MEMSTAT_ERROR | P_MEMSTAT_TERMINATED)) {
            continue;
        }

        /* skip if no limit set */
        if (p->p_memstat_memlimit <= 0) {
            continue;
        }

        /* skip if a currently inapplicable limit is encountered */
        if ((p->p_memstat_state & P_MEMSTAT_MEMLIMIT_BACKGROUND) && (p->p_memstat_effectivepriority >= JETSAM_PRIORITY_FOREGROUND)) {
            continue;
        }

        footprint = (uint32_t)(get_task_phys_footprint(p->task) / (1024 * 1024));
        skip = (((int32_t)footprint) <= p->p_memstat_memlimit);
#if DEVELOPMENT || DEBUG
        if (!skip && (memorystatus_jetsam_policy & kPolicyDiagnoseActive)) {
            if (p->p_memstat_state & P_MEMSTAT_DIAG_SUSPENDED) {
                continue;
            }
        }
#endif /* DEVELOPMENT || DEBUG */

#if CONFIG_FREEZE
        if (!skip) {
            if (p->p_memstat_state & P_MEMSTAT_LOCKED) {
                skip = TRUE;
            } else {
                skip = FALSE;
            }
        }
#endif

        if (skip) {
            continue;
        } else {
            MEMORYSTATUS_DEBUG(1, "jetsam: %s pid %d [%s] - %d MB > %d MB\n",
                (memorystatus_jetsam_policy & kPolicyDiagnoseActive) ? "suspending" : "killing",
                aPid, p->p_comm, footprint, p->p_memstat_memlimit);

            if (memorystatus_jetsam_snapshot_count == 0) {
                memorystatus_jetsam_snapshot_procs_locked();
                new_snapshot = TRUE;
            }

            p->p_memstat_state |= P_MEMSTAT_TERMINATED;

#if DEVELOPMENT || DEBUG
            if (memorystatus_jetsam_policy & kPolicyDiagnoseActive) {
                MEMORYSTATUS_DEBUG(1, "jetsam: pid %d suspended for diagnosis - memorystatus_available_pages: %d\n", aPid, memorystatus_available_pages);
                memorystatus_update_snapshot_locked(p, kMemorystatusKilledDiagnostic);
                p->p_memstat_state |= P_MEMSTAT_DIAG_SUSPENDED;

                p = proc_ref_locked(p);
                proc_list_unlock();
                if (p) {
                    task_suspend(p->task);
                    proc_rele(p);
                    killed = TRUE;
                }

                goto exit;
            } else
#endif /* DEVELOPMENT || DEBUG */
            {
                memorystatus_update_snapshot_locked(p, kMemorystatusKilledHiwat);

                p = proc_ref_locked(p);
                proc_list_unlock();
                if (p) {
                    printf("memorystatus: jetsam killing pid %d [%s] (highwater) - memorystatus_available_pages: %d\n",
                        aPid, (p->p_comm ? p->p_comm : "(unknown)"), memorystatus_available_pages);
                    killed = memorystatus_do_kill(p, kMemorystatusKilledHiwat);
                }

                /* Success? */
                if (killed) {
                    proc_rele(p);
                    goto exit;
                }

                /* Failure - unwind and restart. */
                proc_list_lock();
                proc_rele_locked(p);
                p->p_memstat_state &= ~P_MEMSTAT_TERMINATED;
                p->p_memstat_state |= P_MEMSTAT_ERROR;
                *errors += 1;
                i = 0;
                next_p = memorystatus_get_first_proc_locked(&i, TRUE);
            }
        }
    }

    proc_list_unlock();

exit:
    /* Clear snapshot if freshly captured and no target was found */
    if (new_snapshot && !killed) {
        memorystatus_jetsam_snapshot->entry_count = memorystatus_jetsam_snapshot_count = 0;
    }

    KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_JETSAM_HIWAT) | DBG_FUNC_END,
        memorystatus_available_pages, killed ? aPid : 0, 0, 0, 0);

    return killed;
}

#endif /* LEGACY_HIWATER */

static boolean_t
memorystatus_kill_process_async(pid_t victim_pid, uint32_t cause) {
    /* TODO: allow a general async path */
    if ((victim_pid != -1) || (cause != kMemorystatusKilledVMPageShortage && cause != kMemorystatusKilledVMThrashing)) {
        return FALSE;
    }

    kill_under_pressure = TRUE;
    memorystatus_thread_wake();
    return TRUE;
}

static boolean_t
memorystatus_kill_process_sync(pid_t victim_pid, uint32_t cause) {
    boolean_t res;
    uint32_t errors = 0;

    if (victim_pid == -1) {
        /* No pid, so kill first process */
        res = memorystatus_kill_top_process(TRUE, cause, NULL, &errors);
    } else {
        res = memorystatus_kill_specific_process(victim_pid, cause);
    }

    if (errors) {
        memorystatus_clear_errors();
    }

    if (res == TRUE) {
        /* Fire off snapshot notification */
        size_t snapshot_size = sizeof(memorystatus_jetsam_snapshot_t) +
            sizeof(memorystatus_jetsam_snapshot_entry_t) * memorystatus_jetsam_snapshot_count;
        memorystatus_jetsam_snapshot->notification_time = mach_absolute_time();
        memorystatus_send_note(kMemorystatusSnapshotNote, &snapshot_size, sizeof(snapshot_size));
    }

    return res;
}

boolean_t
memorystatus_kill_on_VM_page_shortage(boolean_t async) {
    if (async) {
        return memorystatus_kill_process_async(-1, kMemorystatusKilledVMPageShortage);
    } else {
        return memorystatus_kill_process_sync(-1, kMemorystatusKilledVMPageShortage);
    }
}

boolean_t
memorystatus_kill_on_VM_thrashing(boolean_t async) {
    if (async) {
        return memorystatus_kill_process_async(-1, kMemorystatusKilledVMThrashing);
    } else {
        return memorystatus_kill_process_sync(-1, kMemorystatusKilledVMThrashing);
    }
}

boolean_t
memorystatus_kill_on_vnode_limit(void) {
    return memorystatus_kill_process_sync(-1, kMemorystatusKilledVnodes);
}

#endif /* CONFIG_JETSAM */
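
/*
 * Freezer support: a dedicated kernel thread scans for suspended,
 * not-yet-frozen processes and pushes their dirty pages out via
 * task_freeze(), subject to the swap space checks and pageout throttling
 * implemented below.
 */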

#if CONFIG_FREEZE

__private_extern__ void
memorystatus_freeze_init(void)
{
    kern_return_t result;
    thread_t thread;

    result = kernel_thread_start(memorystatus_freeze_thread, NULL, &thread);
    if (result == KERN_SUCCESS) {
        thread_deallocate(thread);
    } else {
        panic("Could not create memorystatus_freeze_thread");
    }
}

static int
memorystatus_freeze_top_process(boolean_t *memorystatus_freeze_swap_low)
{
    pid_t aPid = 0;
    int ret = -1;
    proc_t p = PROC_NULL, next_p = PROC_NULL;
    unsigned int i = 0;

    KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_FREEZE) | DBG_FUNC_START,
        memorystatus_available_pages, 0, 0, 0, 0);

    proc_list_lock();

    next_p = memorystatus_get_first_proc_locked(&i, TRUE);
    while (next_p) {
        kern_return_t kr;
        uint32_t purgeable, wired, clean, dirty;
        boolean_t shared;
        uint32_t pages;
        uint32_t max_pages = 0;
        uint32_t state;

        p = next_p;
        next_p = memorystatus_get_next_proc_locked(&i, p, TRUE);

        aPid = p->p_pid;
        state = p->p_memstat_state;

        /* Ensure the process is eligible for freezing */
        if ((state & (P_MEMSTAT_TERMINATED | P_MEMSTAT_LOCKED | P_MEMSTAT_FROZEN)) || !(state & P_MEMSTAT_SUSPENDED)) {
            continue; // with lock held
        }

        /* Only freeze processes meeting our minimum resident page criteria */
        memorystatus_get_task_page_counts(p->task, &pages, NULL);
        if (pages < memorystatus_freeze_pages_min) {
            continue; // with lock held
        }

        if (DEFAULT_FREEZER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
            /* Ensure there's enough free space to freeze this process. */
            max_pages = MIN(default_pager_swap_pages_free(), memorystatus_freeze_pages_max);
            if (max_pages < memorystatus_freeze_pages_min) {
                *memorystatus_freeze_swap_low = TRUE;
                proc_list_unlock();
                goto exit;
            }
        } else {
            max_pages = UINT32_MAX - 1;
        }

        /* Mark as locked temporarily to avoid kill */
        p->p_memstat_state |= P_MEMSTAT_LOCKED;

        p = proc_ref_locked(p);
        proc_list_unlock();
        if (!p) {
            goto exit;
        }

        kr = task_freeze(p->task, &purgeable, &wired, &clean, &dirty, max_pages, &shared, FALSE);

        MEMORYSTATUS_DEBUG(1, "memorystatus_freeze_top_process: task_freeze %s for pid %d [%s] - "
            "memorystatus_pages: %d, purgeable: %d, wired: %d, clean: %d, dirty: %d, shared %d, free swap: %d\n",
            (kr == KERN_SUCCESS) ? "SUCCEEDED" : "FAILED", aPid, (p->p_comm ? p->p_comm : "(unknown)"),
            memorystatus_available_pages, purgeable, wired, clean, dirty, shared, default_pager_swap_pages_free());

        proc_list_lock();
        p->p_memstat_state &= ~P_MEMSTAT_LOCKED;

        /* Success? */
        if (KERN_SUCCESS == kr) {
            memorystatus_freeze_entry_t data = { aPid, TRUE, dirty };

            memorystatus_frozen_count++;

            p->p_memstat_state |= (P_MEMSTAT_FROZEN | (shared ? 0 : P_MEMSTAT_NORECLAIM));

            /* Update stats */
            for (i = 0; i < sizeof(throttle_intervals) / sizeof(struct throttle_interval_t); i++) {
                throttle_intervals[i].pageouts += dirty;
            }

            memorystatus_freeze_pageouts += dirty;
            memorystatus_freeze_count++;

            proc_list_unlock();

            memorystatus_send_note(kMemorystatusFreezeNote, &data, sizeof(data));

            /* Return the number of reclaimed pages */
            ret = dirty;
        } else {
            proc_list_unlock();
        }

        proc_rele(p);
        goto exit;
    }

    proc_list_unlock();

exit:
    KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_FREEZE) | DBG_FUNC_END,
        memorystatus_available_pages, aPid, 0, 0, 0);

    return ret;
}
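
/*
 * Accounting note: a successful freeze credits the dirty page count against
 * every throttle interval up front; if an interval rolls over before that
 * budget is fully "spent", memorystatus_freeze_update_throttle_interval()
 * subtracts the overshoot when it resets the window.
 */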

static inline boolean_t
memorystatus_can_freeze_processes(void)
{
    boolean_t ret;

    proc_list_lock();

    if (memorystatus_suspended_count) {
        uint32_t average_resident_pages, estimated_processes;

        /* Estimate the number of suspended processes we can fit */
        average_resident_pages = memorystatus_suspended_footprint_total / memorystatus_suspended_count;
        estimated_processes = memorystatus_suspended_count +
            ((memorystatus_available_pages - memorystatus_available_pages_critical) / average_resident_pages);

        /* If it's predicted that no freeze will occur, lower the threshold temporarily */
        if (estimated_processes <= FREEZE_SUSPENDED_THRESHOLD_DEFAULT) {
            memorystatus_freeze_suspended_threshold = FREEZE_SUSPENDED_THRESHOLD_LOW;
        } else {
            memorystatus_freeze_suspended_threshold = FREEZE_SUSPENDED_THRESHOLD_DEFAULT;
        }

        MEMORYSTATUS_DEBUG(1, "memorystatus_can_freeze_processes: %d suspended processes, %d average resident pages / process, %d suspended processes estimated\n",
            memorystatus_suspended_count, average_resident_pages, estimated_processes);

        if ((memorystatus_suspended_count - memorystatus_frozen_count) > memorystatus_freeze_suspended_threshold) {
            ret = TRUE;
        } else {
            ret = FALSE;
        }
    } else {
        ret = FALSE;
    }

    proc_list_unlock();

    return ret;
}

static boolean_t
memorystatus_can_freeze(boolean_t *memorystatus_freeze_swap_low)
{
    /* Only freeze if we're sufficiently low on memory; this holds off freeze right
       after boot, and is generally a no-op once we've reached steady state. */
    if (memorystatus_available_pages > memorystatus_freeze_threshold) {
        return FALSE;
    }

    /* Check minimum suspended process threshold. */
    if (!memorystatus_can_freeze_processes()) {
        return FALSE;
    }

    /* Is swap running low? */
    if (*memorystatus_freeze_swap_low) {
        /* If there's been no movement in free swap pages since we last attempted freeze, return. */
        if (default_pager_swap_pages_free() < memorystatus_freeze_pages_min) {
            return FALSE;
        }

        /* Pages have been freed - we can retry. */
        *memorystatus_freeze_swap_low = FALSE;
    }

    /* OK */
    return TRUE;
}

static void
memorystatus_freeze_update_throttle_interval(mach_timespec_t *ts, struct throttle_interval_t *interval)
{
    if (CMP_MACH_TIMESPEC(ts, &interval->ts) >= 0) {
        if (!interval->max_pageouts) {
            interval->max_pageouts = (interval->burst_multiple * (((uint64_t)interval->mins * FREEZE_DAILY_PAGEOUTS_MAX) / (24 * 60)));
        } else {
            printf("memorystatus_freeze_update_throttle_interval: %d minute throttle timeout, resetting\n", interval->mins);
        }
        interval->ts.tv_sec = interval->mins * 60;
        interval->ts.tv_nsec = 0;
        ADD_MACH_TIMESPEC(&interval->ts, ts);
        /* Since we update the throttle stats pre-freeze, adjust for overshoot here */
        if (interval->pageouts > interval->max_pageouts) {
            interval->pageouts -= interval->max_pageouts;
        } else {
            interval->pageouts = 0;
        }
        interval->throttle = FALSE;
    } else if (!interval->throttle && interval->pageouts >= interval->max_pageouts) {
        printf("memorystatus_freeze_update_throttle_interval: %d minute pageout limit exceeded; enabling throttle\n", interval->mins);
        interval->throttle = TRUE;
    }

    MEMORYSTATUS_DEBUG(1, "memorystatus_freeze_update_throttle_interval: throttle updated - %d frozen (%d max) within %dm; %dm remaining; throttle %s\n",
        interval->pageouts, interval->max_pageouts, interval->mins, (interval->ts.tv_sec - ts->tv_sec) / 60,
        interval->throttle ? "on" : "off");
}
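
/*
 * Budget example for the computation above (illustrative): a 60-minute
 * interval with burst_multiple == 1 gets
 * 1 * ((60 * FREEZE_DAILY_PAGEOUTS_MAX) / (24 * 60)), i.e. 1/24th of the
 * daily pageout allowance; burst multiples greater than 1 let short
 * intervals temporarily exceed their pro-rated share.
 */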

static boolean_t
memorystatus_freeze_update_throttle(void)
{
    clock_sec_t sec;
    clock_nsec_t nsec;
    mach_timespec_t ts;
    uint32_t i;
    boolean_t throttled = FALSE;

#if DEVELOPMENT || DEBUG
    if (!memorystatus_freeze_throttle_enabled)
        return FALSE;
#endif

    clock_get_system_nanotime(&sec, &nsec);
    ts.tv_sec = sec;
    ts.tv_nsec = nsec;

    /* Check freeze pageouts over multiple intervals and throttle if we've exceeded our budget.
     *
     * This ensures that periods of inactivity can't be used as 'credit' towards freeze if the device has
     * remained dormant for a long period. We do, however, allow increased thresholds for shorter intervals in
     * order to allow for bursts of activity.
     */
    for (i = 0; i < sizeof(throttle_intervals) / sizeof(struct throttle_interval_t); i++) {
        memorystatus_freeze_update_throttle_interval(&ts, &throttle_intervals[i]);
        if (throttle_intervals[i].throttle == TRUE)
            throttled = TRUE;
    }

    return throttled;
}

static void
memorystatus_freeze_thread(void *param __unused, wait_result_t wr __unused)
{
    static boolean_t memorystatus_freeze_swap_low = FALSE;

    if (memorystatus_freeze_enabled) {
        if (memorystatus_can_freeze(&memorystatus_freeze_swap_low)) {
            /* Only freeze if we've not exceeded our pageout budgets */
            if (!memorystatus_freeze_update_throttle()) {
                memorystatus_freeze_top_process(&memorystatus_freeze_swap_low);
            } else {
                printf("memorystatus_freeze_thread: in throttle, ignoring freeze\n");
                memorystatus_freeze_throttle_count++; /* Throttled, update stats */
            }
        }
    }

    assert_wait((event_t) &memorystatus_freeze_wakeup, THREAD_UNINT);
    thread_block((thread_continue_t) memorystatus_freeze_thread);
}

#endif /* CONFIG_FREEZE */

#if CONFIG_JETSAM && VM_PRESSURE_EVENTS

boolean_t
memorystatus_warn_process(pid_t pid) {
    return (vm_dispatch_pressure_note_to_pid(pid, FALSE) == 0);
}

static inline boolean_t
memorystatus_update_pressure_locked(boolean_t *pressured) {
    vm_pressure_level_t old_level, new_level;

    old_level = memorystatus_vm_pressure_level;

    if (memorystatus_available_pages > memorystatus_available_pages_pressure) {
        /* Too many free pages */
        new_level = kVMPressureNormal;
    }
#if CONFIG_FREEZE
    else if (memorystatus_frozen_count > 0) {
        /* Frozen processes exist */
        new_level = kVMPressureNormal;
    }
#endif
    else if (memorystatus_suspended_count > MEMORYSTATUS_SUSPENDED_THRESHOLD) {
        /* Too many suspended processes */
        new_level = kVMPressureNormal;
    }
    else if (memorystatus_suspended_count > 0) {
        /* Some suspended processes - warn */
        new_level = kVMPressureWarning;
    }
    else {
        /* Otherwise, pressure level is urgent */
        new_level = kVMPressureUrgent;
    }

    *pressured = (new_level != kVMPressureNormal);

    /* Did the pressure level change? */
    if (old_level != new_level) {
        MEMORYSTATUS_DEBUG(1, "memorystatus_update_pressure_locked(): memory pressure changed %d -> %d; memorystatus_available_pages: %d\n",
            old_level, new_level, memorystatus_available_pages);
        memorystatus_vm_pressure_level = new_level;
        return TRUE;
    }

    return FALSE;
}
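
/*
 * Summary of the level selection above (conditions checked in order):
 *
 *    available pages above the pressure threshold       -> normal
 *    frozen processes exist (CONFIG_FREEZE only)        -> normal
 *    suspended count > MEMORYSTATUS_SUSPENDED_THRESHOLD -> normal
 *    some processes suspended                           -> warning
 *    otherwise                                          -> urgent
 */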

kern_return_t
memorystatus_update_vm_pressure(boolean_t target_foreground) {
    boolean_t pressure_changed, pressured;
    boolean_t warn = FALSE;

    /*
     * Centralised pressure handling routine. Called from:
     * - The main jetsam thread. In this case, we update the pressure level and dispatch warnings to the foreground
     *   process *only*, each time the available page % drops.
     * - The pageout scan path. In this scenario, every other registered process is targeted in footprint order.
     *
     * This scheme guarantees delivery to the foreground app, while providing for warnings to the remaining processes
     * driven by the pageout scan.
     */

    MEMORYSTATUS_DEBUG(1, "memorystatus_update_vm_pressure(): foreground %d; available %d, critical %d, pressure %d\n",
        target_foreground, memorystatus_available_pages, memorystatus_available_pages_critical, memorystatus_available_pages_pressure);

    proc_list_lock();

    pressure_changed = memorystatus_update_pressure_locked(&pressured);

    if (pressured) {
        if (target_foreground) {
            if (memorystatus_available_pages != memorystatus_last_foreground_pressure_pages) {
                if (memorystatus_available_pages < memorystatus_last_foreground_pressure_pages) {
                    warn = TRUE;
                }
                memorystatus_last_foreground_pressure_pages = memorystatus_available_pages;
            }
        } else {
            warn = TRUE;
        }
    } else if (pressure_changed) {
        memorystatus_last_foreground_pressure_pages = (unsigned int)-1;
    }

    proc_list_unlock();

    /* Target foreground processes if specified */
    if (warn) {
        if (target_foreground) {
            MEMORYSTATUS_DEBUG(1, "memorystatus_update_vm_pressure(): invoking vm_find_pressure_foreground_candidates()\n");
            vm_find_pressure_foreground_candidates();
        } else {
            MEMORYSTATUS_DEBUG(1, "memorystatus_update_vm_pressure(): invoking vm_find_pressure_candidate()\n");
            /* Defer to VM code. This can race with the foreground priority, but
             * it's preferable to holding onto locks for an extended period. */
            vm_find_pressure_candidate();
        }
    }

    /* Dispatch the global kevent to privileged listeners */
    if (pressure_changed) {
        memorystatus_issue_pressure_kevent(pressured);
    }

    return KERN_SUCCESS;
}

int
memorystatus_send_pressure_note(pid_t pid) {
    MEMORYSTATUS_DEBUG(1, "memorystatus_send_pressure_note(): pid %d\n", pid);
    return memorystatus_send_note(kMemorystatusPressureNote, &pid, sizeof(pid));
}

boolean_t
memorystatus_bg_pressure_eligible(proc_t p) {
    boolean_t eligible = FALSE;

    proc_list_lock();

    MEMORYSTATUS_DEBUG(1, "memorystatus_bg_pressure_eligible: pid %d, state 0x%x\n", p->p_pid, p->p_memstat_state);

    /* Foreground processes have already been dealt with at this point, so just test for eligibility */
    if (!(p->p_memstat_state & (P_MEMSTAT_TERMINATED | P_MEMSTAT_LOCKED | P_MEMSTAT_SUSPENDED | P_MEMSTAT_FROZEN))) {
        eligible = TRUE;
    }

    proc_list_unlock();

    return eligible;
}

boolean_t
memorystatus_is_foreground_locked(proc_t p) {
    return ((p->p_memstat_effectivepriority == JETSAM_PRIORITY_FOREGROUND) ||
            (p->p_memstat_effectivepriority == JETSAM_PRIORITY_FOREGROUND_SUPPORT));
}

#else /* CONFIG_JETSAM && VM_PRESSURE_EVENTS */

/*
 * Trigger levels to test the mechanism.
 * Can be used via a sysctl.
 */
#define TEST_LOW_MEMORY_TRIGGER_ONE             1
#define TEST_LOW_MEMORY_TRIGGER_ALL             2
#define TEST_PURGEABLE_TRIGGER_ONE              3
#define TEST_PURGEABLE_TRIGGER_ALL              4
#define TEST_LOW_MEMORY_PURGEABLE_TRIGGER_ONE   5
#define TEST_LOW_MEMORY_PURGEABLE_TRIGGER_ALL   6

boolean_t memorystatus_manual_testing_on = FALSE;
vm_pressure_level_t memorystatus_manual_testing_level = kVMPressureNormal;

extern struct knote *
vm_pressure_select_optimal_candidate_to_notify(struct klist *, int);

extern
kern_return_t vm_pressure_notification_without_levels(void);

extern void vm_pressure_klist_lock(void);
extern void vm_pressure_klist_unlock(void);

extern void vm_reset_active_list(void);

extern void delay(int);

#define INTER_NOTIFICATION_DELAY (250000)   /* .25 second */

void memorystatus_on_pageout_scan_end(void) {
    /* No-op */
}

/*
 * kn_max - knote
 *
 * knote_pressure_level - to check if the knote is registered for this notification level.
 *
 * task - task whose bits we'll be modifying
 *
 * pressure_level_to_clear - if the task has been notified of this past level, clear that notification bit so that if/when we revert to that level, the task will be notified again.
 *
 * pressure_level_to_set - the task is about to be notified of this new level. Update the task's bit notification information appropriately.
 */
boolean_t
is_knote_registered_modify_task_pressure_bits(struct knote*, int, task_t, vm_pressure_level_t, vm_pressure_level_t);

boolean_t
is_knote_registered_modify_task_pressure_bits(struct knote *kn_max, int knote_pressure_level, task_t task, vm_pressure_level_t pressure_level_to_clear, vm_pressure_level_t pressure_level_to_set)
{
    if (kn_max->kn_sfflags & knote_pressure_level) {
        if (task_has_been_notified(task, pressure_level_to_clear) == TRUE) {
            task_clear_has_been_notified(task, pressure_level_to_clear);
        }

        task_mark_has_been_notified(task, pressure_level_to_set);
        return TRUE;
    }

    return FALSE;
}

extern kern_return_t vm_pressure_notify_dispatch_vm_clients(void);

kern_return_t
memorystatus_update_vm_pressure(boolean_t target_best_process)
{
    struct knote *kn_max = NULL;
    pid_t target_pid = -1;
    struct klist dispatch_klist = { NULL };
    proc_t target_proc = PROC_NULL;
    static vm_pressure_level_t level_snapshot = kVMPressureNormal;
    struct task *task = NULL;
    boolean_t found_candidate = FALSE;

    while (1) {
        /*
         * There is a race window here. But it's not clear
         * how much we benefit from having extra synchronization.
         */
        level_snapshot = memorystatus_vm_pressure_level;

        memorystatus_klist_lock();
        kn_max = vm_pressure_select_optimal_candidate_to_notify(&memorystatus_klist, level_snapshot);

        if (kn_max == NULL) {
            memorystatus_klist_unlock();

            /*
             * No more level-based clients to notify.
             * Try the non-level based notification clients.
             *
             * However, these non-level clients don't understand
             * the "return-to-normal" notification.
             *
             * So don't consider them for those notifications. Just
             * return instead.
             */
            if (level_snapshot != kVMPressureNormal) {
                goto try_dispatch_vm_clients;
            } else {
                return KERN_FAILURE;
            }
        }

        target_proc = kn_max->kn_kq->kq_p;

        proc_list_lock();
        if (target_proc != proc_ref_locked(target_proc)) {
            target_proc = PROC_NULL;
            proc_list_unlock();
            memorystatus_klist_unlock();
            continue;
        }
        proc_list_unlock();
        memorystatus_klist_unlock();

        target_pid = target_proc->p_pid;

        task = (struct task *)(target_proc->task);

        if (level_snapshot != kVMPressureNormal) {
            if (level_snapshot == kVMPressureWarning || level_snapshot == kVMPressureUrgent) {
                if (is_knote_registered_modify_task_pressure_bits(kn_max, NOTE_MEMORYSTATUS_PRESSURE_WARN, task, kVMPressureCritical, kVMPressureWarning) == TRUE) {
                    found_candidate = TRUE;
                }
            } else {
                if (level_snapshot == kVMPressureCritical) {
                    if (is_knote_registered_modify_task_pressure_bits(kn_max, NOTE_MEMORYSTATUS_PRESSURE_CRITICAL, task, kVMPressureWarning, kVMPressureCritical) == TRUE) {
                        found_candidate = TRUE;
                    }
                }
            }
        } else {
            if (kn_max->kn_sfflags & NOTE_MEMORYSTATUS_PRESSURE_NORMAL) {
                task_clear_has_been_notified(task, kVMPressureWarning);
                task_clear_has_been_notified(task, kVMPressureCritical);

                found_candidate = TRUE;
            }
        }

        if (found_candidate == FALSE) {
            continue;
        }

        memorystatus_klist_lock();
        KNOTE_DETACH(&memorystatus_klist, kn_max);
        KNOTE_ATTACH(&dispatch_klist, kn_max);
        memorystatus_klist_unlock();

        KNOTE(&dispatch_klist, (level_snapshot != kVMPressureNormal) ? kMemorystatusPressure : kMemorystatusNoPressure);

        memorystatus_klist_lock();
        KNOTE_DETACH(&dispatch_klist, kn_max);
        KNOTE_ATTACH(&memorystatus_klist, kn_max);
        memorystatus_klist_unlock();

        microuptime(&target_proc->vm_pressure_last_notify_tstamp);
        proc_rele(target_proc);

        if (target_best_process == TRUE) {
            break;
        }

try_dispatch_vm_clients:
        if (level_snapshot != kVMPressureNormal) {
            /*
             * Wake up the idle-exit thread.
             * Targets one process per invocation.
             *
             * TODO: memorystatus_idle_exit_from_VM should return FALSE once it's
             * done with all idle-exitable processes. Currently, we will exit this
             * loop when we are done with notification clients (level and non-level based)
             * but we may still have some idle-exitable processes around.
             */
            memorystatus_idle_exit_from_VM();

            if ((vm_pressure_notify_dispatch_vm_clients() == KERN_FAILURE) && (kn_max == NULL)) {
                /*
                 * kn_max == NULL i.e. we didn't find any eligible clients for the level-based notifications
                 * AND
                 * we have failed to find any eligible clients for the non-level based notifications too.
                 * So, we are done.
                 */
                return KERN_FAILURE;
            }
        }

        if (memorystatus_manual_testing_on == FALSE) {
            delay(INTER_NOTIFICATION_DELAY);
        }
    }

    return KERN_SUCCESS;
}

vm_pressure_level_t
convert_internal_pressure_level_to_dispatch_level(vm_pressure_level_t);

vm_pressure_level_t
convert_internal_pressure_level_to_dispatch_level(vm_pressure_level_t internal_pressure_level)
{
    vm_pressure_level_t dispatch_level = NOTE_MEMORYSTATUS_PRESSURE_NORMAL;

    switch (internal_pressure_level) {
    case kVMPressureNormal:
        dispatch_level = NOTE_MEMORYSTATUS_PRESSURE_NORMAL;
        break;

    case kVMPressureWarning:
    case kVMPressureUrgent:
        dispatch_level = NOTE_MEMORYSTATUS_PRESSURE_WARN;
        break;

    case kVMPressureCritical:
        dispatch_level = NOTE_MEMORYSTATUS_PRESSURE_CRITICAL;
        break;

    default:
        break;
    }

    return dispatch_level;
}

static int
sysctl_memorystatus_vm_pressure_level SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2, oidp)

    vm_pressure_level_t dispatch_level = convert_internal_pressure_level_to_dispatch_level(memorystatus_vm_pressure_level);

    return SYSCTL_OUT(req, &dispatch_level, sizeof(dispatch_level));
}

SYSCTL_PROC(_kern, OID_AUTO, memorystatus_vm_pressure_level, CTLTYPE_INT|CTLFLAG_RD|CTLFLAG_LOCKED,
    0, 0, &sysctl_memorystatus_vm_pressure_level, "I", "");
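
/*
 * Illustrative userspace read of the sysctl registered above (assumed
 * usage; the value is one of the NOTE_MEMORYSTATUS_PRESSURE_* dispatch
 * constants produced by the conversion routine):
 *
 *    int level = 0;
 *    size_t len = sizeof(level);
 *    sysctlbyname("kern.memorystatus_vm_pressure_level", &level, &len, NULL, 0);
 */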

extern int memorystatus_purge_on_warning;
extern int memorystatus_purge_on_critical;

static int
sysctl_memorypressure_manual_trigger SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)

    int level = 0;
    int error = 0;
    int pressure_level = 0;
    int trigger_request = 0;
    int force_purge = 0;

    error = sysctl_handle_int(oidp, &level, 0, req);
    if (error || !req->newptr) {
        return (error);
    }

    memorystatus_manual_testing_on = TRUE;

    trigger_request = (level >> 16) & 0xFFFF;
    pressure_level = (level & 0xFFFF);

    if (trigger_request < TEST_LOW_MEMORY_TRIGGER_ONE ||
        trigger_request > TEST_LOW_MEMORY_PURGEABLE_TRIGGER_ALL) {
        return EINVAL;
    }
    switch (pressure_level) {
    case NOTE_MEMORYSTATUS_PRESSURE_NORMAL:
    case NOTE_MEMORYSTATUS_PRESSURE_WARN:
    case NOTE_MEMORYSTATUS_PRESSURE_CRITICAL:
        break;
    default:
        return EINVAL;
    }

    /*
     * The pressure level is being set from user-space.
     * User-space uses the constants in sys/event.h,
     * so we translate those events to our internal levels here.
     */
    if (pressure_level == NOTE_MEMORYSTATUS_PRESSURE_NORMAL) {
        memorystatus_manual_testing_level = kVMPressureNormal;
        force_purge = 0;
    } else if (pressure_level == NOTE_MEMORYSTATUS_PRESSURE_WARN) {
        memorystatus_manual_testing_level = kVMPressureWarning;
        force_purge = memorystatus_purge_on_warning;
    } else if (pressure_level == NOTE_MEMORYSTATUS_PRESSURE_CRITICAL) {
        memorystatus_manual_testing_level = kVMPressureCritical;
        force_purge = memorystatus_purge_on_critical;
    }

    memorystatus_vm_pressure_level = memorystatus_manual_testing_level;

    /* purge according to the new pressure level */
    switch (trigger_request) {
    case TEST_PURGEABLE_TRIGGER_ONE:
    case TEST_LOW_MEMORY_PURGEABLE_TRIGGER_ONE:
        if (force_purge == 0) {
            /* no purging requested */
            break;
        }
        vm_purgeable_object_purge_one_unlocked(force_purge);
        break;
    case TEST_PURGEABLE_TRIGGER_ALL:
    case TEST_LOW_MEMORY_PURGEABLE_TRIGGER_ALL:
        if (force_purge == 0) {
            /* no purging requested */
            break;
        }
        while (vm_purgeable_object_purge_one_unlocked(force_purge));
        break;
    }

    if ((trigger_request == TEST_LOW_MEMORY_TRIGGER_ONE) ||
        (trigger_request == TEST_LOW_MEMORY_PURGEABLE_TRIGGER_ONE)) {
        memorystatus_update_vm_pressure(TRUE);
    }

    if ((trigger_request == TEST_LOW_MEMORY_TRIGGER_ALL) ||
        (trigger_request == TEST_LOW_MEMORY_PURGEABLE_TRIGGER_ALL)) {
        while (memorystatus_update_vm_pressure(FALSE) == KERN_SUCCESS) {
            continue;
        }
    }

    if (pressure_level == NOTE_MEMORYSTATUS_PRESSURE_NORMAL) {
        memorystatus_manual_testing_on = FALSE;

        vm_pressure_klist_lock();
        vm_reset_active_list();
        vm_pressure_klist_unlock();
    } else {
        vm_pressure_klist_lock();
        vm_pressure_notification_without_levels();
        vm_pressure_klist_unlock();
    }

    return 0;
}

SYSCTL_PROC(_kern, OID_AUTO, memorypressure_manual_trigger, CTLTYPE_INT|CTLFLAG_WR|CTLFLAG_LOCKED|CTLFLAG_MASKED,
    0, 0, &sysctl_memorypressure_manual_trigger, "I", "");
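
/*
 * Encoding example for the trigger above (illustrative): the handler takes
 * a trigger request in the high 16 bits and a NOTE_MEMORYSTATUS_PRESSURE_*
 * level in the low 16 bits. The TEST_* constants are private to this file,
 * so userspace must pass the raw values; e.g. to simulate a low-memory
 * warning against the single best candidate (TEST_LOW_MEMORY_TRIGGER_ONE == 1):
 *
 *    int level = (1 << 16) | NOTE_MEMORYSTATUS_PRESSURE_WARN;
 *    sysctlbyname("kern.memorypressure_manual_trigger", NULL, NULL, &level, sizeof(level));
 */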

extern int memorystatus_purge_on_warning;
extern int memorystatus_purge_on_urgent;
extern int memorystatus_purge_on_critical;

SYSCTL_INT(_kern, OID_AUTO, memorystatus_purge_on_warning, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_purge_on_warning, 0, "");
SYSCTL_INT(_kern, OID_AUTO, memorystatus_purge_on_urgent, CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_purge_on_urgent, 0, "");
SYSCTL_INT(_kern, OID_AUTO, memorystatus_purge_on_critical, CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_purge_on_critical, 0, "");

#endif /* CONFIG_JETSAM && VM_PRESSURE_EVENTS */

/* Return both allocated and actual size, since there's a race between allocation and list compilation */
static int
memorystatus_get_priority_list(memorystatus_priority_entry_t **list_ptr, size_t *buffer_size, size_t *list_size, boolean_t size_only)
{
    uint32_t list_count, i = 0;
    memorystatus_priority_entry_t *list_entry;
    proc_t p;

    list_count = memorystatus_list_count;
    *list_size = sizeof(memorystatus_priority_entry_t) * list_count;

    /* Just a size check? */
    if (size_only) {
        return 0;
    }

    /* Otherwise, validate the size of the buffer */
    if (*buffer_size < *list_size) {
        return EINVAL;
    }

    *list_ptr = (memorystatus_priority_entry_t*)kalloc(*list_size);
    if (!*list_ptr) {
        return ENOMEM;
    }

    memset(*list_ptr, 0, *list_size);

    *buffer_size = *list_size;
    *list_size = 0;

    list_entry = *list_ptr;

    proc_list_lock();

    p = memorystatus_get_first_proc_locked(&i, TRUE);
    while (p && (*list_size < *buffer_size)) {
        list_entry->pid = p->p_pid;
        list_entry->priority = p->p_memstat_effectivepriority;
        list_entry->user_data = p->p_memstat_userdata;
#if LEGACY_HIWATER
        if (((p->p_memstat_state & P_MEMSTAT_MEMLIMIT_BACKGROUND) && (p->p_memstat_effectivepriority >= JETSAM_PRIORITY_FOREGROUND)) ||
            (p->p_memstat_memlimit <= 0)) {
            task_get_phys_footprint_limit(p->task, &list_entry->limit);
        } else {
            list_entry->limit = p->p_memstat_memlimit;
        }
#else
        task_get_phys_footprint_limit(p->task, &list_entry->limit);
#endif
        list_entry->state = memorystatus_build_state(p);
        list_entry++;

        *list_size += sizeof(memorystatus_priority_entry_t);

        p = memorystatus_get_next_proc_locked(&i, p, TRUE);
    }

    proc_list_unlock();

    MEMORYSTATUS_DEBUG(1, "memorystatus_get_priority_list: returning %lu for size\n", (unsigned long)*list_size);

    return 0;
}
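
/*
 * Usage sketch for the list above (illustrative; memorystatus_control() is
 * a private syscall and the userspace wrapper signature shown here is an
 * assumption based on struct memorystatus_control_args): callers size the
 * buffer with a NULL pointer first, then fetch via the
 * MEMORYSTATUS_CMD_GET_PRIORITY_LIST command handled below.
 *
 *    int size = memorystatus_control(MEMORYSTATUS_CMD_GET_PRIORITY_LIST, 0, 0, NULL, 0);
 *    memorystatus_priority_entry_t *list = malloc(size);
 *    memorystatus_control(MEMORYSTATUS_CMD_GET_PRIORITY_LIST, 0, 0, list, size);
 */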

static int
memorystatus_cmd_get_priority_list(user_addr_t buffer, size_t buffer_size, int32_t *retval) {
    int error = EINVAL;
    boolean_t size_only;
    memorystatus_priority_entry_t *list = NULL;
    size_t list_size;

    size_only = ((buffer == USER_ADDR_NULL) ? TRUE : FALSE);

    error = memorystatus_get_priority_list(&list, &buffer_size, &list_size, size_only);
    if (error) {
        goto out;
    }

    if (!size_only) {
        error = copyout(list, buffer, list_size);
    }

    if (error == 0) {
        *retval = list_size;
    }
out:
    if (list) {
        kfree(list, buffer_size);
    }

    return error;
}

#if CONFIG_JETSAM

static void
memorystatus_clear_errors(void)
{
    proc_t p;
    unsigned int i = 0;

    KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_CLEAR_ERRORS) | DBG_FUNC_START, 0, 0, 0, 0, 0);

    proc_list_lock();

    p = memorystatus_get_first_proc_locked(&i, TRUE);
    while (p) {
        if (p->p_memstat_state & P_MEMSTAT_ERROR) {
            p->p_memstat_state &= ~P_MEMSTAT_ERROR;
        }
        p = memorystatus_get_next_proc_locked(&i, p, TRUE);
    }

    proc_list_unlock();

    KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_CLEAR_ERRORS) | DBG_FUNC_END, 0, 0, 0, 0, 0);
}

static void
memorystatus_update_levels_locked(boolean_t critical_only) {
    memorystatus_available_pages_critical = memorystatus_available_pages_critical_base;
#if !LATENCY_JETSAM
    {
        // If there's an entry in the first bucket, we have idle processes
        memstat_bucket_t *first_bucket = &memstat_bucket[JETSAM_PRIORITY_IDLE];
        if (first_bucket->count) {
            memorystatus_available_pages_critical += memorystatus_available_pages_critical_idle_offset;
        }
    }
#endif
#if DEBUG || DEVELOPMENT
    if (memorystatus_jetsam_policy & kPolicyDiagnoseActive) {
        memorystatus_available_pages_critical += memorystatus_jetsam_policy_offset_pages_diagnostic;
    }
#endif

    if (critical_only) {
        return;
    }

#if VM_PRESSURE_EVENTS
    memorystatus_available_pages_pressure = (pressure_threshold_percentage / delta_percentage) * memorystatus_delta;
#if DEBUG || DEVELOPMENT
    if (memorystatus_jetsam_policy & kPolicyDiagnoseActive) {
        memorystatus_available_pages_pressure += memorystatus_jetsam_policy_offset_pages_diagnostic;
    }
#endif
#endif
}

static int
memorystatus_get_snapshot(memorystatus_jetsam_snapshot_t **snapshot, size_t *snapshot_size, boolean_t size_only) {
    size_t input_size = *snapshot_size;

    if (memorystatus_jetsam_snapshot_count > 0) {
        *snapshot_size = sizeof(memorystatus_jetsam_snapshot_t) + (sizeof(memorystatus_jetsam_snapshot_entry_t) * (memorystatus_jetsam_snapshot_count));
    } else {
        *snapshot_size = 0;
    }

    if (size_only) {
        return 0;
    }

    if (input_size < *snapshot_size) {
        return EINVAL;
    }

    *snapshot = memorystatus_jetsam_snapshot;

    MEMORYSTATUS_DEBUG(1, "memorystatus_snapshot: returning %ld for size\n", (long)*snapshot_size);

    return 0;
}

static int
memorystatus_cmd_get_jetsam_snapshot(user_addr_t buffer, size_t buffer_size, int32_t *retval) {
    int error = EINVAL;
    boolean_t size_only;
    memorystatus_jetsam_snapshot_t *snapshot;

    size_only = ((buffer == USER_ADDR_NULL) ? TRUE : FALSE);

    error = memorystatus_get_snapshot(&snapshot, &buffer_size, size_only);
    if (error) {
        goto out;
    }

    /* Copy out and reset */
    if (!size_only) {
        if ((error = copyout(snapshot, buffer, buffer_size)) == 0) {
            snapshot->entry_count = memorystatus_jetsam_snapshot_count = 0;
        }
    }

    if (error == 0) {
        *retval = buffer_size;
    }
out:
    return error;
}

static int
memorystatus_cmd_set_priority_properties(pid_t pid, user_addr_t buffer, size_t buffer_size, __unused int32_t *retval) {
    const uint32_t MAX_ENTRY_COUNT = 2; /* Cap the entry count */

    int error;
    uint32_t i;
    uint32_t entry_count;
    memorystatus_priority_properties_t *entries;

    /* Validate inputs */
    if ((pid == 0) || (buffer == USER_ADDR_NULL) || (buffer_size == 0)) {
        return EINVAL;
    }

    /* Make sure the buffer is a multiple of the entry size, and that an excessive size isn't specified */
    entry_count = (buffer_size / sizeof(memorystatus_priority_properties_t));
    if (((buffer_size % sizeof(memorystatus_priority_properties_t)) != 0) || (entry_count > MAX_ENTRY_COUNT)) {
        return EINVAL;
    }

    entries = (memorystatus_priority_properties_t *)kalloc(buffer_size);
    if (!entries) {
        return ENOMEM;
    }

    error = copyin(buffer, entries, buffer_size);

    for (i = 0; i < entry_count; i++) {
        proc_t p;

        if (error) {
            break;
        }

        p = proc_find(pid);
        if (!p) {
            error = ESRCH;
            break;
        }

        if (p->p_memstat_state & P_MEMSTAT_INTERNAL) {
            error = EPERM;
            proc_rele(p);
            break;
        }

        error = memorystatus_update(p, entries[i].priority, entries[i].user_data, FALSE, FALSE, 0, 0);
        proc_rele(p);
    }

    kfree(entries, buffer_size);

    return error;
}

static int
memorystatus_cmd_get_pressure_status(int32_t *retval) {
    int error;

    /* Need privilege for check */
    error = priv_check_cred(kauth_cred_get(), PRIV_VM_PRESSURE, 0);
    if (error) {
        return (error);
    }

    /* Inherently racy, so it's not worth taking a lock here */
    *retval = (kVMPressureNormal != memorystatus_vm_pressure_level) ? 1 : 0;

    return error;
}

static int
memorystatus_cmd_set_jetsam_high_water_mark(pid_t pid, int32_t high_water_mark, __unused int32_t *retval) {
    int error = 0;

    proc_t p = proc_find(pid);
    if (!p) {
        return ESRCH;
    }

    if (high_water_mark <= 0) {
        high_water_mark = -1; /* Disable */
    }

    proc_list_lock();

    if (p->p_memstat_state & P_MEMSTAT_INTERNAL) {
        error = EPERM;
        goto exit;
    }

    p->p_memstat_memlimit = high_water_mark;
    if (memorystatus_highwater_enabled) {
        if (p->p_memstat_state & P_MEMSTAT_MEMLIMIT_BACKGROUND) {
            memorystatus_update_priority_locked(p, p->p_memstat_effectivepriority);
        } else {
            error = (task_set_phys_footprint_limit_internal(p->task, high_water_mark, NULL, TRUE) == 0) ? 0 : EINVAL;
        }
    }

exit:
    proc_list_unlock();
    proc_rele(p);

    return error;
}

#endif /* CONFIG_JETSAM */

int
memorystatus_control(struct proc *p __unused, struct memorystatus_control_args *args, int *ret) {
    int error = EINVAL;

#if !CONFIG_JETSAM
    #pragma unused(ret)
#endif

    /* Root only for now */
    if (!kauth_cred_issuser(kauth_cred_get())) {
        error = EPERM;
        goto out;
    }

    /* Sanity check */
    if (args->buffersize > MEMORYSTATUS_BUFFERSIZE_MAX) {
        error = EINVAL;
        goto out;
    }

    switch (args->command) {
    case MEMORYSTATUS_CMD_GET_PRIORITY_LIST:
        error = memorystatus_cmd_get_priority_list(args->buffer, args->buffersize, ret);
        break;
#if CONFIG_JETSAM
    case MEMORYSTATUS_CMD_SET_PRIORITY_PROPERTIES:
        error = memorystatus_cmd_set_priority_properties(args->pid, args->buffer, args->buffersize, ret);
        break;
    case MEMORYSTATUS_CMD_GET_JETSAM_SNAPSHOT:
        error = memorystatus_cmd_get_jetsam_snapshot(args->buffer, args->buffersize, ret);
        break;
    case MEMORYSTATUS_CMD_GET_PRESSURE_STATUS:
        error = memorystatus_cmd_get_pressure_status(ret);
        break;
    case MEMORYSTATUS_CMD_SET_JETSAM_HIGH_WATER_MARK:
        /* TODO: deprecate. Keeping it in as there's no pid based way to set the ledger limit right now. */
        error = memorystatus_cmd_set_jetsam_high_water_mark(args->pid, (int32_t)args->flags, ret);
        break;
    /* Test commands */
#if DEVELOPMENT || DEBUG
    case MEMORYSTATUS_CMD_TEST_JETSAM:
        error = memorystatus_kill_process_sync(args->pid, kMemorystatusKilled) ? 0 : EINVAL;
        break;
    case MEMORYSTATUS_CMD_SET_JETSAM_PANIC_BITS:
        error = memorystatus_cmd_set_panic_bits(args->buffer, args->buffersize);
        break;
#endif /* DEVELOPMENT || DEBUG */
#endif /* CONFIG_JETSAM */
    default:
        break;
    }

out:
    return error;
}

static int
filt_memorystatusattach(struct knote *kn)
{
    kn->kn_flags |= EV_CLEAR;
    return memorystatus_knote_register(kn);
}

static void
filt_memorystatusdetach(struct knote *kn)
{
    memorystatus_knote_unregister(kn);
}

static int
filt_memorystatus(struct knote *kn, long hint)
{
    if (hint) {
        switch (hint) {
        case kMemorystatusNoPressure:
            if (kn->kn_sfflags & NOTE_MEMORYSTATUS_PRESSURE_NORMAL) {
                kn->kn_fflags |= NOTE_MEMORYSTATUS_PRESSURE_NORMAL;
            }
            break;
        case kMemorystatusPressure:
            if (memorystatus_vm_pressure_level == kVMPressureWarning || memorystatus_vm_pressure_level == kVMPressureUrgent) {
                if (kn->kn_sfflags & NOTE_MEMORYSTATUS_PRESSURE_WARN) {
                    kn->kn_fflags |= NOTE_MEMORYSTATUS_PRESSURE_WARN;
                }
            } else if (memorystatus_vm_pressure_level == kVMPressureCritical) {
                if (kn->kn_sfflags & NOTE_MEMORYSTATUS_PRESSURE_CRITICAL) {
                    kn->kn_fflags |= NOTE_MEMORYSTATUS_PRESSURE_CRITICAL;
                }
            }
            break;
        default:
            break;
        }
    }

    return (kn->kn_fflags != 0);
}
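
/*
 * Registration sketch for the filter above (illustrative; EVFILT_MEMORYSTATUS
 * is a private filter and this usage is an assumption, with error handling
 * omitted). Note that memorystatus_knote_register() may require the
 * PRIV_VM_PRESSURE privilege.
 *
 *    int kq = kqueue();
 *    struct kevent ke;
 *    EV_SET(&ke, 0, EVFILT_MEMORYSTATUS, EV_ADD,
 *        NOTE_MEMORYSTATUS_PRESSURE_NORMAL | NOTE_MEMORYSTATUS_PRESSURE_WARN |
 *        NOTE_MEMORYSTATUS_PRESSURE_CRITICAL, 0, NULL);
 *    kevent(kq, &ke, 1, NULL, 0, NULL);
 *    // delivered events carry the current dispatch level in ke.fflags
 */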

static void
memorystatus_klist_lock(void) {
    lck_mtx_lock(&memorystatus_klist_mutex);
}

static void
memorystatus_klist_unlock(void) {
    lck_mtx_unlock(&memorystatus_klist_mutex);
}

void
memorystatus_kevent_init(lck_grp_t *grp, lck_attr_t *attr) {
    lck_mtx_init(&memorystatus_klist_mutex, grp, attr);
    klist_init(&memorystatus_klist);
}

int
memorystatus_knote_register(struct knote *kn) {
    int error = 0;

    memorystatus_klist_lock();

    if (kn->kn_sfflags & (NOTE_MEMORYSTATUS_PRESSURE_NORMAL | NOTE_MEMORYSTATUS_PRESSURE_WARN | NOTE_MEMORYSTATUS_PRESSURE_CRITICAL)) {
#if CONFIG_JETSAM && VM_PRESSURE_EVENTS
        /* Need a privilege to register */
        error = priv_check_cred(kauth_cred_get(), PRIV_VM_PRESSURE, 0);
#endif /* CONFIG_JETSAM && VM_PRESSURE_EVENTS */

        if (!error) {
            KNOTE_ATTACH(&memorystatus_klist, kn);
        }
    } else {
        error = ENOTSUP;
    }

    memorystatus_klist_unlock();

    return error;
}

void
memorystatus_knote_unregister(struct knote *kn) {
    memorystatus_klist_lock();
    KNOTE_DETACH(&memorystatus_klist, kn);
    memorystatus_klist_unlock();
}

#if CONFIG_JETSAM && VM_PRESSURE_EVENTS
static boolean_t
memorystatus_issue_pressure_kevent(boolean_t pressured) {
    memorystatus_klist_lock();
    KNOTE(&memorystatus_klist, pressured ? kMemorystatusPressure : kMemorystatusNoPressure);
    memorystatus_klist_unlock();
    return TRUE;
}

#endif /* CONFIG_JETSAM && VM_PRESSURE_EVENTS */