1240868Spjd/* 2240868Spjd * CDDL HEADER START 3240868Spjd * 4240868Spjd * The contents of this file are subject to the terms of the 5240868Spjd * Common Development and Distribution License (the "License"). 6240868Spjd * You may not use this file except in compliance with the License. 7240868Spjd * 8240868Spjd * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9240868Spjd * or http://www.opensolaris.org/os/licensing. 10240868Spjd * See the License for the specific language governing permissions 11240868Spjd * and limitations under the License. 12240868Spjd * 13240868Spjd * When distributing Covered Code, include this CDDL HEADER in each 14240868Spjd * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15240868Spjd * If applicable, add the following below this CDDL HEADER, with the 16240868Spjd * fields enclosed by brackets "[]" replaced with your own identifying 17240868Spjd * information: Portions Copyright [yyyy] [name of copyright owner] 18240868Spjd * 19240868Spjd * CDDL HEADER END 20240868Spjd */ 21240868Spjd/* 22240868Spjd * Copyright (c) 2012 Pawel Jakub Dawidek <pawel@dawidek.net>. 23240868Spjd * All rights reserved. 24240868Spjd */ 25240868Spjd 26240868Spjd#include <sys/zfs_context.h> 27240868Spjd#include <sys/spa_impl.h> 28240868Spjd#include <sys/vdev_impl.h> 29240868Spjd#include <sys/trim_map.h> 30248575Ssmh#include <sys/time.h> 31240868Spjd 32244187Ssmh/* 33244187Ssmh * Calculate the zio end, upgrading based on ashift which would be 34244187Ssmh * done by zio_vdev_io_start. 35244187Ssmh * 36244187Ssmh * This makes free range consolidation much more effective 37244187Ssmh * than it would otherwise be as well as ensuring that entire 38244187Ssmh * blocks are invalidated by writes. 39244187Ssmh */ 40248572Ssmh#define TRIM_ZIO_END(vd, offset, size) (offset + \ 41248572Ssmh P2ROUNDUP(size, 1ULL << vd->vdev_top->vdev_ashift)) 42244187Ssmh 43248577Ssmh#define TRIM_MAP_SINC(tm, size) \ 44248577Ssmh atomic_add_64(&(tm)->tm_bytes, (size)) 45248577Ssmh 46248577Ssmh#define TRIM_MAP_SDEC(tm, size) \ 47248602Ssmh atomic_add_64(&(tm)->tm_bytes, -(size)) 48248577Ssmh 49248577Ssmh#define TRIM_MAP_QINC(tm) \ 50248577Ssmh atomic_inc_64(&(tm)->tm_pending); \ 51248577Ssmh 52248577Ssmh#define TRIM_MAP_QDEC(tm) \ 53248577Ssmh atomic_dec_64(&(tm)->tm_pending); 54248577Ssmh 55240868Spjdtypedef struct trim_map { 56240868Spjd list_t tm_head; /* List of segments sorted by txg. */ 57240868Spjd avl_tree_t tm_queued_frees; /* AVL tree of segments waiting for TRIM. */ 58240868Spjd avl_tree_t tm_inflight_frees; /* AVL tree of in-flight TRIMs. */ 59240868Spjd avl_tree_t tm_inflight_writes; /* AVL tree of in-flight writes. */ 60240868Spjd list_t tm_pending_writes; /* Writes blocked on in-flight frees. */ 61240868Spjd kmutex_t tm_lock; 62248577Ssmh uint64_t tm_pending; /* Count of pending TRIMs. */ 63248577Ssmh uint64_t tm_bytes; /* Total size in bytes of queued TRIMs. */ 64240868Spjd} trim_map_t; 65240868Spjd 66240868Spjdtypedef struct trim_seg { 67240868Spjd avl_node_t ts_node; /* AVL node. */ 68240868Spjd list_node_t ts_next; /* List element. */ 69240868Spjd uint64_t ts_start; /* Starting offset of this segment. */ 70240868Spjd uint64_t ts_end; /* Ending offset (non-inclusive). */ 71240868Spjd uint64_t ts_txg; /* Segment creation txg. */ 72248575Ssmh hrtime_t ts_time; /* Segment creation time. */ 73240868Spjd} trim_seg_t; 74240868Spjd 75249921Ssmhextern boolean_t zfs_trim_enabled; 76240868Spjd 77248577Ssmhstatic u_int trim_txg_delay = 32; 78248577Ssmhstatic u_int trim_timeout = 30; 79248577Ssmhstatic u_int trim_max_interval = 1; 80248577Ssmh/* Limit outstanding TRIMs to 2G (max size for a single TRIM request) */ 81248577Ssmhstatic uint64_t trim_vdev_max_bytes = 2147483648; 82248577Ssmh/* Limit outstanding TRIMs to 64 (max ranges for a single TRIM request) */ 83248577Ssmhstatic u_int trim_vdev_max_pending = 64; 84248577Ssmh 85240868SpjdSYSCTL_DECL(_vfs_zfs); 86248577SsmhSYSCTL_NODE(_vfs_zfs, OID_AUTO, trim, CTLFLAG_RD, 0, "ZFS TRIM"); 87240868Spjd 88248577SsmhTUNABLE_INT("vfs.zfs.trim.txg_delay", &trim_txg_delay); 89248577SsmhSYSCTL_UINT(_vfs_zfs_trim, OID_AUTO, txg_delay, CTLFLAG_RWTUN, &trim_txg_delay, 90248577Ssmh 0, "Delay TRIMs by up to this many TXGs"); 91248575Ssmh 92248577SsmhTUNABLE_INT("vfs.zfs.trim.timeout", &trim_timeout); 93248577SsmhSYSCTL_UINT(_vfs_zfs_trim, OID_AUTO, timeout, CTLFLAG_RWTUN, &trim_timeout, 0, 94248577Ssmh "Delay TRIMs by up to this many seconds"); 95248577Ssmh 96248577SsmhTUNABLE_INT("vfs.zfs.trim.max_interval", &trim_max_interval); 97248577SsmhSYSCTL_UINT(_vfs_zfs_trim, OID_AUTO, max_interval, CTLFLAG_RWTUN, 98248577Ssmh &trim_max_interval, 0, 99248577Ssmh "Maximum interval between TRIM queue processing (seconds)"); 100248577Ssmh 101248577SsmhSYSCTL_DECL(_vfs_zfs_vdev); 102248577SsmhTUNABLE_QUAD("vfs.zfs.vdev.trim_max_bytes", &trim_vdev_max_bytes); 103248577SsmhSYSCTL_QUAD(_vfs_zfs_vdev, OID_AUTO, trim_max_bytes, CTLFLAG_RWTUN, 104248577Ssmh &trim_vdev_max_bytes, 0, 105248577Ssmh "Maximum pending TRIM bytes for a vdev"); 106248577Ssmh 107248577SsmhTUNABLE_INT("vfs.zfs.vdev.trim_max_pending", &trim_vdev_max_pending); 108248577SsmhSYSCTL_UINT(_vfs_zfs_vdev, OID_AUTO, trim_max_pending, CTLFLAG_RWTUN, 109248577Ssmh &trim_vdev_max_pending, 0, 110248577Ssmh "Maximum pending TRIM segments for a vdev"); 111248577Ssmh 112248577Ssmh 113240868Spjdstatic void trim_map_vdev_commit_done(spa_t *spa, vdev_t *vd); 114240868Spjd 115240868Spjdstatic int 116240868Spjdtrim_map_seg_compare(const void *x1, const void *x2) 117240868Spjd{ 118240868Spjd const trim_seg_t *s1 = x1; 119240868Spjd const trim_seg_t *s2 = x2; 120240868Spjd 121240868Spjd if (s1->ts_start < s2->ts_start) { 122240868Spjd if (s1->ts_end > s2->ts_start) 123240868Spjd return (0); 124240868Spjd return (-1); 125240868Spjd } 126240868Spjd if (s1->ts_start > s2->ts_start) { 127240868Spjd if (s1->ts_start < s2->ts_end) 128240868Spjd return (0); 129240868Spjd return (1); 130240868Spjd } 131240868Spjd return (0); 132240868Spjd} 133240868Spjd 134240868Spjdstatic int 135240868Spjdtrim_map_zio_compare(const void *x1, const void *x2) 136240868Spjd{ 137240868Spjd const zio_t *z1 = x1; 138240868Spjd const zio_t *z2 = x2; 139240868Spjd 140240868Spjd if (z1->io_offset < z2->io_offset) { 141240868Spjd if (z1->io_offset + z1->io_size > z2->io_offset) 142240868Spjd return (0); 143240868Spjd return (-1); 144240868Spjd } 145240868Spjd if (z1->io_offset > z2->io_offset) { 146240868Spjd if (z1->io_offset < z2->io_offset + z2->io_size) 147240868Spjd return (0); 148240868Spjd return (1); 149240868Spjd } 150240868Spjd return (0); 151240868Spjd} 152240868Spjd 153240868Spjdvoid 154240868Spjdtrim_map_create(vdev_t *vd) 155240868Spjd{ 156240868Spjd trim_map_t *tm; 157240868Spjd 158284193Sdelphij ASSERT(zfs_trim_enabled && !vd->vdev_notrim && 159284193Sdelphij vd->vdev_ops->vdev_op_leaf); 160240868Spjd 161240868Spjd tm = kmem_zalloc(sizeof (*tm), KM_SLEEP); 162240868Spjd mutex_init(&tm->tm_lock, NULL, MUTEX_DEFAULT, NULL); 163240868Spjd list_create(&tm->tm_head, sizeof (trim_seg_t), 164240868Spjd offsetof(trim_seg_t, ts_next)); 165240868Spjd list_create(&tm->tm_pending_writes, sizeof (zio_t), 166240868Spjd offsetof(zio_t, io_trim_link)); 167240868Spjd avl_create(&tm->tm_queued_frees, trim_map_seg_compare, 168240868Spjd sizeof (trim_seg_t), offsetof(trim_seg_t, ts_node)); 169240868Spjd avl_create(&tm->tm_inflight_frees, trim_map_seg_compare, 170240868Spjd sizeof (trim_seg_t), offsetof(trim_seg_t, ts_node)); 171240868Spjd avl_create(&tm->tm_inflight_writes, trim_map_zio_compare, 172240868Spjd sizeof (zio_t), offsetof(zio_t, io_trim_node)); 173240868Spjd vd->vdev_trimmap = tm; 174240868Spjd} 175240868Spjd 176240868Spjdvoid 177240868Spjdtrim_map_destroy(vdev_t *vd) 178240868Spjd{ 179240868Spjd trim_map_t *tm; 180240868Spjd trim_seg_t *ts; 181240868Spjd 182240868Spjd ASSERT(vd->vdev_ops->vdev_op_leaf); 183240868Spjd 184249921Ssmh if (!zfs_trim_enabled) 185240868Spjd return; 186240868Spjd 187240868Spjd tm = vd->vdev_trimmap; 188240868Spjd if (tm == NULL) 189240868Spjd return; 190240868Spjd 191240868Spjd /* 192240868Spjd * We may have been called before trim_map_vdev_commit_done() 193240868Spjd * had a chance to run, so do it now to prune the remaining 194240868Spjd * inflight frees. 195240868Spjd */ 196240868Spjd trim_map_vdev_commit_done(vd->vdev_spa, vd); 197240868Spjd 198240868Spjd mutex_enter(&tm->tm_lock); 199240868Spjd while ((ts = list_head(&tm->tm_head)) != NULL) { 200240868Spjd avl_remove(&tm->tm_queued_frees, ts); 201240868Spjd list_remove(&tm->tm_head, ts); 202240868Spjd kmem_free(ts, sizeof (*ts)); 203248577Ssmh TRIM_MAP_SDEC(tm, ts->ts_end - ts->ts_start); 204248577Ssmh TRIM_MAP_QDEC(tm); 205240868Spjd } 206240868Spjd mutex_exit(&tm->tm_lock); 207240868Spjd 208240868Spjd avl_destroy(&tm->tm_queued_frees); 209240868Spjd avl_destroy(&tm->tm_inflight_frees); 210240868Spjd avl_destroy(&tm->tm_inflight_writes); 211240868Spjd list_destroy(&tm->tm_pending_writes); 212240868Spjd list_destroy(&tm->tm_head); 213240868Spjd mutex_destroy(&tm->tm_lock); 214240868Spjd kmem_free(tm, sizeof (*tm)); 215240868Spjd vd->vdev_trimmap = NULL; 216240868Spjd} 217240868Spjd 218240868Spjdstatic void 219240868Spjdtrim_map_segment_add(trim_map_t *tm, uint64_t start, uint64_t end, uint64_t txg) 220240868Spjd{ 221240868Spjd avl_index_t where; 222240868Spjd trim_seg_t tsearch, *ts_before, *ts_after, *ts; 223240868Spjd boolean_t merge_before, merge_after; 224248575Ssmh hrtime_t time; 225240868Spjd 226240868Spjd ASSERT(MUTEX_HELD(&tm->tm_lock)); 227240868Spjd VERIFY(start < end); 228240868Spjd 229248575Ssmh time = gethrtime(); 230240868Spjd tsearch.ts_start = start; 231240868Spjd tsearch.ts_end = end; 232240868Spjd 233240868Spjd ts = avl_find(&tm->tm_queued_frees, &tsearch, &where); 234240868Spjd if (ts != NULL) { 235240868Spjd if (start < ts->ts_start) 236240868Spjd trim_map_segment_add(tm, start, ts->ts_start, txg); 237240868Spjd if (end > ts->ts_end) 238240868Spjd trim_map_segment_add(tm, ts->ts_end, end, txg); 239240868Spjd return; 240240868Spjd } 241240868Spjd 242240868Spjd ts_before = avl_nearest(&tm->tm_queued_frees, where, AVL_BEFORE); 243240868Spjd ts_after = avl_nearest(&tm->tm_queued_frees, where, AVL_AFTER); 244240868Spjd 245248577Ssmh merge_before = (ts_before != NULL && ts_before->ts_end == start); 246248577Ssmh merge_after = (ts_after != NULL && ts_after->ts_start == end); 247240868Spjd 248240868Spjd if (merge_before && merge_after) { 249248577Ssmh TRIM_MAP_SINC(tm, ts_after->ts_start - ts_before->ts_end); 250248577Ssmh TRIM_MAP_QDEC(tm); 251240868Spjd avl_remove(&tm->tm_queued_frees, ts_before); 252240868Spjd list_remove(&tm->tm_head, ts_before); 253240868Spjd ts_after->ts_start = ts_before->ts_start; 254248577Ssmh ts_after->ts_txg = txg; 255248577Ssmh ts_after->ts_time = time; 256240868Spjd kmem_free(ts_before, sizeof (*ts_before)); 257240868Spjd } else if (merge_before) { 258248577Ssmh TRIM_MAP_SINC(tm, end - ts_before->ts_end); 259240868Spjd ts_before->ts_end = end; 260248577Ssmh ts_before->ts_txg = txg; 261248577Ssmh ts_before->ts_time = time; 262240868Spjd } else if (merge_after) { 263248577Ssmh TRIM_MAP_SINC(tm, ts_after->ts_start - start); 264240868Spjd ts_after->ts_start = start; 265248577Ssmh ts_after->ts_txg = txg; 266248577Ssmh ts_after->ts_time = time; 267240868Spjd } else { 268248577Ssmh TRIM_MAP_SINC(tm, end - start); 269248577Ssmh TRIM_MAP_QINC(tm); 270240868Spjd ts = kmem_alloc(sizeof (*ts), KM_SLEEP); 271240868Spjd ts->ts_start = start; 272240868Spjd ts->ts_end = end; 273240868Spjd ts->ts_txg = txg; 274248575Ssmh ts->ts_time = time; 275240868Spjd avl_insert(&tm->tm_queued_frees, ts, where); 276240868Spjd list_insert_tail(&tm->tm_head, ts); 277240868Spjd } 278240868Spjd} 279240868Spjd 280240868Spjdstatic void 281240868Spjdtrim_map_segment_remove(trim_map_t *tm, trim_seg_t *ts, uint64_t start, 282240868Spjd uint64_t end) 283240868Spjd{ 284240868Spjd trim_seg_t *nts; 285240868Spjd boolean_t left_over, right_over; 286240868Spjd 287240868Spjd ASSERT(MUTEX_HELD(&tm->tm_lock)); 288240868Spjd 289240868Spjd left_over = (ts->ts_start < start); 290240868Spjd right_over = (ts->ts_end > end); 291240868Spjd 292248577Ssmh TRIM_MAP_SDEC(tm, end - start); 293240868Spjd if (left_over && right_over) { 294240868Spjd nts = kmem_alloc(sizeof (*nts), KM_SLEEP); 295240868Spjd nts->ts_start = end; 296240868Spjd nts->ts_end = ts->ts_end; 297240868Spjd nts->ts_txg = ts->ts_txg; 298248575Ssmh nts->ts_time = ts->ts_time; 299240868Spjd ts->ts_end = start; 300240868Spjd avl_insert_here(&tm->tm_queued_frees, nts, ts, AVL_AFTER); 301240868Spjd list_insert_after(&tm->tm_head, ts, nts); 302248577Ssmh TRIM_MAP_QINC(tm); 303240868Spjd } else if (left_over) { 304240868Spjd ts->ts_end = start; 305240868Spjd } else if (right_over) { 306240868Spjd ts->ts_start = end; 307240868Spjd } else { 308240868Spjd avl_remove(&tm->tm_queued_frees, ts); 309240868Spjd list_remove(&tm->tm_head, ts); 310248577Ssmh TRIM_MAP_QDEC(tm); 311240868Spjd kmem_free(ts, sizeof (*ts)); 312240868Spjd } 313240868Spjd} 314240868Spjd 315240868Spjdstatic void 316240868Spjdtrim_map_free_locked(trim_map_t *tm, uint64_t start, uint64_t end, uint64_t txg) 317240868Spjd{ 318240868Spjd zio_t zsearch, *zs; 319240868Spjd 320240868Spjd ASSERT(MUTEX_HELD(&tm->tm_lock)); 321240868Spjd 322240868Spjd zsearch.io_offset = start; 323240868Spjd zsearch.io_size = end - start; 324240868Spjd 325240868Spjd zs = avl_find(&tm->tm_inflight_writes, &zsearch, NULL); 326240868Spjd if (zs == NULL) { 327240868Spjd trim_map_segment_add(tm, start, end, txg); 328240868Spjd return; 329240868Spjd } 330240868Spjd if (start < zs->io_offset) 331240868Spjd trim_map_free_locked(tm, start, zs->io_offset, txg); 332240868Spjd if (zs->io_offset + zs->io_size < end) 333240868Spjd trim_map_free_locked(tm, zs->io_offset + zs->io_size, end, txg); 334240868Spjd} 335240868Spjd 336240868Spjdvoid 337248574Ssmhtrim_map_free(vdev_t *vd, uint64_t offset, uint64_t size, uint64_t txg) 338240868Spjd{ 339240868Spjd trim_map_t *tm = vd->vdev_trimmap; 340240868Spjd 341249921Ssmh if (!zfs_trim_enabled || vd->vdev_notrim || tm == NULL) 342240868Spjd return; 343240868Spjd 344240868Spjd mutex_enter(&tm->tm_lock); 345248574Ssmh trim_map_free_locked(tm, offset, TRIM_ZIO_END(vd, offset, size), txg); 346240868Spjd mutex_exit(&tm->tm_lock); 347240868Spjd} 348240868Spjd 349240868Spjdboolean_t 350240868Spjdtrim_map_write_start(zio_t *zio) 351240868Spjd{ 352240868Spjd vdev_t *vd = zio->io_vd; 353240868Spjd trim_map_t *tm = vd->vdev_trimmap; 354240868Spjd trim_seg_t tsearch, *ts; 355240868Spjd boolean_t left_over, right_over; 356240868Spjd uint64_t start, end; 357240868Spjd 358249921Ssmh if (!zfs_trim_enabled || vd->vdev_notrim || tm == NULL) 359240868Spjd return (B_TRUE); 360240868Spjd 361240868Spjd start = zio->io_offset; 362248572Ssmh end = TRIM_ZIO_END(zio->io_vd, start, zio->io_size); 363240868Spjd tsearch.ts_start = start; 364240868Spjd tsearch.ts_end = end; 365240868Spjd 366240868Spjd mutex_enter(&tm->tm_lock); 367240868Spjd 368240868Spjd /* 369240868Spjd * Checking for colliding in-flight frees. 370240868Spjd */ 371240868Spjd ts = avl_find(&tm->tm_inflight_frees, &tsearch, NULL); 372240868Spjd if (ts != NULL) { 373240868Spjd list_insert_tail(&tm->tm_pending_writes, zio); 374240868Spjd mutex_exit(&tm->tm_lock); 375240868Spjd return (B_FALSE); 376240868Spjd } 377240868Spjd 378240868Spjd ts = avl_find(&tm->tm_queued_frees, &tsearch, NULL); 379240868Spjd if (ts != NULL) { 380240868Spjd /* 381240868Spjd * Loop until all overlapping segments are removed. 382240868Spjd */ 383240868Spjd do { 384240868Spjd trim_map_segment_remove(tm, ts, start, end); 385240868Spjd ts = avl_find(&tm->tm_queued_frees, &tsearch, NULL); 386240868Spjd } while (ts != NULL); 387240868Spjd } 388240868Spjd avl_add(&tm->tm_inflight_writes, zio); 389240868Spjd 390240868Spjd mutex_exit(&tm->tm_lock); 391240868Spjd 392240868Spjd return (B_TRUE); 393240868Spjd} 394240868Spjd 395240868Spjdvoid 396240868Spjdtrim_map_write_done(zio_t *zio) 397240868Spjd{ 398240868Spjd vdev_t *vd = zio->io_vd; 399240868Spjd trim_map_t *tm = vd->vdev_trimmap; 400240868Spjd 401240868Spjd /* 402240868Spjd * Don't check for vdev_notrim, since the write could have 403240868Spjd * started before vdev_notrim was set. 404240868Spjd */ 405249921Ssmh if (!zfs_trim_enabled || tm == NULL) 406240868Spjd return; 407240868Spjd 408240868Spjd mutex_enter(&tm->tm_lock); 409240868Spjd /* 410240868Spjd * Don't fail if the write isn't in the tree, since the write 411240868Spjd * could have started after vdev_notrim was set. 412240868Spjd */ 413240868Spjd if (zio->io_trim_node.avl_child[0] || 414240868Spjd zio->io_trim_node.avl_child[1] || 415240868Spjd AVL_XPARENT(&zio->io_trim_node) || 416240868Spjd tm->tm_inflight_writes.avl_root == &zio->io_trim_node) 417240868Spjd avl_remove(&tm->tm_inflight_writes, zio); 418240868Spjd mutex_exit(&tm->tm_lock); 419240868Spjd} 420240868Spjd 421240868Spjd/* 422248577Ssmh * Return the oldest segment (the one with the lowest txg / time) or NULL if: 423248577Ssmh * 1. The list is empty 424248577Ssmh * 2. The first element's txg is greater than txgsafe 425248577Ssmh * 3. The first element's txg is not greater than the txg argument and the 426248577Ssmh * the first element's time is not greater than time argument 427240868Spjd */ 428240868Spjdstatic trim_seg_t * 429248577Ssmhtrim_map_first(trim_map_t *tm, uint64_t txg, uint64_t txgsafe, hrtime_t time) 430240868Spjd{ 431240868Spjd trim_seg_t *ts; 432240868Spjd 433240868Spjd ASSERT(MUTEX_HELD(&tm->tm_lock)); 434248577Ssmh VERIFY(txgsafe >= txg); 435240868Spjd 436240868Spjd ts = list_head(&tm->tm_head); 437248577Ssmh if (ts != NULL && ts->ts_txg <= txgsafe && 438248577Ssmh (ts->ts_txg <= txg || ts->ts_time <= time || 439248577Ssmh tm->tm_bytes > trim_vdev_max_bytes || 440248577Ssmh tm->tm_pending > trim_vdev_max_pending)) 441240868Spjd return (ts); 442240868Spjd return (NULL); 443240868Spjd} 444240868Spjd 445240868Spjdstatic void 446240868Spjdtrim_map_vdev_commit(spa_t *spa, zio_t *zio, vdev_t *vd) 447240868Spjd{ 448240868Spjd trim_map_t *tm = vd->vdev_trimmap; 449240868Spjd trim_seg_t *ts; 450270312Ssmh uint64_t size, offset, txgtarget, txgsafe; 451248575Ssmh hrtime_t timelimit; 452240868Spjd 453240868Spjd ASSERT(vd->vdev_ops->vdev_op_leaf); 454240868Spjd 455240868Spjd if (tm == NULL) 456240868Spjd return; 457240868Spjd 458248577Ssmh timelimit = gethrtime() - trim_timeout * NANOSEC; 459248575Ssmh if (vd->vdev_isl2cache) { 460248577Ssmh txgsafe = UINT64_MAX; 461248577Ssmh txgtarget = UINT64_MAX; 462248575Ssmh } else { 463248577Ssmh txgsafe = MIN(spa_last_synced_txg(spa), spa_freeze_txg(spa)); 464248577Ssmh if (txgsafe > trim_txg_delay) 465248577Ssmh txgtarget = txgsafe - trim_txg_delay; 466248577Ssmh else 467248577Ssmh txgtarget = 0; 468248575Ssmh } 469240868Spjd 470240868Spjd mutex_enter(&tm->tm_lock); 471248577Ssmh /* Loop until we have sent all outstanding free's */ 472248577Ssmh while ((ts = trim_map_first(tm, txgtarget, txgsafe, timelimit)) 473248577Ssmh != NULL) { 474240868Spjd list_remove(&tm->tm_head, ts); 475240868Spjd avl_remove(&tm->tm_queued_frees, ts); 476240868Spjd avl_add(&tm->tm_inflight_frees, ts); 477248577Ssmh size = ts->ts_end - ts->ts_start; 478270312Ssmh offset = ts->ts_start; 479248577Ssmh TRIM_MAP_SDEC(tm, size); 480248577Ssmh TRIM_MAP_QDEC(tm); 481270312Ssmh /* 482270312Ssmh * We drop the lock while we call zio_nowait as the IO 483270312Ssmh * scheduler can result in a different IO being run e.g. 484270312Ssmh * a write which would result in a recursive lock. 485270312Ssmh */ 486270312Ssmh mutex_exit(&tm->tm_lock); 487270312Ssmh 488270312Ssmh zio_nowait(zio_trim(zio, spa, vd, offset, size)); 489270312Ssmh 490270312Ssmh mutex_enter(&tm->tm_lock); 491270312Ssmh ts = trim_map_first(tm, txgtarget, txgsafe, timelimit); 492240868Spjd } 493240868Spjd mutex_exit(&tm->tm_lock); 494240868Spjd} 495240868Spjd 496240868Spjdstatic void 497240868Spjdtrim_map_vdev_commit_done(spa_t *spa, vdev_t *vd) 498240868Spjd{ 499240868Spjd trim_map_t *tm = vd->vdev_trimmap; 500240868Spjd trim_seg_t *ts; 501240868Spjd list_t pending_writes; 502240868Spjd zio_t *zio; 503240868Spjd uint64_t start, size; 504240868Spjd void *cookie; 505240868Spjd 506240868Spjd ASSERT(vd->vdev_ops->vdev_op_leaf); 507240868Spjd 508240868Spjd if (tm == NULL) 509240868Spjd return; 510240868Spjd 511240868Spjd mutex_enter(&tm->tm_lock); 512240868Spjd if (!avl_is_empty(&tm->tm_inflight_frees)) { 513240868Spjd cookie = NULL; 514240868Spjd while ((ts = avl_destroy_nodes(&tm->tm_inflight_frees, 515240868Spjd &cookie)) != NULL) { 516240868Spjd kmem_free(ts, sizeof (*ts)); 517240868Spjd } 518240868Spjd } 519240868Spjd list_create(&pending_writes, sizeof (zio_t), offsetof(zio_t, 520240868Spjd io_trim_link)); 521240868Spjd list_move_tail(&pending_writes, &tm->tm_pending_writes); 522240868Spjd mutex_exit(&tm->tm_lock); 523240868Spjd 524240868Spjd while ((zio = list_remove_head(&pending_writes)) != NULL) { 525240868Spjd zio_vdev_io_reissue(zio); 526240868Spjd zio_execute(zio); 527240868Spjd } 528240868Spjd list_destroy(&pending_writes); 529240868Spjd} 530240868Spjd 531240868Spjdstatic void 532240868Spjdtrim_map_commit(spa_t *spa, zio_t *zio, vdev_t *vd) 533240868Spjd{ 534240868Spjd int c; 535240868Spjd 536248577Ssmh if (vd == NULL) 537240868Spjd return; 538240868Spjd 539240868Spjd if (vd->vdev_ops->vdev_op_leaf) { 540240868Spjd trim_map_vdev_commit(spa, zio, vd); 541240868Spjd } else { 542240868Spjd for (c = 0; c < vd->vdev_children; c++) 543240868Spjd trim_map_commit(spa, zio, vd->vdev_child[c]); 544240868Spjd } 545240868Spjd} 546240868Spjd 547240868Spjdstatic void 548240868Spjdtrim_map_commit_done(spa_t *spa, vdev_t *vd) 549240868Spjd{ 550240868Spjd int c; 551240868Spjd 552240868Spjd if (vd == NULL) 553240868Spjd return; 554240868Spjd 555240868Spjd if (vd->vdev_ops->vdev_op_leaf) { 556240868Spjd trim_map_vdev_commit_done(spa, vd); 557240868Spjd } else { 558240868Spjd for (c = 0; c < vd->vdev_children; c++) 559240868Spjd trim_map_commit_done(spa, vd->vdev_child[c]); 560240868Spjd } 561240868Spjd} 562240868Spjd 563240868Spjdstatic void 564240868Spjdtrim_thread(void *arg) 565240868Spjd{ 566240868Spjd spa_t *spa = arg; 567240868Spjd zio_t *zio; 568240868Spjd 569248576Ssmh#ifdef _KERNEL 570248576Ssmh (void) snprintf(curthread->td_name, sizeof(curthread->td_name), 571248576Ssmh "trim %s", spa_name(spa)); 572248576Ssmh#endif 573248576Ssmh 574240868Spjd for (;;) { 575240868Spjd mutex_enter(&spa->spa_trim_lock); 576240868Spjd if (spa->spa_trim_thread == NULL) { 577240868Spjd spa->spa_trim_thread = curthread; 578240868Spjd cv_signal(&spa->spa_trim_cv); 579240868Spjd mutex_exit(&spa->spa_trim_lock); 580240868Spjd thread_exit(); 581240868Spjd } 582248577Ssmh 583248577Ssmh (void) cv_timedwait(&spa->spa_trim_cv, &spa->spa_trim_lock, 584248577Ssmh hz * trim_max_interval); 585240868Spjd mutex_exit(&spa->spa_trim_lock); 586240868Spjd 587240868Spjd zio = zio_root(spa, NULL, NULL, ZIO_FLAG_CANFAIL); 588240868Spjd 589240868Spjd spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); 590240868Spjd trim_map_commit(spa, zio, spa->spa_root_vdev); 591240868Spjd (void) zio_wait(zio); 592240868Spjd trim_map_commit_done(spa, spa->spa_root_vdev); 593240868Spjd spa_config_exit(spa, SCL_STATE, FTAG); 594240868Spjd } 595240868Spjd} 596240868Spjd 597240868Spjdvoid 598240868Spjdtrim_thread_create(spa_t *spa) 599240868Spjd{ 600240868Spjd 601249921Ssmh if (!zfs_trim_enabled) 602240868Spjd return; 603240868Spjd 604240868Spjd mutex_init(&spa->spa_trim_lock, NULL, MUTEX_DEFAULT, NULL); 605240868Spjd cv_init(&spa->spa_trim_cv, NULL, CV_DEFAULT, NULL); 606240868Spjd mutex_enter(&spa->spa_trim_lock); 607240868Spjd spa->spa_trim_thread = thread_create(NULL, 0, trim_thread, spa, 0, &p0, 608240868Spjd TS_RUN, minclsyspri); 609240868Spjd mutex_exit(&spa->spa_trim_lock); 610240868Spjd} 611240868Spjd 612240868Spjdvoid 613240868Spjdtrim_thread_destroy(spa_t *spa) 614240868Spjd{ 615240868Spjd 616249921Ssmh if (!zfs_trim_enabled) 617240868Spjd return; 618240868Spjd if (spa->spa_trim_thread == NULL) 619240868Spjd return; 620240868Spjd 621240868Spjd mutex_enter(&spa->spa_trim_lock); 622240868Spjd /* Setting spa_trim_thread to NULL tells the thread to stop. */ 623240868Spjd spa->spa_trim_thread = NULL; 624240868Spjd cv_signal(&spa->spa_trim_cv); 625240868Spjd /* The thread will set it back to != NULL on exit. */ 626240868Spjd while (spa->spa_trim_thread == NULL) 627240868Spjd cv_wait(&spa->spa_trim_cv, &spa->spa_trim_lock); 628240868Spjd spa->spa_trim_thread = NULL; 629240868Spjd mutex_exit(&spa->spa_trim_lock); 630240868Spjd 631240868Spjd cv_destroy(&spa->spa_trim_cv); 632240868Spjd mutex_destroy(&spa->spa_trim_lock); 633240868Spjd} 634240868Spjd 635240868Spjdvoid 636240868Spjdtrim_thread_wakeup(spa_t *spa) 637240868Spjd{ 638240868Spjd 639249921Ssmh if (!zfs_trim_enabled) 640240868Spjd return; 641240868Spjd if (spa->spa_trim_thread == NULL) 642240868Spjd return; 643240868Spjd 644240868Spjd mutex_enter(&spa->spa_trim_lock); 645240868Spjd cv_signal(&spa->spa_trim_cv); 646240868Spjd mutex_exit(&spa->spa_trim_lock); 647240868Spjd} 648