1255570Strasz/*- 2255570Strasz * Copyright (c) 2012 The FreeBSD Foundation 3255570Strasz * All rights reserved. 4255570Strasz * 5255570Strasz * This software was developed by Edward Tomasz Napierala under sponsorship 6255570Strasz * from the FreeBSD Foundation. 7255570Strasz * 8255570Strasz * Redistribution and use in source and binary forms, with or without 9255570Strasz * modification, are permitted provided that the following conditions 10255570Strasz * are met: 11255570Strasz * 1. Redistributions of source code must retain the above copyright 12255570Strasz * notice, this list of conditions and the following disclaimer. 13255570Strasz * 2. Redistributions in binary form must reproduce the above copyright 14255570Strasz * notice, this list of conditions and the following disclaimer in the 15255570Strasz * documentation and/or other materials provided with the distribution. 16255570Strasz * 17255570Strasz * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18255570Strasz * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19255570Strasz * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20255570Strasz * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21255570Strasz * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22255570Strasz * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23255570Strasz * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24255570Strasz * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25255570Strasz * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26255570Strasz * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27255570Strasz * SUCH DAMAGE. 28255570Strasz * 29255570Strasz */ 30255570Strasz 31255570Strasz/* 32255570Strasz * iSCSI Common Layer. It's used by both the initiator and target to send 33255570Strasz * and receive iSCSI PDUs. 34255570Strasz */ 35255570Strasz 36270888Strasz#include <sys/cdefs.h> 37270888Strasz__FBSDID("$FreeBSD$"); 38270888Strasz 39255570Strasz#include <sys/param.h> 40255570Strasz#include <sys/capability.h> 41255570Strasz#include <sys/condvar.h> 42255570Strasz#include <sys/conf.h> 43255570Strasz#include <sys/file.h> 44255570Strasz#include <sys/kernel.h> 45255570Strasz#include <sys/kthread.h> 46255570Strasz#include <sys/lock.h> 47255570Strasz#include <sys/mbuf.h> 48255570Strasz#include <sys/mutex.h> 49255570Strasz#include <sys/module.h> 50265524Strasz#include <sys/protosw.h> 51255570Strasz#include <sys/socket.h> 52255570Strasz#include <sys/socketvar.h> 53255570Strasz#include <sys/sysctl.h> 54255570Strasz#include <sys/systm.h> 55255570Strasz#include <sys/sx.h> 56255570Strasz#include <sys/uio.h> 57255570Strasz#include <vm/uma.h> 58255570Strasz#include <netinet/in.h> 59255570Strasz#include <netinet/tcp.h> 60255570Strasz 61270891Strasz#include <dev/iscsi/icl.h> 62270891Strasz#include <dev/iscsi/iscsi_proto.h> 63255570Strasz 64255570StraszSYSCTL_NODE(_kern, OID_AUTO, icl, CTLFLAG_RD, 0, "iSCSI Common Layer"); 65255570Straszstatic int debug = 1; 66255570StraszTUNABLE_INT("kern.icl.debug", &debug); 67265501StraszSYSCTL_INT(_kern_icl, OID_AUTO, debug, CTLFLAG_RWTUN, 68265531Strasz &debug, 0, "Enable debug messages"); 69265502Straszstatic int coalesce = 1; 70265502StraszTUNABLE_INT("kern.icl.coalesce", &coalesce); 71265502StraszSYSCTL_INT(_kern_icl, OID_AUTO, coalesce, CTLFLAG_RWTUN, 72265531Strasz &coalesce, 0, "Try to coalesce PDUs before sending"); 73265524Straszstatic int partial_receive_len = 128 * 1024; 74255570StraszTUNABLE_INT("kern.icl.partial_receive_len", &partial_receive_len); 75265501StraszSYSCTL_INT(_kern_icl, OID_AUTO, partial_receive_len, CTLFLAG_RWTUN, 76265531Strasz &partial_receive_len, 0, "Minimum read size for partially received " 77255570Strasz "data segment"); 78265498Straszstatic int sendspace = 1048576; 79265498StraszTUNABLE_INT("kern.icl.sendspace", &sendspace); 80265501StraszSYSCTL_INT(_kern_icl, OID_AUTO, sendspace, CTLFLAG_RWTUN, 81265531Strasz &sendspace, 0, "Default send socket buffer size"); 82265498Straszstatic int recvspace = 1048576; 83265498StraszTUNABLE_INT("kern.icl.recvspace", &recvspace); 84265501StraszSYSCTL_INT(_kern_icl, OID_AUTO, recvspace, CTLFLAG_RWTUN, 85265531Strasz &recvspace, 0, "Default receive socket buffer size"); 86255570Strasz 87255570Straszstatic uma_zone_t icl_conn_zone; 88255570Straszstatic uma_zone_t icl_pdu_zone; 89255570Strasz 90255570Straszstatic volatile u_int icl_ncons; 91255570Strasz 92265505Strasz#define ICL_DEBUG(X, ...) \ 93265505Strasz do { \ 94265505Strasz if (debug > 1) \ 95265505Strasz printf("%s: " X "\n", __func__, ## __VA_ARGS__);\ 96255570Strasz } while (0) 97255570Strasz 98265505Strasz#define ICL_WARN(X, ...) \ 99265505Strasz do { \ 100265505Strasz if (debug > 0) { \ 101265505Strasz printf("WARNING: %s: " X "\n", \ 102265505Strasz __func__, ## __VA_ARGS__); \ 103265505Strasz } \ 104255570Strasz } while (0) 105255570Strasz 106265495Strasz#define ICL_CONN_LOCK(X) mtx_lock(X->ic_lock) 107265495Strasz#define ICL_CONN_UNLOCK(X) mtx_unlock(X->ic_lock) 108265495Strasz#define ICL_CONN_LOCK_ASSERT(X) mtx_assert(X->ic_lock, MA_OWNED) 109265495Strasz#define ICL_CONN_LOCK_ASSERT_NOT(X) mtx_assert(X->ic_lock, MA_NOTOWNED) 110255570Strasz 111265503StraszSTAILQ_HEAD(icl_pdu_stailq, icl_pdu); 112265503Strasz 113255570Straszstatic void 114255570Straszicl_conn_fail(struct icl_conn *ic) 115255570Strasz{ 116255570Strasz if (ic->ic_socket == NULL) 117255570Strasz return; 118255570Strasz 119255570Strasz /* 120255570Strasz * XXX 121255570Strasz */ 122255570Strasz ic->ic_socket->so_error = EDOOFUS; 123255570Strasz (ic->ic_error)(ic); 124255570Strasz} 125255570Strasz 126255570Straszstatic struct mbuf * 127255570Straszicl_conn_receive(struct icl_conn *ic, size_t len) 128255570Strasz{ 129255570Strasz struct uio uio; 130255570Strasz struct socket *so; 131255570Strasz struct mbuf *m; 132255570Strasz int error, flags; 133255570Strasz 134255570Strasz so = ic->ic_socket; 135255570Strasz 136255570Strasz memset(&uio, 0, sizeof(uio)); 137255570Strasz uio.uio_resid = len; 138255570Strasz 139255570Strasz flags = MSG_DONTWAIT; 140255570Strasz error = soreceive(so, NULL, &uio, &m, NULL, &flags); 141255570Strasz if (error != 0) { 142255570Strasz ICL_DEBUG("soreceive error %d", error); 143255570Strasz return (NULL); 144255570Strasz } 145255570Strasz if (uio.uio_resid != 0) { 146255570Strasz m_freem(m); 147255570Strasz ICL_DEBUG("short read"); 148255570Strasz return (NULL); 149255570Strasz } 150255570Strasz 151255570Strasz return (m); 152255570Strasz} 153255570Strasz 154255570Straszstatic struct icl_pdu * 155255570Straszicl_pdu_new(struct icl_conn *ic, int flags) 156255570Strasz{ 157255570Strasz struct icl_pdu *ip; 158255570Strasz 159265488Strasz#ifdef DIAGNOSTIC 160255570Strasz refcount_acquire(&ic->ic_outstanding_pdus); 161265488Strasz#endif 162255570Strasz ip = uma_zalloc(icl_pdu_zone, flags | M_ZERO); 163255570Strasz if (ip == NULL) { 164255570Strasz ICL_WARN("failed to allocate %zd bytes", sizeof(*ip)); 165265488Strasz#ifdef DIAGNOSTIC 166255570Strasz refcount_release(&ic->ic_outstanding_pdus); 167265488Strasz#endif 168255570Strasz return (NULL); 169255570Strasz } 170255570Strasz 171255570Strasz ip->ip_conn = ic; 172255570Strasz 173255570Strasz return (ip); 174255570Strasz} 175255570Strasz 176255570Straszvoid 177255570Straszicl_pdu_free(struct icl_pdu *ip) 178255570Strasz{ 179255570Strasz struct icl_conn *ic; 180255570Strasz 181255570Strasz ic = ip->ip_conn; 182255570Strasz 183255570Strasz m_freem(ip->ip_bhs_mbuf); 184255570Strasz m_freem(ip->ip_ahs_mbuf); 185255570Strasz m_freem(ip->ip_data_mbuf); 186255570Strasz uma_zfree(icl_pdu_zone, ip); 187265488Strasz#ifdef DIAGNOSTIC 188255570Strasz refcount_release(&ic->ic_outstanding_pdus); 189265488Strasz#endif 190255570Strasz} 191255570Strasz 192255570Strasz/* 193255570Strasz * Allocate icl_pdu with empty BHS to fill up by the caller. 194255570Strasz */ 195255570Straszstruct icl_pdu * 196255570Straszicl_pdu_new_bhs(struct icl_conn *ic, int flags) 197255570Strasz{ 198255570Strasz struct icl_pdu *ip; 199255570Strasz 200255570Strasz ip = icl_pdu_new(ic, flags); 201255570Strasz if (ip == NULL) 202255570Strasz return (NULL); 203255570Strasz 204255570Strasz ip->ip_bhs_mbuf = m_getm2(NULL, sizeof(struct iscsi_bhs), 205255570Strasz flags, MT_DATA, M_PKTHDR); 206255570Strasz if (ip->ip_bhs_mbuf == NULL) { 207255570Strasz ICL_WARN("failed to allocate %zd bytes", sizeof(*ip)); 208255570Strasz icl_pdu_free(ip); 209255570Strasz return (NULL); 210255570Strasz } 211255570Strasz ip->ip_bhs = mtod(ip->ip_bhs_mbuf, struct iscsi_bhs *); 212255570Strasz memset(ip->ip_bhs, 0, sizeof(struct iscsi_bhs)); 213255570Strasz ip->ip_bhs_mbuf->m_len = sizeof(struct iscsi_bhs); 214255570Strasz 215255570Strasz return (ip); 216255570Strasz} 217255570Strasz 218255570Straszstatic int 219255570Straszicl_pdu_ahs_length(const struct icl_pdu *request) 220255570Strasz{ 221255570Strasz 222255570Strasz return (request->ip_bhs->bhs_total_ahs_len * 4); 223255570Strasz} 224255570Strasz 225255570Straszsize_t 226255570Straszicl_pdu_data_segment_length(const struct icl_pdu *request) 227255570Strasz{ 228255570Strasz uint32_t len = 0; 229255570Strasz 230255570Strasz len += request->ip_bhs->bhs_data_segment_len[0]; 231255570Strasz len <<= 8; 232255570Strasz len += request->ip_bhs->bhs_data_segment_len[1]; 233255570Strasz len <<= 8; 234255570Strasz len += request->ip_bhs->bhs_data_segment_len[2]; 235255570Strasz 236255570Strasz return (len); 237255570Strasz} 238255570Strasz 239255570Straszstatic void 240255570Straszicl_pdu_set_data_segment_length(struct icl_pdu *response, uint32_t len) 241255570Strasz{ 242255570Strasz 243255570Strasz response->ip_bhs->bhs_data_segment_len[2] = len; 244255570Strasz response->ip_bhs->bhs_data_segment_len[1] = len >> 8; 245255570Strasz response->ip_bhs->bhs_data_segment_len[0] = len >> 16; 246255570Strasz} 247255570Strasz 248255570Straszstatic size_t 249255570Straszicl_pdu_padding(const struct icl_pdu *ip) 250255570Strasz{ 251255570Strasz 252255570Strasz if ((ip->ip_data_len % 4) != 0) 253255570Strasz return (4 - (ip->ip_data_len % 4)); 254255570Strasz 255255570Strasz return (0); 256255570Strasz} 257255570Strasz 258255570Straszstatic size_t 259255570Straszicl_pdu_size(const struct icl_pdu *response) 260255570Strasz{ 261255570Strasz size_t len; 262255570Strasz 263255570Strasz KASSERT(response->ip_ahs_len == 0, ("responding with AHS")); 264255570Strasz 265255570Strasz len = sizeof(struct iscsi_bhs) + response->ip_data_len + 266255570Strasz icl_pdu_padding(response); 267255570Strasz if (response->ip_conn->ic_header_crc32c) 268255570Strasz len += ISCSI_HEADER_DIGEST_SIZE; 269261770Strasz if (response->ip_data_len != 0 && response->ip_conn->ic_data_crc32c) 270255570Strasz len += ISCSI_DATA_DIGEST_SIZE; 271255570Strasz 272255570Strasz return (len); 273255570Strasz} 274255570Strasz 275255570Straszstatic int 276255570Straszicl_pdu_receive_bhs(struct icl_pdu *request, size_t *availablep) 277255570Strasz{ 278255570Strasz struct mbuf *m; 279255570Strasz 280255570Strasz m = icl_conn_receive(request->ip_conn, sizeof(struct iscsi_bhs)); 281255570Strasz if (m == NULL) { 282255570Strasz ICL_DEBUG("failed to receive BHS"); 283255570Strasz return (-1); 284255570Strasz } 285255570Strasz 286255570Strasz request->ip_bhs_mbuf = m_pullup(m, sizeof(struct iscsi_bhs)); 287255570Strasz if (request->ip_bhs_mbuf == NULL) { 288255570Strasz ICL_WARN("m_pullup failed"); 289255570Strasz return (-1); 290255570Strasz } 291255570Strasz request->ip_bhs = mtod(request->ip_bhs_mbuf, struct iscsi_bhs *); 292255570Strasz 293255570Strasz /* 294255570Strasz * XXX: For architectures with strict alignment requirements 295255570Strasz * we may need to allocate ip_bhs and copy the data into it. 296255570Strasz * For some reason, though, not doing this doesn't seem 297255570Strasz * to cause problems; tested on sparc64. 298255570Strasz */ 299255570Strasz 300255570Strasz *availablep -= sizeof(struct iscsi_bhs); 301255570Strasz return (0); 302255570Strasz} 303255570Strasz 304255570Straszstatic int 305255570Straszicl_pdu_receive_ahs(struct icl_pdu *request, size_t *availablep) 306255570Strasz{ 307255570Strasz 308255570Strasz request->ip_ahs_len = icl_pdu_ahs_length(request); 309255570Strasz if (request->ip_ahs_len == 0) 310255570Strasz return (0); 311255570Strasz 312255570Strasz request->ip_ahs_mbuf = icl_conn_receive(request->ip_conn, 313255570Strasz request->ip_ahs_len); 314255570Strasz if (request->ip_ahs_mbuf == NULL) { 315255570Strasz ICL_DEBUG("failed to receive AHS"); 316255570Strasz return (-1); 317255570Strasz } 318255570Strasz 319255570Strasz *availablep -= request->ip_ahs_len; 320255570Strasz return (0); 321255570Strasz} 322255570Strasz 323255570Straszstatic uint32_t 324255570Straszicl_mbuf_to_crc32c(const struct mbuf *m0) 325255570Strasz{ 326255570Strasz uint32_t digest = 0xffffffff; 327255570Strasz const struct mbuf *m; 328255570Strasz 329255570Strasz for (m = m0; m != NULL; m = m->m_next) 330255570Strasz digest = calculate_crc32c(digest, 331255570Strasz mtod(m, const void *), m->m_len); 332255570Strasz 333255570Strasz digest = digest ^ 0xffffffff; 334255570Strasz 335255570Strasz return (digest); 336255570Strasz} 337255570Strasz 338255570Straszstatic int 339255570Straszicl_pdu_check_header_digest(struct icl_pdu *request, size_t *availablep) 340255570Strasz{ 341255570Strasz struct mbuf *m; 342255570Strasz uint32_t received_digest, valid_digest; 343255570Strasz 344255570Strasz if (request->ip_conn->ic_header_crc32c == false) 345255570Strasz return (0); 346255570Strasz 347255570Strasz m = icl_conn_receive(request->ip_conn, ISCSI_HEADER_DIGEST_SIZE); 348255570Strasz if (m == NULL) { 349255570Strasz ICL_DEBUG("failed to receive header digest"); 350255570Strasz return (-1); 351255570Strasz } 352255570Strasz 353255570Strasz CTASSERT(sizeof(received_digest) == ISCSI_HEADER_DIGEST_SIZE); 354261771Strasz m_copydata(m, 0, ISCSI_HEADER_DIGEST_SIZE, (void *)&received_digest); 355255570Strasz m_freem(m); 356255570Strasz 357255570Strasz *availablep -= ISCSI_HEADER_DIGEST_SIZE; 358255570Strasz 359255570Strasz /* 360255570Strasz * XXX: Handle AHS. 361255570Strasz */ 362255570Strasz valid_digest = icl_mbuf_to_crc32c(request->ip_bhs_mbuf); 363255570Strasz if (received_digest != valid_digest) { 364255570Strasz ICL_WARN("header digest check failed; got 0x%x, " 365255570Strasz "should be 0x%x", received_digest, valid_digest); 366255570Strasz return (-1); 367255570Strasz } 368255570Strasz 369255570Strasz return (0); 370255570Strasz} 371255570Strasz 372255570Strasz/* 373255570Strasz * Return the number of bytes that should be waiting in the receive socket 374255570Strasz * before icl_pdu_receive_data_segment() gets called. 375255570Strasz */ 376255570Straszstatic size_t 377255570Straszicl_pdu_data_segment_receive_len(const struct icl_pdu *request) 378255570Strasz{ 379255570Strasz size_t len; 380255570Strasz 381255570Strasz len = icl_pdu_data_segment_length(request); 382255570Strasz if (len == 0) 383255570Strasz return (0); 384255570Strasz 385255570Strasz /* 386255570Strasz * Account for the parts of data segment already read from 387255570Strasz * the socket buffer. 388255570Strasz */ 389255570Strasz KASSERT(len > request->ip_data_len, ("len <= request->ip_data_len")); 390255570Strasz len -= request->ip_data_len; 391255570Strasz 392255570Strasz /* 393255570Strasz * Don't always wait for the full data segment to be delivered 394255570Strasz * to the socket; this might badly affect performance due to 395255570Strasz * TCP window scaling. 396255570Strasz */ 397255570Strasz if (len > partial_receive_len) { 398255570Strasz#if 0 399255570Strasz ICL_DEBUG("need %zd bytes of data, limiting to %zd", 400255570Strasz len, partial_receive_len)); 401255570Strasz#endif 402255570Strasz len = partial_receive_len; 403255570Strasz 404255570Strasz return (len); 405255570Strasz } 406255570Strasz 407255570Strasz /* 408255570Strasz * Account for padding. Note that due to the way code is written, 409255570Strasz * the icl_pdu_receive_data_segment() must always receive padding 410255570Strasz * along with the last part of data segment, because it would be 411255570Strasz * impossible to tell whether we've already received the full data 412255570Strasz * segment including padding, or without it. 413255570Strasz */ 414255570Strasz if ((len % 4) != 0) 415255570Strasz len += 4 - (len % 4); 416255570Strasz 417255570Strasz#if 0 418255570Strasz ICL_DEBUG("need %zd bytes of data", len)); 419255570Strasz#endif 420255570Strasz 421255570Strasz return (len); 422255570Strasz} 423255570Strasz 424255570Straszstatic int 425255570Straszicl_pdu_receive_data_segment(struct icl_pdu *request, 426255570Strasz size_t *availablep, bool *more_neededp) 427255570Strasz{ 428255570Strasz struct icl_conn *ic; 429255570Strasz size_t len, padding = 0; 430255570Strasz struct mbuf *m; 431255570Strasz 432255570Strasz ic = request->ip_conn; 433255570Strasz 434255570Strasz *more_neededp = false; 435255570Strasz ic->ic_receive_len = 0; 436255570Strasz 437255570Strasz len = icl_pdu_data_segment_length(request); 438255570Strasz if (len == 0) 439255570Strasz return (0); 440255570Strasz 441255570Strasz if ((len % 4) != 0) 442255570Strasz padding = 4 - (len % 4); 443255570Strasz 444255570Strasz /* 445255570Strasz * Account for already received parts of data segment. 446255570Strasz */ 447255570Strasz KASSERT(len > request->ip_data_len, ("len <= request->ip_data_len")); 448255570Strasz len -= request->ip_data_len; 449255570Strasz 450255570Strasz if (len + padding > *availablep) { 451255570Strasz /* 452255570Strasz * Not enough data in the socket buffer. Receive as much 453255570Strasz * as we can. Don't receive padding, since, obviously, it's 454255570Strasz * not the end of data segment yet. 455255570Strasz */ 456255570Strasz#if 0 457255570Strasz ICL_DEBUG("limited from %zd to %zd", 458255570Strasz len + padding, *availablep - padding)); 459255570Strasz#endif 460255570Strasz len = *availablep - padding; 461255570Strasz *more_neededp = true; 462255570Strasz padding = 0; 463255570Strasz } 464255570Strasz 465255570Strasz /* 466255570Strasz * Must not try to receive padding without at least one byte 467255570Strasz * of actual data segment. 468255570Strasz */ 469255570Strasz if (len > 0) { 470255570Strasz m = icl_conn_receive(request->ip_conn, len + padding); 471255570Strasz if (m == NULL) { 472255570Strasz ICL_DEBUG("failed to receive data segment"); 473255570Strasz return (-1); 474255570Strasz } 475255570Strasz 476255570Strasz if (request->ip_data_mbuf == NULL) 477255570Strasz request->ip_data_mbuf = m; 478255570Strasz else 479255570Strasz m_cat(request->ip_data_mbuf, m); 480255570Strasz 481255570Strasz request->ip_data_len += len; 482255570Strasz *availablep -= len + padding; 483255570Strasz } else 484255570Strasz ICL_DEBUG("len 0"); 485255570Strasz 486255570Strasz if (*more_neededp) 487255570Strasz ic->ic_receive_len = 488255570Strasz icl_pdu_data_segment_receive_len(request); 489255570Strasz 490255570Strasz return (0); 491255570Strasz} 492255570Strasz 493255570Straszstatic int 494255570Straszicl_pdu_check_data_digest(struct icl_pdu *request, size_t *availablep) 495255570Strasz{ 496255570Strasz struct mbuf *m; 497255570Strasz uint32_t received_digest, valid_digest; 498255570Strasz 499255570Strasz if (request->ip_conn->ic_data_crc32c == false) 500255570Strasz return (0); 501255570Strasz 502255570Strasz if (request->ip_data_len == 0) 503255570Strasz return (0); 504255570Strasz 505255570Strasz m = icl_conn_receive(request->ip_conn, ISCSI_DATA_DIGEST_SIZE); 506255570Strasz if (m == NULL) { 507255570Strasz ICL_DEBUG("failed to receive data digest"); 508255570Strasz return (-1); 509255570Strasz } 510255570Strasz 511255570Strasz CTASSERT(sizeof(received_digest) == ISCSI_DATA_DIGEST_SIZE); 512261771Strasz m_copydata(m, 0, ISCSI_DATA_DIGEST_SIZE, (void *)&received_digest); 513255570Strasz m_freem(m); 514255570Strasz 515255570Strasz *availablep -= ISCSI_DATA_DIGEST_SIZE; 516255570Strasz 517255570Strasz /* 518255570Strasz * Note that ip_data_mbuf also contains padding; since digest 519255570Strasz * calculation is supposed to include that, we iterate over 520255570Strasz * the entire ip_data_mbuf chain, not just ip_data_len bytes of it. 521255570Strasz */ 522255570Strasz valid_digest = icl_mbuf_to_crc32c(request->ip_data_mbuf); 523255570Strasz if (received_digest != valid_digest) { 524255570Strasz ICL_WARN("data digest check failed; got 0x%x, " 525255570Strasz "should be 0x%x", received_digest, valid_digest); 526255570Strasz return (-1); 527255570Strasz } 528255570Strasz 529255570Strasz return (0); 530255570Strasz} 531255570Strasz 532255570Strasz/* 533255570Strasz * Somewhat contrary to the name, this attempts to receive only one 534255570Strasz * "part" of PDU at a time; call it repeatedly until it returns non-NULL. 535255570Strasz */ 536255570Straszstatic struct icl_pdu * 537255570Straszicl_conn_receive_pdu(struct icl_conn *ic, size_t *availablep) 538255570Strasz{ 539255570Strasz struct icl_pdu *request; 540255570Strasz struct socket *so; 541255570Strasz size_t len; 542255570Strasz int error; 543255570Strasz bool more_needed; 544255570Strasz 545255570Strasz so = ic->ic_socket; 546255570Strasz 547255570Strasz if (ic->ic_receive_state == ICL_CONN_STATE_BHS) { 548255570Strasz KASSERT(ic->ic_receive_pdu == NULL, 549255570Strasz ("ic->ic_receive_pdu != NULL")); 550255570Strasz request = icl_pdu_new(ic, M_NOWAIT); 551255570Strasz if (request == NULL) { 552255570Strasz ICL_DEBUG("failed to allocate PDU; " 553255570Strasz "dropping connection"); 554255570Strasz icl_conn_fail(ic); 555255570Strasz return (NULL); 556255570Strasz } 557255570Strasz ic->ic_receive_pdu = request; 558255570Strasz } else { 559255570Strasz KASSERT(ic->ic_receive_pdu != NULL, 560255570Strasz ("ic->ic_receive_pdu == NULL")); 561255570Strasz request = ic->ic_receive_pdu; 562255570Strasz } 563255570Strasz 564255570Strasz if (*availablep < ic->ic_receive_len) { 565255570Strasz#if 0 566255570Strasz ICL_DEBUG("not enough data; need %zd, " 567255570Strasz "have %zd", ic->ic_receive_len, *availablep); 568255570Strasz#endif 569255570Strasz return (NULL); 570255570Strasz } 571255570Strasz 572255570Strasz switch (ic->ic_receive_state) { 573255570Strasz case ICL_CONN_STATE_BHS: 574255570Strasz //ICL_DEBUG("receiving BHS"); 575255570Strasz error = icl_pdu_receive_bhs(request, availablep); 576255570Strasz if (error != 0) { 577255570Strasz ICL_DEBUG("failed to receive BHS; " 578255570Strasz "dropping connection"); 579255570Strasz break; 580255570Strasz } 581255570Strasz 582255570Strasz /* 583255570Strasz * We don't enforce any limit for AHS length; 584255570Strasz * its length is stored in 8 bit field. 585255570Strasz */ 586255570Strasz 587255570Strasz len = icl_pdu_data_segment_length(request); 588255570Strasz if (len > ic->ic_max_data_segment_length) { 589255570Strasz ICL_WARN("received data segment " 590255570Strasz "length %zd is larger than negotiated " 591255570Strasz "MaxDataSegmentLength %zd; " 592255570Strasz "dropping connection", 593255570Strasz len, ic->ic_max_data_segment_length); 594256228Strasz error = EINVAL; 595255570Strasz break; 596255570Strasz } 597255570Strasz 598255570Strasz ic->ic_receive_state = ICL_CONN_STATE_AHS; 599255570Strasz ic->ic_receive_len = icl_pdu_ahs_length(request); 600255570Strasz break; 601255570Strasz 602255570Strasz case ICL_CONN_STATE_AHS: 603255570Strasz //ICL_DEBUG("receiving AHS"); 604255570Strasz error = icl_pdu_receive_ahs(request, availablep); 605255570Strasz if (error != 0) { 606255570Strasz ICL_DEBUG("failed to receive AHS; " 607255570Strasz "dropping connection"); 608255570Strasz break; 609255570Strasz } 610255570Strasz ic->ic_receive_state = ICL_CONN_STATE_HEADER_DIGEST; 611255570Strasz if (ic->ic_header_crc32c == false) 612255570Strasz ic->ic_receive_len = 0; 613255570Strasz else 614255570Strasz ic->ic_receive_len = ISCSI_HEADER_DIGEST_SIZE; 615255570Strasz break; 616255570Strasz 617255570Strasz case ICL_CONN_STATE_HEADER_DIGEST: 618255570Strasz //ICL_DEBUG("receiving header digest"); 619255570Strasz error = icl_pdu_check_header_digest(request, availablep); 620255570Strasz if (error != 0) { 621255570Strasz ICL_DEBUG("header digest failed; " 622255570Strasz "dropping connection"); 623255570Strasz break; 624255570Strasz } 625255570Strasz 626255570Strasz ic->ic_receive_state = ICL_CONN_STATE_DATA; 627255570Strasz ic->ic_receive_len = 628255570Strasz icl_pdu_data_segment_receive_len(request); 629255570Strasz break; 630255570Strasz 631255570Strasz case ICL_CONN_STATE_DATA: 632255570Strasz //ICL_DEBUG("receiving data segment"); 633255570Strasz error = icl_pdu_receive_data_segment(request, availablep, 634255570Strasz &more_needed); 635255570Strasz if (error != 0) { 636255570Strasz ICL_DEBUG("failed to receive data segment;" 637255570Strasz "dropping connection"); 638255570Strasz break; 639255570Strasz } 640255570Strasz 641255570Strasz if (more_needed) 642255570Strasz break; 643255570Strasz 644255570Strasz ic->ic_receive_state = ICL_CONN_STATE_DATA_DIGEST; 645261770Strasz if (request->ip_data_len == 0 || ic->ic_data_crc32c == false) 646255570Strasz ic->ic_receive_len = 0; 647255570Strasz else 648255570Strasz ic->ic_receive_len = ISCSI_DATA_DIGEST_SIZE; 649255570Strasz break; 650255570Strasz 651255570Strasz case ICL_CONN_STATE_DATA_DIGEST: 652255570Strasz //ICL_DEBUG("receiving data digest"); 653255570Strasz error = icl_pdu_check_data_digest(request, availablep); 654255570Strasz if (error != 0) { 655255570Strasz ICL_DEBUG("data digest failed; " 656255570Strasz "dropping connection"); 657255570Strasz break; 658255570Strasz } 659255570Strasz 660255570Strasz /* 661255570Strasz * We've received complete PDU; reset the receive state machine 662255570Strasz * and return the PDU. 663255570Strasz */ 664255570Strasz ic->ic_receive_state = ICL_CONN_STATE_BHS; 665255570Strasz ic->ic_receive_len = sizeof(struct iscsi_bhs); 666255570Strasz ic->ic_receive_pdu = NULL; 667255570Strasz return (request); 668255570Strasz 669255570Strasz default: 670255570Strasz panic("invalid ic_receive_state %d\n", ic->ic_receive_state); 671255570Strasz } 672255570Strasz 673255570Strasz if (error != 0) { 674269925Strasz /* 675269925Strasz * Don't free the PDU; it's pointed to by ic->ic_receive_pdu 676269925Strasz * and will get freed in icl_conn_close(). 677269925Strasz */ 678255570Strasz icl_conn_fail(ic); 679255570Strasz } 680255570Strasz 681255570Strasz return (NULL); 682255570Strasz} 683255570Strasz 684255570Straszstatic void 685255570Straszicl_conn_receive_pdus(struct icl_conn *ic, size_t available) 686255570Strasz{ 687255570Strasz struct icl_pdu *response; 688255570Strasz struct socket *so; 689255570Strasz 690255570Strasz so = ic->ic_socket; 691255570Strasz 692255570Strasz /* 693255570Strasz * This can never happen; we're careful to only mess with ic->ic_socket 694255570Strasz * pointer when the send/receive threads are not running. 695255570Strasz */ 696255570Strasz KASSERT(so != NULL, ("NULL socket")); 697255570Strasz 698255570Strasz for (;;) { 699255570Strasz if (ic->ic_disconnecting) 700255570Strasz return; 701255570Strasz 702255570Strasz if (so->so_error != 0) { 703255570Strasz ICL_DEBUG("connection error %d; " 704255570Strasz "dropping connection", so->so_error); 705255570Strasz icl_conn_fail(ic); 706255570Strasz return; 707255570Strasz } 708255570Strasz 709255570Strasz /* 710255570Strasz * Loop until we have a complete PDU or there is not enough 711255570Strasz * data in the socket buffer. 712255570Strasz */ 713255570Strasz if (available < ic->ic_receive_len) { 714255570Strasz#if 0 715255570Strasz ICL_DEBUG("not enough data; have %zd, " 716255570Strasz "need %zd", available, 717255570Strasz ic->ic_receive_len); 718255570Strasz#endif 719255570Strasz return; 720255570Strasz } 721255570Strasz 722255570Strasz response = icl_conn_receive_pdu(ic, &available); 723255570Strasz if (response == NULL) 724255570Strasz continue; 725255570Strasz 726255570Strasz if (response->ip_ahs_len > 0) { 727255570Strasz ICL_WARN("received PDU with unsupported " 728255570Strasz "AHS; opcode 0x%x; dropping connection", 729255570Strasz response->ip_bhs->bhs_opcode); 730255570Strasz icl_pdu_free(response); 731255570Strasz icl_conn_fail(ic); 732255570Strasz return; 733255570Strasz } 734255570Strasz 735255570Strasz (ic->ic_receive)(response); 736255570Strasz } 737255570Strasz} 738255570Strasz 739255570Straszstatic void 740255570Straszicl_receive_thread(void *arg) 741255570Strasz{ 742255570Strasz struct icl_conn *ic; 743255570Strasz size_t available; 744255570Strasz struct socket *so; 745255570Strasz 746255570Strasz ic = arg; 747255570Strasz so = ic->ic_socket; 748255570Strasz 749255570Strasz ICL_CONN_LOCK(ic); 750255570Strasz ic->ic_receive_running = true; 751255570Strasz ICL_CONN_UNLOCK(ic); 752255570Strasz 753255570Strasz for (;;) { 754255570Strasz if (ic->ic_disconnecting) { 755255570Strasz //ICL_DEBUG("terminating"); 756257339Strasz break; 757255570Strasz } 758255570Strasz 759265524Strasz /* 760265524Strasz * Set the low watermark, to be checked by 761265524Strasz * soreadable() in icl_soupcall_receive() 762265524Strasz * to avoid unneccessary wakeups until there 763265524Strasz * is enough data received to read the PDU. 764265524Strasz */ 765255570Strasz SOCKBUF_LOCK(&so->so_rcv); 766255570Strasz available = so->so_rcv.sb_cc; 767255570Strasz if (available < ic->ic_receive_len) { 768255570Strasz so->so_rcv.sb_lowat = ic->ic_receive_len; 769255570Strasz cv_wait(&ic->ic_receive_cv, &so->so_rcv.sb_mtx); 770265524Strasz } else 771265524Strasz so->so_rcv.sb_lowat = so->so_rcv.sb_hiwat + 1; 772255570Strasz SOCKBUF_UNLOCK(&so->so_rcv); 773255570Strasz 774255570Strasz icl_conn_receive_pdus(ic, available); 775255570Strasz } 776257339Strasz 777257339Strasz ICL_CONN_LOCK(ic); 778257339Strasz ic->ic_receive_running = false; 779257339Strasz ICL_CONN_UNLOCK(ic); 780257339Strasz kthread_exit(); 781255570Strasz} 782255570Strasz 783255570Straszstatic int 784255570Straszicl_soupcall_receive(struct socket *so, void *arg, int waitflag) 785255570Strasz{ 786255570Strasz struct icl_conn *ic; 787255570Strasz 788265524Strasz if (!soreadable(so)) 789265524Strasz return (SU_OK); 790265524Strasz 791255570Strasz ic = arg; 792255570Strasz cv_signal(&ic->ic_receive_cv); 793255570Strasz return (SU_OK); 794255570Strasz} 795255570Strasz 796255570Straszstatic int 797265502Straszicl_pdu_finalize(struct icl_pdu *request) 798255570Strasz{ 799255570Strasz size_t padding, pdu_len; 800255570Strasz uint32_t digest, zero = 0; 801265502Strasz int ok; 802255570Strasz struct icl_conn *ic; 803255570Strasz 804255570Strasz ic = request->ip_conn; 805255570Strasz 806255570Strasz icl_pdu_set_data_segment_length(request, request->ip_data_len); 807255570Strasz 808255570Strasz pdu_len = icl_pdu_size(request); 809255570Strasz 810255570Strasz if (ic->ic_header_crc32c) { 811255570Strasz digest = icl_mbuf_to_crc32c(request->ip_bhs_mbuf); 812255570Strasz ok = m_append(request->ip_bhs_mbuf, sizeof(digest), 813255570Strasz (void *)&digest); 814255570Strasz if (ok != 1) { 815255570Strasz ICL_WARN("failed to append header digest"); 816255570Strasz return (1); 817255570Strasz } 818255570Strasz } 819255570Strasz 820255570Strasz if (request->ip_data_len != 0) { 821255570Strasz padding = icl_pdu_padding(request); 822255570Strasz if (padding > 0) { 823255570Strasz ok = m_append(request->ip_data_mbuf, padding, 824255570Strasz (void *)&zero); 825255570Strasz if (ok != 1) { 826255570Strasz ICL_WARN("failed to append padding"); 827255570Strasz return (1); 828255570Strasz } 829255570Strasz } 830255570Strasz 831255570Strasz if (ic->ic_data_crc32c) { 832255570Strasz digest = icl_mbuf_to_crc32c(request->ip_data_mbuf); 833255570Strasz 834255570Strasz ok = m_append(request->ip_data_mbuf, sizeof(digest), 835255570Strasz (void *)&digest); 836255570Strasz if (ok != 1) { 837265502Strasz ICL_WARN("failed to append data digest"); 838255570Strasz return (1); 839255570Strasz } 840255570Strasz } 841255570Strasz 842255570Strasz m_cat(request->ip_bhs_mbuf, request->ip_data_mbuf); 843255570Strasz request->ip_data_mbuf = NULL; 844255570Strasz } 845255570Strasz 846255570Strasz request->ip_bhs_mbuf->m_pkthdr.len = pdu_len; 847255570Strasz 848255570Strasz return (0); 849255570Strasz} 850255570Strasz 851255570Straszstatic void 852265503Straszicl_conn_send_pdus(struct icl_conn *ic, struct icl_pdu_stailq *queue) 853255570Strasz{ 854265502Strasz struct icl_pdu *request, *request2; 855255570Strasz struct socket *so; 856265502Strasz size_t available, size, size2; 857265502Strasz int coalesced, error; 858255570Strasz 859265502Strasz ICL_CONN_LOCK_ASSERT_NOT(ic); 860255570Strasz 861255570Strasz so = ic->ic_socket; 862255570Strasz 863255570Strasz SOCKBUF_LOCK(&so->so_snd); 864265502Strasz /* 865265502Strasz * Check how much space do we have for transmit. We can't just 866265502Strasz * call sosend() and retry when we get EWOULDBLOCK or EMSGSIZE, 867265502Strasz * as it always frees the mbuf chain passed to it, even in case 868265502Strasz * of error. 869265502Strasz */ 870255570Strasz available = sbspace(&so->so_snd); 871265502Strasz 872265502Strasz /* 873265524Strasz * Notify the socket upcall that we don't need wakeups 874265524Strasz * for the time being. 875265502Strasz */ 876265524Strasz so->so_snd.sb_lowat = so->so_snd.sb_hiwat + 1; 877255570Strasz SOCKBUF_UNLOCK(&so->so_snd); 878255570Strasz 879265502Strasz while (!STAILQ_EMPTY(queue)) { 880255570Strasz if (ic->ic_disconnecting) 881255570Strasz return; 882265502Strasz request = STAILQ_FIRST(queue); 883255570Strasz size = icl_pdu_size(request); 884255570Strasz if (available < size) { 885265502Strasz 886255570Strasz /* 887265524Strasz * Set the low watermark, to be checked by 888265525Strasz * sowriteable() in icl_soupcall_send() 889265502Strasz * to avoid unneccessary wakeups until there 890265502Strasz * is enough space for the PDU to fit. 891255570Strasz */ 892255570Strasz SOCKBUF_LOCK(&so->so_snd); 893265524Strasz available = sbspace(&so->so_snd); 894265524Strasz if (available < size) { 895265524Strasz#if 1 896265524Strasz ICL_DEBUG("no space to send; " 897265524Strasz "have %zd, need %zd", 898265524Strasz available, size); 899265524Strasz#endif 900265524Strasz so->so_snd.sb_lowat = size; 901265524Strasz SOCKBUF_UNLOCK(&so->so_snd); 902265524Strasz return; 903265524Strasz } 904255570Strasz SOCKBUF_UNLOCK(&so->so_snd); 905255570Strasz } 906265502Strasz STAILQ_REMOVE_HEAD(queue, ip_next); 907265502Strasz error = icl_pdu_finalize(request); 908255570Strasz if (error != 0) { 909265502Strasz ICL_DEBUG("failed to finalize PDU; " 910255570Strasz "dropping connection"); 911255570Strasz icl_conn_fail(ic); 912265502Strasz icl_pdu_free(request); 913255570Strasz return; 914265502Strasz } 915265502Strasz if (coalesce) { 916265502Strasz coalesced = 1; 917265502Strasz for (;;) { 918265502Strasz request2 = STAILQ_FIRST(queue); 919265502Strasz if (request2 == NULL) 920265502Strasz break; 921265502Strasz size2 = icl_pdu_size(request2); 922265502Strasz if (available < size + size2) 923265502Strasz break; 924265502Strasz STAILQ_REMOVE_HEAD(queue, ip_next); 925265502Strasz error = icl_pdu_finalize(request2); 926265502Strasz if (error != 0) { 927265502Strasz ICL_DEBUG("failed to finalize PDU; " 928265502Strasz "dropping connection"); 929265502Strasz icl_conn_fail(ic); 930265502Strasz icl_pdu_free(request); 931265502Strasz icl_pdu_free(request2); 932265502Strasz return; 933265502Strasz } 934265502Strasz m_cat(request->ip_bhs_mbuf, request2->ip_bhs_mbuf); 935265502Strasz request2->ip_bhs_mbuf = NULL; 936265502Strasz request->ip_bhs_mbuf->m_pkthdr.len += size2; 937265502Strasz size += size2; 938265502Strasz STAILQ_REMOVE_AFTER(queue, request, ip_next); 939265502Strasz icl_pdu_free(request2); 940265502Strasz coalesced++; 941265502Strasz } 942265502Strasz#if 0 943265502Strasz if (coalesced > 1) { 944265502Strasz ICL_DEBUG("coalesced %d PDUs into %zd bytes", 945265502Strasz coalesced, size); 946265502Strasz } 947265502Strasz#endif 948265502Strasz } 949265502Strasz available -= size; 950265502Strasz error = sosend(so, NULL, NULL, request->ip_bhs_mbuf, 951265502Strasz NULL, MSG_DONTWAIT, curthread); 952265502Strasz request->ip_bhs_mbuf = NULL; /* Sosend consumes the mbuf. */ 953265502Strasz if (error != 0) { 954265502Strasz ICL_DEBUG("failed to send PDU, error %d; " 955265502Strasz "dropping connection", error); 956265502Strasz icl_conn_fail(ic); 957265502Strasz icl_pdu_free(request); 958265502Strasz return; 959265502Strasz } 960255570Strasz icl_pdu_free(request); 961255570Strasz } 962255570Strasz} 963255570Strasz 964255570Straszstatic void 965255570Straszicl_send_thread(void *arg) 966255570Strasz{ 967255570Strasz struct icl_conn *ic; 968265503Strasz struct icl_pdu_stailq queue; 969255570Strasz 970255570Strasz ic = arg; 971255570Strasz 972265502Strasz STAILQ_INIT(&queue); 973265502Strasz 974255570Strasz ICL_CONN_LOCK(ic); 975255570Strasz ic->ic_send_running = true; 976255570Strasz 977255570Strasz for (;;) { 978255570Strasz if (ic->ic_disconnecting) { 979255570Strasz //ICL_DEBUG("terminating"); 980257339Strasz break; 981255570Strasz } 982265502Strasz 983265502Strasz for (;;) { 984265502Strasz /* 985265502Strasz * If the local queue is empty, populate it from 986265502Strasz * the main one. This way the icl_conn_send_pdus() 987265502Strasz * can go through all the queued PDUs without holding 988265502Strasz * any locks. 989265502Strasz */ 990265502Strasz if (STAILQ_EMPTY(&queue)) 991265502Strasz STAILQ_SWAP(&ic->ic_to_send, &queue, icl_pdu); 992265502Strasz 993265502Strasz ic->ic_check_send_space = false; 994265502Strasz ICL_CONN_UNLOCK(ic); 995265502Strasz icl_conn_send_pdus(ic, &queue); 996265502Strasz ICL_CONN_LOCK(ic); 997265502Strasz 998265502Strasz /* 999265502Strasz * The icl_soupcall_send() was called since the last 1000265502Strasz * call to sbspace(); go around; 1001265502Strasz */ 1002265502Strasz if (ic->ic_check_send_space) 1003265502Strasz continue; 1004265502Strasz 1005265502Strasz /* 1006265502Strasz * Local queue is empty, but we still have PDUs 1007265502Strasz * in the main one; go around. 1008265502Strasz */ 1009265502Strasz if (STAILQ_EMPTY(&queue) && 1010265502Strasz !STAILQ_EMPTY(&ic->ic_to_send)) 1011265502Strasz continue; 1012265502Strasz 1013265502Strasz /* 1014265502Strasz * There might be some stuff in the local queue, 1015265502Strasz * which didn't get sent due to not having enough send 1016265502Strasz * space. Wait for socket upcall. 1017265502Strasz */ 1018265502Strasz break; 1019265502Strasz } 1020265502Strasz 1021265495Strasz cv_wait(&ic->ic_send_cv, ic->ic_lock); 1022255570Strasz } 1023257339Strasz 1024265502Strasz /* 1025265502Strasz * We're exiting; move PDUs back to the main queue, so they can 1026265502Strasz * get freed properly. At this point ordering doesn't matter. 1027265502Strasz */ 1028265502Strasz STAILQ_CONCAT(&ic->ic_to_send, &queue); 1029265502Strasz 1030257339Strasz ic->ic_send_running = false; 1031257339Strasz ICL_CONN_UNLOCK(ic); 1032257339Strasz kthread_exit(); 1033255570Strasz} 1034255570Strasz 1035255570Straszstatic int 1036255570Straszicl_soupcall_send(struct socket *so, void *arg, int waitflag) 1037255570Strasz{ 1038255570Strasz struct icl_conn *ic; 1039255570Strasz 1040265524Strasz if (!sowriteable(so)) 1041265524Strasz return (SU_OK); 1042265524Strasz 1043255570Strasz ic = arg; 1044265502Strasz 1045265502Strasz ICL_CONN_LOCK(ic); 1046265502Strasz ic->ic_check_send_space = true; 1047265502Strasz ICL_CONN_UNLOCK(ic); 1048265502Strasz 1049255570Strasz cv_signal(&ic->ic_send_cv); 1050265502Strasz 1051255570Strasz return (SU_OK); 1052255570Strasz} 1053255570Strasz 1054255570Straszint 1055265502Straszicl_pdu_append_data(struct icl_pdu *request, const void *addr, size_t len, 1056265502Strasz int flags) 1057255570Strasz{ 1058255570Strasz struct mbuf *mb, *newmb; 1059255570Strasz size_t copylen, off = 0; 1060255570Strasz 1061255570Strasz KASSERT(len > 0, ("len == 0")); 1062255570Strasz 1063255570Strasz newmb = m_getm2(NULL, len, flags, MT_DATA, M_PKTHDR); 1064255570Strasz if (newmb == NULL) { 1065255570Strasz ICL_WARN("failed to allocate mbuf for %zd bytes", len); 1066255570Strasz return (ENOMEM); 1067255570Strasz } 1068255570Strasz 1069255570Strasz for (mb = newmb; mb != NULL; mb = mb->m_next) { 1070255570Strasz copylen = min(M_TRAILINGSPACE(mb), len - off); 1071255570Strasz memcpy(mtod(mb, char *), (const char *)addr + off, copylen); 1072255570Strasz mb->m_len = copylen; 1073255570Strasz off += copylen; 1074255570Strasz } 1075255570Strasz KASSERT(off == len, ("%s: off != len", __func__)); 1076255570Strasz 1077255570Strasz if (request->ip_data_mbuf == NULL) { 1078255570Strasz request->ip_data_mbuf = newmb; 1079255570Strasz request->ip_data_len = len; 1080255570Strasz } else { 1081255570Strasz m_cat(request->ip_data_mbuf, newmb); 1082255570Strasz request->ip_data_len += len; 1083255570Strasz } 1084255570Strasz 1085255570Strasz return (0); 1086255570Strasz} 1087255570Strasz 1088255570Straszvoid 1089255570Straszicl_pdu_get_data(struct icl_pdu *ip, size_t off, void *addr, size_t len) 1090255570Strasz{ 1091255570Strasz 1092255570Strasz m_copydata(ip->ip_data_mbuf, off, len, addr); 1093255570Strasz} 1094255570Strasz 1095255570Straszvoid 1096255570Straszicl_pdu_queue(struct icl_pdu *ip) 1097255570Strasz{ 1098255570Strasz struct icl_conn *ic; 1099255570Strasz 1100255570Strasz ic = ip->ip_conn; 1101255570Strasz 1102265495Strasz ICL_CONN_LOCK_ASSERT(ic); 1103265495Strasz 1104255570Strasz if (ic->ic_disconnecting || ic->ic_socket == NULL) { 1105255570Strasz ICL_DEBUG("icl_pdu_queue on closed connection"); 1106256058Strasz icl_pdu_free(ip); 1107255570Strasz return; 1108255570Strasz } 1109265502Strasz 1110265502Strasz if (!STAILQ_EMPTY(&ic->ic_to_send)) { 1111265502Strasz STAILQ_INSERT_TAIL(&ic->ic_to_send, ip, ip_next); 1112265502Strasz /* 1113265502Strasz * If the queue is not empty, someone else had already 1114265502Strasz * signaled the send thread; no need to do that again, 1115265502Strasz * just return. 1116265502Strasz */ 1117265502Strasz return; 1118265502Strasz } 1119265502Strasz 1120265500Strasz STAILQ_INSERT_TAIL(&ic->ic_to_send, ip, ip_next); 1121255570Strasz cv_signal(&ic->ic_send_cv); 1122255570Strasz} 1123255570Strasz 1124255570Straszstruct icl_conn * 1125265496Straszicl_conn_new(const char *name, struct mtx *lock) 1126255570Strasz{ 1127255570Strasz struct icl_conn *ic; 1128255570Strasz 1129255570Strasz refcount_acquire(&icl_ncons); 1130255570Strasz 1131255570Strasz ic = uma_zalloc(icl_conn_zone, M_WAITOK | M_ZERO); 1132255570Strasz 1133265500Strasz STAILQ_INIT(&ic->ic_to_send); 1134265495Strasz ic->ic_lock = lock; 1135255570Strasz cv_init(&ic->ic_send_cv, "icl_tx"); 1136255570Strasz cv_init(&ic->ic_receive_cv, "icl_rx"); 1137265488Strasz#ifdef DIAGNOSTIC 1138255570Strasz refcount_init(&ic->ic_outstanding_pdus, 0); 1139265488Strasz#endif 1140255570Strasz ic->ic_max_data_segment_length = ICL_MAX_DATA_SEGMENT_LENGTH; 1141265496Strasz ic->ic_name = name; 1142255570Strasz 1143255570Strasz return (ic); 1144255570Strasz} 1145255570Strasz 1146255570Straszvoid 1147255570Straszicl_conn_free(struct icl_conn *ic) 1148255570Strasz{ 1149255570Strasz 1150255570Strasz cv_destroy(&ic->ic_send_cv); 1151255570Strasz cv_destroy(&ic->ic_receive_cv); 1152255570Strasz uma_zfree(icl_conn_zone, ic); 1153255570Strasz refcount_release(&icl_ncons); 1154255570Strasz} 1155255570Strasz 1156255570Straszstatic int 1157255570Straszicl_conn_start(struct icl_conn *ic) 1158255570Strasz{ 1159265498Strasz size_t minspace; 1160255570Strasz struct sockopt opt; 1161255570Strasz int error, one = 1; 1162255570Strasz 1163255570Strasz ICL_CONN_LOCK(ic); 1164255570Strasz 1165255570Strasz /* 1166255570Strasz * XXX: Ugly hack. 1167255570Strasz */ 1168255570Strasz if (ic->ic_socket == NULL) { 1169255570Strasz ICL_CONN_UNLOCK(ic); 1170255570Strasz return (EINVAL); 1171255570Strasz } 1172255570Strasz 1173255570Strasz ic->ic_receive_state = ICL_CONN_STATE_BHS; 1174255570Strasz ic->ic_receive_len = sizeof(struct iscsi_bhs); 1175255570Strasz ic->ic_disconnecting = false; 1176255570Strasz 1177255570Strasz ICL_CONN_UNLOCK(ic); 1178255570Strasz 1179255570Strasz /* 1180265498Strasz * For sendspace, this is required because the current code cannot 1181265498Strasz * send a PDU in pieces; thus, the minimum buffer size is equal 1182265498Strasz * to the maximum PDU size. "+4" is to account for possible padding. 1183255570Strasz * 1184265498Strasz * What we should actually do here is to use autoscaling, but set 1185265498Strasz * some minimal buffer size to "minspace". I don't know a way to do 1186265498Strasz * that, though. 1187255570Strasz */ 1188265498Strasz minspace = sizeof(struct iscsi_bhs) + ic->ic_max_data_segment_length + 1189265498Strasz ISCSI_HEADER_DIGEST_SIZE + ISCSI_DATA_DIGEST_SIZE + 4; 1190265498Strasz if (sendspace < minspace) { 1191265498Strasz ICL_WARN("kern.icl.sendspace too low; must be at least %zd", 1192265498Strasz minspace); 1193265498Strasz sendspace = minspace; 1194265498Strasz } 1195265498Strasz if (recvspace < minspace) { 1196265498Strasz ICL_WARN("kern.icl.recvspace too low; must be at least %zd", 1197265498Strasz minspace); 1198265498Strasz recvspace = minspace; 1199265498Strasz } 1200265498Strasz 1201265498Strasz error = soreserve(ic->ic_socket, sendspace, recvspace); 1202255570Strasz if (error != 0) { 1203255570Strasz ICL_WARN("soreserve failed with error %d", error); 1204255570Strasz icl_conn_close(ic); 1205255570Strasz return (error); 1206255570Strasz } 1207255570Strasz 1208255570Strasz /* 1209255570Strasz * Disable Nagle. 1210255570Strasz */ 1211255570Strasz bzero(&opt, sizeof(opt)); 1212255570Strasz opt.sopt_dir = SOPT_SET; 1213255570Strasz opt.sopt_level = IPPROTO_TCP; 1214255570Strasz opt.sopt_name = TCP_NODELAY; 1215255570Strasz opt.sopt_val = &one; 1216255570Strasz opt.sopt_valsize = sizeof(one); 1217255570Strasz error = sosetopt(ic->ic_socket, &opt); 1218255570Strasz if (error != 0) { 1219255570Strasz ICL_WARN("disabling TCP_NODELAY failed with error %d", error); 1220255570Strasz icl_conn_close(ic); 1221255570Strasz return (error); 1222255570Strasz } 1223255570Strasz 1224255570Strasz /* 1225255570Strasz * Start threads. 1226255570Strasz */ 1227265496Strasz error = kthread_add(icl_send_thread, ic, NULL, NULL, 0, 0, "%stx", 1228265496Strasz ic->ic_name); 1229255570Strasz if (error != 0) { 1230255570Strasz ICL_WARN("kthread_add(9) failed with error %d", error); 1231255570Strasz icl_conn_close(ic); 1232255570Strasz return (error); 1233255570Strasz } 1234255570Strasz 1235265496Strasz error = kthread_add(icl_receive_thread, ic, NULL, NULL, 0, 0, "%srx", 1236265496Strasz ic->ic_name); 1237255570Strasz if (error != 0) { 1238255570Strasz ICL_WARN("kthread_add(9) failed with error %d", error); 1239255570Strasz icl_conn_close(ic); 1240255570Strasz return (error); 1241255570Strasz } 1242255570Strasz 1243255570Strasz /* 1244255570Strasz * Register socket upcall, to get notified about incoming PDUs 1245255570Strasz * and free space to send outgoing ones. 1246255570Strasz */ 1247255570Strasz SOCKBUF_LOCK(&ic->ic_socket->so_snd); 1248255570Strasz soupcall_set(ic->ic_socket, SO_SND, icl_soupcall_send, ic); 1249255570Strasz SOCKBUF_UNLOCK(&ic->ic_socket->so_snd); 1250255570Strasz SOCKBUF_LOCK(&ic->ic_socket->so_rcv); 1251255570Strasz soupcall_set(ic->ic_socket, SO_RCV, icl_soupcall_receive, ic); 1252255570Strasz SOCKBUF_UNLOCK(&ic->ic_socket->so_rcv); 1253255570Strasz 1254255570Strasz return (0); 1255255570Strasz} 1256255570Strasz 1257255570Straszint 1258255570Straszicl_conn_handoff(struct icl_conn *ic, int fd) 1259255570Strasz{ 1260255570Strasz struct file *fp; 1261255570Strasz struct socket *so; 1262255570Strasz cap_rights_t rights; 1263255570Strasz int error; 1264255570Strasz 1265265495Strasz ICL_CONN_LOCK_ASSERT_NOT(ic); 1266265495Strasz 1267255570Strasz /* 1268255570Strasz * Steal the socket from userland. 1269255570Strasz */ 1270255570Strasz error = fget(curthread, fd, 1271255570Strasz cap_rights_init(&rights, CAP_SOCK_CLIENT), &fp); 1272255570Strasz if (error != 0) 1273255570Strasz return (error); 1274255570Strasz if (fp->f_type != DTYPE_SOCKET) { 1275255570Strasz fdrop(fp, curthread); 1276255570Strasz return (EINVAL); 1277255570Strasz } 1278255570Strasz so = fp->f_data; 1279255570Strasz if (so->so_type != SOCK_STREAM) { 1280255570Strasz fdrop(fp, curthread); 1281255570Strasz return (EINVAL); 1282255570Strasz } 1283255570Strasz 1284255570Strasz ICL_CONN_LOCK(ic); 1285255570Strasz 1286255570Strasz if (ic->ic_socket != NULL) { 1287255570Strasz ICL_CONN_UNLOCK(ic); 1288255570Strasz fdrop(fp, curthread); 1289255570Strasz return (EBUSY); 1290255570Strasz } 1291255570Strasz 1292255570Strasz ic->ic_socket = fp->f_data; 1293255570Strasz fp->f_ops = &badfileops; 1294255570Strasz fp->f_data = NULL; 1295255570Strasz fdrop(fp, curthread); 1296255570Strasz ICL_CONN_UNLOCK(ic); 1297255570Strasz 1298255570Strasz error = icl_conn_start(ic); 1299255570Strasz 1300255570Strasz return (error); 1301255570Strasz} 1302255570Strasz 1303255570Straszvoid 1304255570Straszicl_conn_shutdown(struct icl_conn *ic) 1305255570Strasz{ 1306265495Strasz ICL_CONN_LOCK_ASSERT_NOT(ic); 1307255570Strasz 1308255570Strasz ICL_CONN_LOCK(ic); 1309255570Strasz if (ic->ic_socket == NULL) { 1310255570Strasz ICL_CONN_UNLOCK(ic); 1311255570Strasz return; 1312255570Strasz } 1313255570Strasz ICL_CONN_UNLOCK(ic); 1314255570Strasz 1315255570Strasz soshutdown(ic->ic_socket, SHUT_RDWR); 1316255570Strasz} 1317255570Strasz 1318255570Straszvoid 1319255570Straszicl_conn_close(struct icl_conn *ic) 1320255570Strasz{ 1321255570Strasz struct icl_pdu *pdu; 1322255570Strasz 1323265495Strasz ICL_CONN_LOCK_ASSERT_NOT(ic); 1324265495Strasz 1325255570Strasz ICL_CONN_LOCK(ic); 1326255570Strasz if (ic->ic_socket == NULL) { 1327255570Strasz ICL_CONN_UNLOCK(ic); 1328255570Strasz return; 1329255570Strasz } 1330255570Strasz 1331265502Strasz /* 1332265502Strasz * Deregister socket upcalls. 1333265502Strasz */ 1334265502Strasz ICL_CONN_UNLOCK(ic); 1335265502Strasz SOCKBUF_LOCK(&ic->ic_socket->so_snd); 1336265502Strasz if (ic->ic_socket->so_snd.sb_upcall != NULL) 1337265502Strasz soupcall_clear(ic->ic_socket, SO_SND); 1338265502Strasz SOCKBUF_UNLOCK(&ic->ic_socket->so_snd); 1339265502Strasz SOCKBUF_LOCK(&ic->ic_socket->so_rcv); 1340265502Strasz if (ic->ic_socket->so_rcv.sb_upcall != NULL) 1341265502Strasz soupcall_clear(ic->ic_socket, SO_RCV); 1342265502Strasz SOCKBUF_UNLOCK(&ic->ic_socket->so_rcv); 1343265502Strasz ICL_CONN_LOCK(ic); 1344265502Strasz 1345255570Strasz ic->ic_disconnecting = true; 1346255570Strasz 1347255570Strasz /* 1348255570Strasz * Wake up the threads, so they can properly terminate. 1349255570Strasz */ 1350255570Strasz cv_signal(&ic->ic_receive_cv); 1351255570Strasz cv_signal(&ic->ic_send_cv); 1352255570Strasz while (ic->ic_receive_running || ic->ic_send_running) { 1353255570Strasz //ICL_DEBUG("waiting for send/receive threads to terminate"); 1354255570Strasz ICL_CONN_UNLOCK(ic); 1355255570Strasz cv_signal(&ic->ic_receive_cv); 1356255570Strasz cv_signal(&ic->ic_send_cv); 1357255570Strasz pause("icl_close", 1 * hz); 1358255570Strasz ICL_CONN_LOCK(ic); 1359255570Strasz } 1360255570Strasz //ICL_DEBUG("send/receive threads terminated"); 1361255570Strasz 1362265502Strasz ICL_CONN_UNLOCK(ic); 1363255570Strasz soclose(ic->ic_socket); 1364265502Strasz ICL_CONN_LOCK(ic); 1365255570Strasz ic->ic_socket = NULL; 1366255570Strasz 1367255570Strasz if (ic->ic_receive_pdu != NULL) { 1368255570Strasz //ICL_DEBUG("freeing partially received PDU"); 1369255570Strasz icl_pdu_free(ic->ic_receive_pdu); 1370255570Strasz ic->ic_receive_pdu = NULL; 1371255570Strasz } 1372255570Strasz 1373255570Strasz /* 1374255570Strasz * Remove any outstanding PDUs from the send queue. 1375255570Strasz */ 1376265500Strasz while (!STAILQ_EMPTY(&ic->ic_to_send)) { 1377265500Strasz pdu = STAILQ_FIRST(&ic->ic_to_send); 1378265500Strasz STAILQ_REMOVE_HEAD(&ic->ic_to_send, ip_next); 1379255570Strasz icl_pdu_free(pdu); 1380255570Strasz } 1381255570Strasz 1382265500Strasz KASSERT(STAILQ_EMPTY(&ic->ic_to_send), 1383255570Strasz ("destroying session with non-empty send queue")); 1384265499Strasz#ifdef DIAGNOSTIC 1385255570Strasz KASSERT(ic->ic_outstanding_pdus == 0, 1386255570Strasz ("destroying session with %d outstanding PDUs", 1387255570Strasz ic->ic_outstanding_pdus)); 1388255570Strasz#endif 1389255570Strasz ICL_CONN_UNLOCK(ic); 1390255570Strasz} 1391255570Strasz 1392255570Straszbool 1393255570Straszicl_conn_connected(struct icl_conn *ic) 1394255570Strasz{ 1395265495Strasz ICL_CONN_LOCK_ASSERT_NOT(ic); 1396255570Strasz 1397255570Strasz ICL_CONN_LOCK(ic); 1398255570Strasz if (ic->ic_socket == NULL) { 1399255570Strasz ICL_CONN_UNLOCK(ic); 1400255570Strasz return (false); 1401255570Strasz } 1402255570Strasz if (ic->ic_socket->so_error != 0) { 1403255570Strasz ICL_CONN_UNLOCK(ic); 1404255570Strasz return (false); 1405255570Strasz } 1406255570Strasz ICL_CONN_UNLOCK(ic); 1407255570Strasz return (true); 1408255570Strasz} 1409255570Strasz 1410255570Strasz#ifdef ICL_KERNEL_PROXY 1411255570Straszint 1412255570Straszicl_conn_handoff_sock(struct icl_conn *ic, struct socket *so) 1413255570Strasz{ 1414255570Strasz int error; 1415255570Strasz 1416265495Strasz ICL_CONN_LOCK_ASSERT_NOT(ic); 1417265495Strasz 1418255570Strasz if (so->so_type != SOCK_STREAM) 1419255570Strasz return (EINVAL); 1420255570Strasz 1421255570Strasz ICL_CONN_LOCK(ic); 1422255570Strasz if (ic->ic_socket != NULL) { 1423255570Strasz ICL_CONN_UNLOCK(ic); 1424255570Strasz return (EBUSY); 1425255570Strasz } 1426255570Strasz ic->ic_socket = so; 1427255570Strasz ICL_CONN_UNLOCK(ic); 1428255570Strasz 1429255570Strasz error = icl_conn_start(ic); 1430255570Strasz 1431255570Strasz return (error); 1432255570Strasz} 1433255570Strasz#endif /* ICL_KERNEL_PROXY */ 1434255570Strasz 1435255570Straszstatic int 1436255570Straszicl_unload(void) 1437255570Strasz{ 1438255570Strasz 1439255570Strasz if (icl_ncons != 0) 1440255570Strasz return (EBUSY); 1441255570Strasz 1442255570Strasz uma_zdestroy(icl_conn_zone); 1443255570Strasz uma_zdestroy(icl_pdu_zone); 1444255570Strasz 1445255570Strasz return (0); 1446255570Strasz} 1447255570Strasz 1448255570Straszstatic void 1449255570Straszicl_load(void) 1450255570Strasz{ 1451255570Strasz 1452255570Strasz icl_conn_zone = uma_zcreate("icl_conn", 1453255570Strasz sizeof(struct icl_conn), NULL, NULL, NULL, NULL, 1454256058Strasz UMA_ALIGN_PTR, 0); 1455255570Strasz icl_pdu_zone = uma_zcreate("icl_pdu", 1456255570Strasz sizeof(struct icl_pdu), NULL, NULL, NULL, NULL, 1457256058Strasz UMA_ALIGN_PTR, 0); 1458255570Strasz 1459255570Strasz refcount_init(&icl_ncons, 0); 1460255570Strasz} 1461255570Strasz 1462255570Straszstatic int 1463255570Straszicl_modevent(module_t mod, int what, void *arg) 1464255570Strasz{ 1465255570Strasz 1466255570Strasz switch (what) { 1467255570Strasz case MOD_LOAD: 1468255570Strasz icl_load(); 1469255570Strasz return (0); 1470255570Strasz case MOD_UNLOAD: 1471255570Strasz return (icl_unload()); 1472255570Strasz default: 1473255570Strasz return (EINVAL); 1474255570Strasz } 1475255570Strasz} 1476255570Strasz 1477255570Straszmoduledata_t icl_data = { 1478255570Strasz "icl", 1479255570Strasz icl_modevent, 1480255570Strasz 0 1481255570Strasz}; 1482255570Strasz 1483255570StraszDECLARE_MODULE(icl, icl_data, SI_SUB_DRIVERS, SI_ORDER_FIRST); 1484255570StraszMODULE_VERSION(icl, 1); 1485