I Like It !

Friday, September 16, 2016

tcp_impl.h

    1 /*
    2  * CDDL HEADER START
    3  *
    4  * The contents of this file are subject to the terms of the
    5  * Common Development and Distribution License (the "License").
    6  * You may not use this file except in compliance with the License.
    7  *
    8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
    9  * or http://www.opensolaris.org/os/licensing.
   10  * See the License for the specific language governing permissions
   11  * and limitations under the License.
   12  *
   13  * When distributing Covered Code, include this CDDL HEADER in each
   14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
   15  * If applicable, add the following below this CDDL HEADER, with the
   16  * fields enclosed by brackets "[]" replaced with your own identifying
   17  * information: Portions Copyright [yyyy] [name of copyright owner]
   18  *
   19  * CDDL HEADER END
   20  */
   21 /*
   22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
   23  */
   24 
   25 #ifndef _INET_TCP_IMPL_H
   26 #define _INET_TCP_IMPL_H
   27 
   28 /*
   29  * TCP implementation private declarations.  These interfaces are
   30  * used to build the IP module and are not meant to be accessed
   31  * by any modules except IP itself.  They are undocumented and are
   32  * subject to change without notice.
   33  */
   34 
   35 #ifdef  __cplusplus
   36 extern "C" {
   37 #endif
   38 
   39 #ifdef _KERNEL
   40 
   41 #include 
   42 #include      /* For LBOLT_FASTPATH{,64} */
   43 #include 
   44 #include 
   45 #include 
   46 
   47 #define TCP_MOD_ID      5105    /* TCP module id number */
   48 
   49 extern struct qinit     tcp_sock_winit; /* NOTE(review): presumably the write-side qinit for sockets -- confirm */
   50 extern struct qinit     tcp_winit;      /* NOTE(review): presumably the write-side qinit -- confirm */
   51 
   52 extern sock_downcalls_t sock_tcp_downcalls;     /* defined outside this header */
   53 
   54 /*
   55  * Bind hash list size and hash function.  The size has to be a power of 2
   56  * because TCP_BIND_HASH() reduces the port with a (size - 1) bit mask.
   57  */
   58 #define TCP_BIND_FANOUT_SIZE    512
   59 #define TCP_BIND_HASH(lport) (ntohs(lport) & (TCP_BIND_FANOUT_SIZE - 1))       /* lport is converted with ntohs() before masking */
   60 
   61 /*
   62  * This implementation follows the 4.3BSD interpretation of the urgent
   63  * pointer and not RFC 1122. Switching to RFC 1122 behavior would cause
   64  * incompatible changes in protocols like telnet and rlogin.
   65  */
   66 #define TCP_OLD_URP_INTERPRETATION      1
   67 
   68 /* TCP option lengths.  The _REAL_ variants are the base length plus extra bytes. */
   69 #define TCPOPT_NOP_LEN          1
   70 #define TCPOPT_MAXSEG_LEN       4
   71 #define TCPOPT_WS_LEN           3
   72 #define TCPOPT_REAL_WS_LEN      (TCPOPT_WS_LEN+1)       /* 4; NOTE(review): presumably one pad byte -- confirm */
   73 #define TCPOPT_TSTAMP_LEN       10
   74 #define TCPOPT_REAL_TS_LEN      (TCPOPT_TSTAMP_LEN+2)   /* 12; NOTE(review): presumably two pad bytes -- confirm */
   75 #define TCPOPT_SACK_OK_LEN      2
   76 #define TCPOPT_REAL_SACK_OK_LEN (TCPOPT_SACK_OK_LEN+2)  /* 4; NOTE(review): presumably two pad bytes -- confirm */
   77 #define TCPOPT_REAL_SACK_LEN    4
   78 #define TCPOPT_MAX_SACK_LEN     36
   79 #define TCPOPT_HEADER_LEN       2
   81 /* Round up the value to the nearest mss. */
      /*
       * Computes ((value - 1) / mss + 1) * mss, i.e. for value >= 1 the
       * smallest multiple of mss that is >= value.  A value of 0 is not a
       * fixed point of this formula (the result is generally not 0), so
       * callers are expected to pass value >= 1.  Both arguments are
       * evaluated more than once and must be free of side effects.
       */
   82 #define MSS_ROUNDUP(value, mss)         ((((value) - 1) / (mss) + 1) * (mss))
   83 
   84 /*
   85  * Was this tcp created via socket() interface?  Expands to tcp_issocket.
   86  */
   87 #define TCP_IS_SOCKET(tcp)      ((tcp)->tcp_issocket)
   88 
   89 /*
   90  * Is this tcp not attached to any upper client?  Expands to tcp_detached.
   91  */
   92 #define TCP_IS_DETACHED(tcp)    ((tcp)->tcp_detached)
   93 
   94 /* TCP timers related data structures.  Refer to tcp_timers.c. */
   95 typedef struct tcp_timer_s {
   96         conn_t  *connp;                 /* connection this timer belongs to */
   97         void    (*tcpt_proc)(void *);   /* callback; same shape as the handler argument of tcp_timeout() */
   98         callout_id_t   tcpt_tid;        /* NOTE(review): presumably the id of the pending callout -- confirm */
   99 } tcp_timer_t;
  100 
  101 extern kmem_cache_t *tcp_timercache;    /* NOTE(review): presumably the kmem cache for timer allocations -- confirm */
  102 
  /*
   * Macros for starting and cancelling the various TCP timers.  The
   * retransmission timer has its own macro, TCP_TIMER_RESTART().
   *
   * TCP_TIMER(tcp, f, tim) calls tcp_timeout() on the tcp's conn_t with
   * handler f; tim is in millisec.  It evaluates to tcp_timeout()'s return
   * value, the id that is later handed to TCP_TIMER_CANCEL().
   *
   * TCP_TIMER_CANCEL(tcp, id) calls tcp_timeout_cancel() on the tcp's conn_t
   * for the timer identified by id and evaluates to its return value.
   *
   * The tcp argument is parenthesized so that any pointer expression
   * (e.g. "base + 1" or "&obj") can be passed safely.
   */
  #define TCP_TIMER(tcp, f, tim)          \
          tcp_timeout((tcp)->tcp_connp, f, tim)
  #define TCP_TIMER_CANCEL(tcp, id)       \
          tcp_timeout_cancel((tcp)->tcp_connp, id)
  111 
  /*
   * To restart the TCP retransmission timer.  intvl is in millisec.
   *
   * If a timer is pending (tcp_timer_tid != 0) it is cancelled first; a new
   * timer running tcp_timer() is then started and its id is stored in
   * tcp_timer_tid.
   *
   * The body is wrapped in do { } while (0) so that the macro expands to
   * exactly one statement and can be used as the body of an if/else.  Call
   * sites must supply the terminating semicolon.  tcp is evaluated several
   * times and must be free of side effects.
   */
  #define TCP_TIMER_RESTART(tcp, intvl) do {                              \
          if ((tcp)->tcp_timer_tid != 0)                                  \
                  (void) TCP_TIMER_CANCEL((tcp), (tcp)->tcp_timer_tid);   \
          (tcp)->tcp_timer_tid = TCP_TIMER((tcp), tcp_timer, (intvl));    \
  } while (0)
  120 
  121 /*
  122  * For scalability, we must not run a timer for every TCP connection
  123  * in TIME_WAIT state.  To see why, consider (for time wait interval of
  124  * 1 minutes):
  125  *      10,000 connections/sec * 60 seconds/time wait = 600,000 active conn's
  126  *
  127  * This list is ordered by time, so you need only delete from the head
  128  * until you get to entries which aren't old enough to delete yet.
  129  * The list consists of only the detached TIME_WAIT connections.
  130  *
  131  * When a tcp_t enters TIME_WAIT state, a timer is started (timeout is
  132  * tcps_time_wait_interval).  When the tcp_t is detached (upper layer closes
  133  * the end point), it is moved to the time wait list and another timer is
  134  * started (expiry time is set at tcp_time_wait_expire, which is
  135  * also calculated using tcps_time_wait_interval).  This means that the
  136  * TIME_WAIT state can be extended (up to doubled) if the tcp_t doesn't
  137  * become detached for a long time.
  138  *
  139  * The list manipulations (including tcp_time_wait_next/prev)
  140  * are protected by the tcp_time_wait_lock. The content of the
  141  * detached TIME_WAIT connections is protected by the normal perimeters.
  142  *
  143  * This list is per squeue and squeues are shared across the tcp_stack_t's.
  144  * Things on tcp_time_wait_head remain associated with the tcp_stack_t
  145  * and conn_netstack.
  146  * The tcp_t's that are added to tcp_free_list are disassociated and
  147  * have NULL tcp_tcps and conn_netstack pointers.
  148  */
  149 typedef struct tcp_squeue_priv_s {
  150         kmutex_t        tcp_time_wait_lock;     /* protects the time wait list manipulations */
  151         callout_id_t    tcp_time_wait_tid;      /* NOTE(review): presumably the id of the time wait callout -- confirm */
  152         tcp_t           *tcp_time_wait_head;    /* head of the time-ordered list of detached TIME_WAIT tcp_t's */
  153         tcp_t           *tcp_time_wait_tail;    /* tail of that list */
  154         tcp_t           *tcp_free_list;         /* tcp_t's disassociated from their tcp_stack_t and netstack */
  155         uint_t          tcp_free_list_cnt;      /* NOTE(review): presumably the number of entries on tcp_free_list -- confirm */
  156 #ifdef DEBUG
  157         /*
  158          * For debugging purpose, true when tcp_time_wait_collector() is
  159          * running.
  160          */
  161         boolean_t       tcp_time_wait_running;
  162 #endif
  163 } tcp_squeue_priv_t;
  164 
  165 /*
  166  * Parameters for TCP Initial Send Sequence number (ISS) generation.  When
  167  * tcp_strong_iss is set to 1, which is the default, the ISS is calculated
  168  * by adding three components: a time component which grows by 1 every 4096
  169  * nanoseconds (versus every 4 microseconds suggested by RFC 793, page 27);
  170  * a per-connection component which grows by 125000 for every new connection;
  171  * and an "extra" component that grows by a random amount centered
  172  * approximately on 64000.  This causes the ISS generator to cycle every
  173  * 4.89 hours if no TCP connections are made, and faster if connections are
  174  * made.
  175  *
  176  * When tcp_strong_iss is set to 0, ISS is calculated by adding two
  177  * components: a time component which grows by 250000 every second; and
  178  * a per-connection component which grows by 125000 for every new connections.
  179  *
  180  * A third method, when tcp_strong_iss is set to 2, for generating ISS is
  181  * prescribed by Steve Bellovin.  This involves adding time, the 125000 per
  182  * connection, and a one-way hash (MD5) of the connection ID <sport, dport,
  183  * src, dst>, a "truly" random (per RFC 1750) number, and a console-entered
  184  * password.
  185  */
  186 #define ISS_INCR        250000  /* matches the 250000-per-second time component used when tcp_strong_iss is 0 */
  187 #define ISS_NSEC_SHT    12      /* 2^12 = 4096: the time component grows by 1 every 4096 nanoseconds */
  188 
  189 /* Macros for timestamp comparisons */
      /*
       * Both macros test the sign of the difference (a - b) after casting it
       * to int32_t, so the comparison is relative rather than absolute.
       * NOTE(review): this assumes a and b are 32-bit unsigned timestamps so
       * that the subtraction wraps -- confirm against the callers.
       */
  190 #define TSTMP_GEQ(a, b) ((int32_t)((a)-(b)) >= 0)
  191 #define TSTMP_LT(a, b)  ((int32_t)((a)-(b)) < 0)
  192 
  /*
   * Initialize cwnd according to RFC 3390.  def_max_init_cwnd is
   * either tcp_slow_start_initial or tcp_slow_start_after idle
   * depending on the caller.  If the upper layer has not used the
   * TCP_INIT_CWND option to change the initial cwnd, tcp_init_cwnd
   * should be 0 and we use the formula in RFC 3390 to set tcp_cwnd:
   *
   *      MIN(def_max_init_cwnd * mss,
   *          MIN(4 * mss, MAX(2 * mss, 4380 rounded down to a multiple of mss)))
   *
   * If the upper layer has set tcp_init_cwnd, just use it to calculate
   * tcp_cwnd (tcp_init_cwnd * mss).  In both cases tcp_cwnd_cnt is reset
   * to 0.
   *
   * Every macro argument is parenthesized so that expressions such as
   * "a + b" or "&obj" expand correctly.  The body is wrapped in
   * do { } while (0) so that the macro is a single statement; call sites
   * must supply the terminating semicolon.  Arguments are evaluated more
   * than once and must be free of side effects.
   */
  #define TCP_SET_INIT_CWND(tcp, mss, def_max_init_cwnd)                  \
  do {                                                                    \
          if ((tcp)->tcp_init_cwnd == 0) {                                \
                  (tcp)->tcp_cwnd = MIN((def_max_init_cwnd) * (mss),      \
                      MIN(4 * (mss), MAX(2 * (mss), 4380 / (mss) * (mss)))); \
          } else {                                                        \
                  (tcp)->tcp_cwnd = (tcp)->tcp_init_cwnd * (mss);         \
          }                                                               \
          (tcp)->tcp_cwnd_cnt = 0;                                        \
  } while (0)
  212 
  /*
   * Set ECN capable transport (ECT) code point in IP header.
   *
   * Note that there are 2 ECT code points '01' and '10', which are called
   * ECT(1) and ECT(0) respectively.  Here we follow the original ECT code
   * point ECT(0) for TCP as described in RFC 2481.
   *
   * iph points at the IP header.  It is treated as an ipha_t when the
   * connection's conn_ipversion is IPV4_VERSION and as an ip6_t otherwise.
   * In both cases the two ECN bits are cleared before ECT(0) is set.
   *
   * The body is wrapped in do { } while (0) so that the macro is a single
   * statement and cannot capture a following "else"; call sites must
   * supply the terminating semicolon.
   */
  #define TCP_SET_ECT(tcp, iph) do {                                      \
          if ((tcp)->tcp_connp->conn_ipversion == IPV4_VERSION) {         \
                  /* We need to clear the code point first. */            \
                  ((ipha_t *)(iph))->ipha_type_of_service &= 0xFC;        \
                  ((ipha_t *)(iph))->ipha_type_of_service |= IPH_ECN_ECT0; \
          } else {                                                        \
                  ((ip6_t *)(iph))->ip6_vcf &= htonl(0xFFCFFFFF);         \
                  ((ip6_t *)(iph))->ip6_vcf |= htonl(IPH_ECN_ECT0 << 20); \
          }                                                               \
  } while (0)
  229 
  /*
   * Set tcp_rto with boundary checking: rto is clamped to the range
   * [tcp_rto_min, tcp_rto_max] before being stored in tcp_rto.
   *
   * The body is wrapped in do { } while (0) (and no longer carries its own
   * trailing semicolon) so that the macro is a single statement and can be
   * used as the body of an if/else; call sites must supply the terminating
   * semicolon.  rto is evaluated more than once and must be free of side
   * effects.
   */
  #define TCP_SET_RTO(tcp, rto) do {                      \
          if ((rto) < (tcp)->tcp_rto_min)                 \
                  (tcp)->tcp_rto = (tcp)->tcp_rto_min;    \
          else if ((rto) > (tcp)->tcp_rto_max)            \
                  (tcp)->tcp_rto = (tcp)->tcp_rto_max;    \
          else                                            \
                  (tcp)->tcp_rto = (rto);                 \
  } while (0)
  240 
  241 /*
  242  * TCP options struct returned from tcp_parse_options.
  243  */
  244 typedef struct tcp_opt_s {
  245         uint32_t        tcp_opt_mss;    /* NOTE(review): presumably the MSS option value -- confirm */
  246         uint32_t        tcp_opt_wscale; /* NOTE(review): presumably the window scale option value -- confirm */
  247         uint32_t        tcp_opt_ts_val; /* NOTE(review): presumably the timestamp option value field -- confirm */
  248         uint32_t        tcp_opt_ts_ecr; /* NOTE(review): presumably the timestamp option echo reply field -- confirm */
  249         tcp_t           *tcp;           /* tcp_t associated with these options */
  250 } tcp_opt_t;
  251 
  252 /*
  253  * Write-side flow-control is implemented via the per instance STREAMS
  254  * write-side Q by explicitly setting QFULL to stop the flow of mblk_t(s)
  255  * and clearing QFULL and calling qbackenable() to restart the flow based
  256  * on the number of TCP unsent bytes (i.e. those not on the wire waiting
  257  * for a remote ACK).
  258  *
  259  * This is different from a standard STREAMS kmod, where the STREAMS
  260  * framework automatically flow-controls the Q based on the defined
  261  * hiwat/lowat values as mblk_t's are enqueued/dequeued.
  262  *
  263  * As of FireEngine TCP write-side flow-control needs to take into account
  264  * both the unsent tcp_xmit list bytes but also any squeue_t enqueued bytes
  265  * (i.e. from tcp_wput() -> tcp_output()).
  266  *
  267  * This is accomplished by adding a new tcp_t field, tcp_squeue_bytes, to
  268  * count the number of bytes enqueued by tcp_wput() and the number of bytes
  269  * dequeued and processed by tcp_output().
  270  *
  271  * So, the total number of bytes unsent is (squeue_bytes + unsent) with all
  272  * flow-control uses of unsent replaced with the macro TCP_UNSENT_BYTES.
  273  */
  274 extern void     tcp_clrqfull(tcp_t *);
  275 extern void     tcp_setqfull(tcp_t *);
  276 
      /* Total unsent bytes: bytes counted in tcp_squeue_bytes plus tcp_unsent. */
  277 #define TCP_UNSENT_BYTES(tcp) \
  278         ((tcp)->tcp_squeue_bytes + (tcp)->tcp_unsent)
  279 
  280 /*
  281  * Linked list struct to store listener connection limit configuration per
  282  * IP stack.  The list is stored at tcps_listener_conf in tcp_stack_t.
  283  *
  284  * tl_port: the listener port of this limit configuration
  285  * tl_ratio: the maximum amount of memory consumed by all concurrent TCP
  286  *           connections created by a listener does not exceed 1/tl_ratio
  287  *           of the total system memory.  Note that this is only an
  288  *           approximation.
  289  * tl_link: linked list struct
  290  */
  291 typedef struct tcp_listener_s {
  292         in_port_t       tl_port;        /* listener port this limit applies to */
  293         uint32_t        tl_ratio;       /* connections may use about 1/tl_ratio of system memory */
  294         list_node_t     tl_link;        /* linkage on the tcps_listener_conf list */
  295 } tcp_listener_t;
  296 
  297 /*
  298  * If there is a limit set on the number of connections allowed per each
  299  * listener, the following struct is used to store that counter.  It keeps
  300  * the number of TCP connection created by a listener.  Note that this needs
  301  * to be separated from the listener since the listener can go away before
  302  * all the connections are gone.
  303  *
  304  * When the struct is allocated, tlc_cnt is set to 1.  When a new connection
  305  * is created by the listener, tlc_cnt is incremented by 1.  When a connection
  306  * created by the listener goes away, tlc_cnt is decremented by 1.  When the
  307  * listener itself goes away, tlc_cnt is decremented by 1.  The last
  308  * connection (or the listener) which decrements tlc_cnt to zero frees the
  309  * struct.
  310  *
  311  * tlc_max is the maximum number of concurrent TCP connections created from a
  312  * listener.  It is calculated when the tcp_listen_cnt_t is allocated.
  313  *
  314  * tlc_report_time stores the time when cmn_err() is called to report that the
  315  * max has been exceeded.  Report is done at most once every
  316  * TCP_TLC_REPORT_INTERVAL for a listener.
  317  *
  318  * tlc_drop stores the number of connection attempts dropped because the
  319  * limit has been reached.
  320  */
  321 typedef struct tcp_listen_cnt_s {
  322         uint32_t        tlc_max;                /* max concurrent connections from the listener */
  323         uint32_t        tlc_cnt;                /* reference count: listener plus its connections */
  324         int64_t         tlc_report_time;        /* time of the last cmn_err() limit report */
  325         uint32_t        tlc_drop;               /* connection attempts dropped at the limit */
  326 } tcp_listen_cnt_t;
  327 
  328 #define TCP_TLC_REPORT_INTERVAL (30 * MINUTES)  /* minimum spacing between limit reports; expressed via MINUTES */
  329 
  /*
   * Drop this tcp's reference on its tcp_listen_cnt_t.  tlc_cnt is
   * atomically decremented by 1; if that brings it to zero the struct is
   * freed with kmem_free().  In all cases the tcp's tcp_listen_cnt pointer
   * is cleared afterwards.  tcp_listen_cnt must be non-NULL on entry.
   *
   * The body is wrapped in do { } while (0) so that the macro is a single
   * statement and can be used as the body of an if/else; call sites must
   * supply the terminating semicolon.  tcp is evaluated several times and
   * must be free of side effects.
   */
  #define TCP_DECR_LISTEN_CNT(tcp)                                        \
  do {                                                                    \
          ASSERT((tcp)->tcp_listen_cnt->tlc_cnt > 0);                     \
          if (atomic_add_32_nv(&(tcp)->tcp_listen_cnt->tlc_cnt, -1) == 0) \
                  kmem_free((tcp)->tcp_listen_cnt, sizeof (tcp_listen_cnt_t)); \
          (tcp)->tcp_listen_cnt = NULL;                                   \
  } while (0)
  337 
  338 /* Increment and decrement the number of connections in tcp_stack_t. */
      /*
       * Both macros atomically update the tcp_sc_conn_cnt counter of the
       * tcps_sc[] entry selected by the current CPU's cpu_seqid.
       */
  339 #define TCPS_CONN_INC(tcps)                                             \
  340         atomic_inc_64(                                                  \
  341             (uint64_t *)&(tcps)->tcps_sc[CPU->cpu_seqid]->tcp_sc_conn_cnt)
  342 
  343 #define TCPS_CONN_DEC(tcps)                                             \
  344         atomic_dec_64(                                                  \
  345             (uint64_t *)&(tcps)->tcps_sc[CPU->cpu_seqid]->tcp_sc_conn_cnt)
  346 
  347 /*
  348  * When the system is under memory pressure, stack variable tcps_reclaim is
  349  * true, we shorten the connection timeout abort interval to tcp_early_abort
  350  * seconds.  Defined in tcp.c.
  351  */
  352 extern uint32_t tcp_early_abort;
  353 
  354 /*
  355  * To reach to an eager in Q0 which can be dropped due to an incoming
  356  * new SYN request when Q0 is full, a new doubly linked list is
  357  * introduced. This list allows to select an eager from Q0 in O(1) time.
  358  * This is needed to avoid spending too much time walking through the
  359  * long list of eagers in Q0 when tcp_drop_q0() is called. Each member of
  360  * this new list has to be a member of Q0.
  361  * This list is headed by listener's tcp_t. When the list is empty,
  362  * both the pointers - tcp_eager_next_drop_q0 and tcp_eager_prev_drop_q0,
  363  * of listener's tcp_t point to listener's tcp_t itself.
  364  *
  365  * Given an eager in Q0 and a listener, MAKE_DROPPABLE() puts the eager
  366  * in the list. MAKE_UNDROPPABLE() takes the eager out of the list.
  367  * These macros do not affect the eager's membership to Q0.
  368  */
  /*
   * Insert eager at the head of listener's drop-q0 list (directly after the
   * listener), unless eager's tcp_eager_next_drop_q0 is already non-NULL,
   * in which case nothing is done.
   *
   * The body is wrapped in do { } while (0) so that the macro is a single
   * statement and cannot capture a following "else"; call sites must
   * supply the terminating semicolon.  Both arguments are evaluated
   * several times and must be free of side effects.
   */
  #define MAKE_DROPPABLE(listener, eager)                                 \
  do {                                                                    \
          if ((eager)->tcp_eager_next_drop_q0 == NULL) {                  \
                  (listener)->tcp_eager_next_drop_q0->tcp_eager_prev_drop_q0\
                      = (eager);                                          \
                  (eager)->tcp_eager_prev_drop_q0 = (listener);           \
                  (eager)->tcp_eager_next_drop_q0 =                       \
                      (listener)->tcp_eager_next_drop_q0;                 \
                  (listener)->tcp_eager_next_drop_q0 = (eager);           \
          }                                                               \
  } while (0)
  378 
  /*
   * Unlink eager from the drop-q0 list it is on and reset both of its
   * drop-q0 pointers to NULL.  Nothing is done when eager's
   * tcp_eager_next_drop_q0 is already NULL.
   *
   * The body is wrapped in do { } while (0) so that the macro is a single
   * statement and cannot capture a following "else"; call sites must
   * supply the terminating semicolon.  eager is evaluated several times
   * and must be free of side effects.
   */
  #define MAKE_UNDROPPABLE(eager)                                         \
  do {                                                                    \
          if ((eager)->tcp_eager_next_drop_q0 != NULL) {                  \
                  (eager)->tcp_eager_next_drop_q0->tcp_eager_prev_drop_q0 \
                      = (eager)->tcp_eager_prev_drop_q0;                  \
                  (eager)->tcp_eager_prev_drop_q0->tcp_eager_next_drop_q0 \
                      = (eager)->tcp_eager_next_drop_q0;                  \
                  (eager)->tcp_eager_prev_drop_q0 = NULL;                 \
                  (eager)->tcp_eager_next_drop_q0 = NULL;                 \
          }                                                               \
  } while (0)
  388 
  389 /*
  390  * The format argument to pass to tcp_display().
  391  * DISP_PORT_ONLY means that the returned string has only port info.
  392  * DISP_ADDR_AND_PORT means that the returned string also contains the
  393  * remote and local IP address.
  394  */
  395 #define DISP_PORT_ONLY          1
  396 #define DISP_ADDR_AND_PORT      2
  397 
  398 #define IP_ADDR_CACHE_SIZE      2048    /* must stay a power of 2: the hash below masks with (size - 1) */
  399 #define IP_ADDR_CACHE_HASH(faddr)                                       \
  400         (ntohl(faddr) & (IP_ADDR_CACHE_SIZE -1))
  401 
  402 /* TCP cwnd burst factor. */
  403 #define TCP_CWND_INFINITE       65535
  404 #define TCP_CWND_SS             3
  405 #define TCP_CWND_NORMAL         5
  406 
  407 /*
  408  * TCP reassembly macros.  We hide starting and ending sequence numbers in
  409  * b_next and b_prev of messages on the reassembly queue.  The messages are
  410  * chained using b_cont.  These macros are used in tcp_reass() so we don't
  411  * have to see the ugly casts and assignments.
  412  */
  413 #define TCP_REASS_SEQ(mp)               ((uint32_t)(uintptr_t)((mp)->b_next))   /* read the value stored in b_next */
  414 #define TCP_REASS_SET_SEQ(mp, u)        ((mp)->b_next = \
  415                                         (mblk_t *)(uintptr_t)(u))               /* store u in b_next */
  416 #define TCP_REASS_END(mp)               ((uint32_t)(uintptr_t)((mp)->b_prev))   /* read the value stored in b_prev */
  417 #define TCP_REASS_SET_END(mp, u)        ((mp)->b_prev = \
  418                                         (mblk_t *)(uintptr_t)(u))               /* store u in b_prev */
  419 
      /*
       * Shorthand names for individual entries of tcps_propinfo_tbl[].  Each
       * name expands to one member of one table slot, so it is meant to be
       * used as a member of whatever object holds tcps_propinfo_tbl
       * (e.g. tcps->tcps_time_wait_interval).  The suffix of the member
       * selects what is read: prop_cur_uval / prop_cur_bval for the current
       * value, prop_min_uval / prop_max_uval for the _low / _high bounds.
       * The indices are hard-coded here.
       * NOTE(review): they presumably have to match the order of the table's
       * initializer, which is not visible in this header -- confirm when
       * adding or reordering entries.
       */
  420 #define tcps_time_wait_interval         tcps_propinfo_tbl[0].prop_cur_uval
  421 #define tcps_conn_req_max_q             tcps_propinfo_tbl[1].prop_cur_uval
  422 #define tcps_conn_req_max_q0            tcps_propinfo_tbl[2].prop_cur_uval
  423 #define tcps_conn_req_min               tcps_propinfo_tbl[3].prop_cur_uval
  424 #define tcps_conn_grace_period          tcps_propinfo_tbl[4].prop_cur_uval
  425 #define tcps_cwnd_max_                  tcps_propinfo_tbl[5].prop_cur_uval
  426 #define tcps_dbg                        tcps_propinfo_tbl[6].prop_cur_uval
  427 #define tcps_smallest_nonpriv_port      tcps_propinfo_tbl[7].prop_cur_uval
  428 #define tcps_ip_abort_cinterval         tcps_propinfo_tbl[8].prop_cur_uval
  429 #define tcps_ip_abort_linterval         tcps_propinfo_tbl[9].prop_cur_uval
  430 #define tcps_ip_abort_interval          tcps_propinfo_tbl[10].prop_cur_uval
  431 #define tcps_ip_notify_cinterval        tcps_propinfo_tbl[11].prop_cur_uval
  432 #define tcps_ip_notify_interval         tcps_propinfo_tbl[12].prop_cur_uval
  433 #define tcps_ipv4_ttl                   tcps_propinfo_tbl[13].prop_cur_uval
  434 #define tcps_keepalive_interval_high    tcps_propinfo_tbl[14].prop_max_uval
  435 #define tcps_keepalive_interval         tcps_propinfo_tbl[14].prop_cur_uval
  436 #define tcps_keepalive_interval_low     tcps_propinfo_tbl[14].prop_min_uval
  437 #define tcps_maxpsz_multiplier          tcps_propinfo_tbl[15].prop_cur_uval
  438 #define tcps_mss_def_ipv4               tcps_propinfo_tbl[16].prop_cur_uval
  439 #define tcps_mss_max_ipv4               tcps_propinfo_tbl[17].prop_cur_uval
  440 #define tcps_mss_min                    tcps_propinfo_tbl[18].prop_cur_uval
  441 #define tcps_naglim_def                 tcps_propinfo_tbl[19].prop_cur_uval
  442 #define tcps_rexmit_interval_initial_high       \
  443                                         tcps_propinfo_tbl[20].prop_max_uval
  444 #define tcps_rexmit_interval_initial    tcps_propinfo_tbl[20].prop_cur_uval
  445 #define tcps_rexmit_interval_initial_low        \
  446                                         tcps_propinfo_tbl[20].prop_min_uval
  447 #define tcps_rexmit_interval_max_high   tcps_propinfo_tbl[21].prop_max_uval
  448 #define tcps_rexmit_interval_max        tcps_propinfo_tbl[21].prop_cur_uval
  449 #define tcps_rexmit_interval_max_low    tcps_propinfo_tbl[21].prop_min_uval
  450 #define tcps_rexmit_interval_min_high   tcps_propinfo_tbl[22].prop_max_uval
  451 #define tcps_rexmit_interval_min        tcps_propinfo_tbl[22].prop_cur_uval
  452 #define tcps_rexmit_interval_min_low    tcps_propinfo_tbl[22].prop_min_uval
  453 #define tcps_deferred_ack_interval      tcps_propinfo_tbl[23].prop_cur_uval
  454 #define tcps_snd_lowat_fraction         tcps_propinfo_tbl[24].prop_cur_uval
  455 #define tcps_dupack_fast_retransmit     tcps_propinfo_tbl[25].prop_cur_uval
  456 #define tcps_ignore_path_mtu            tcps_propinfo_tbl[26].prop_cur_bval
  457 #define tcps_smallest_anon_port         tcps_propinfo_tbl[27].prop_cur_uval
  458 #define tcps_largest_anon_port          tcps_propinfo_tbl[28].prop_cur_uval
  459 #define tcps_xmit_hiwat                 tcps_propinfo_tbl[29].prop_cur_uval
  460 #define tcps_xmit_lowat                 tcps_propinfo_tbl[30].prop_cur_uval
  461 #define tcps_recv_hiwat                 tcps_propinfo_tbl[31].prop_cur_uval
  462 #define tcps_recv_hiwat_minmss          tcps_propinfo_tbl[32].prop_cur_uval
  463 #define tcps_fin_wait_2_flush_interval_high     \
  464                                         tcps_propinfo_tbl[33].prop_max_uval
  465 #define tcps_fin_wait_2_flush_interval  tcps_propinfo_tbl[33].prop_cur_uval
  466 #define tcps_fin_wait_2_flush_interval_low      \
  467                                         tcps_propinfo_tbl[33].prop_min_uval
  468 #define tcps_max_buf                    tcps_propinfo_tbl[34].prop_cur_uval
  469 #define tcps_strong_iss                 tcps_propinfo_tbl[35].prop_cur_uval
  470 #define tcps_rtt_updates                tcps_propinfo_tbl[36].prop_cur_uval
  471 #define tcps_wscale_always              tcps_propinfo_tbl[37].prop_cur_bval
  472 #define tcps_tstamp_always              tcps_propinfo_tbl[38].prop_cur_bval
  473 #define tcps_tstamp_if_wscale           tcps_propinfo_tbl[39].prop_cur_bval
  474 #define tcps_rexmit_interval_extra      tcps_propinfo_tbl[40].prop_cur_uval
  475 #define tcps_deferred_acks_max          tcps_propinfo_tbl[41].prop_cur_uval
  476 #define tcps_slow_start_after_idle      tcps_propinfo_tbl[42].prop_cur_uval
  477 #define tcps_slow_start_initial         tcps_propinfo_tbl[43].prop_cur_uval
  478 #define tcps_sack_permitted             tcps_propinfo_tbl[44].prop_cur_uval
  479 #define tcps_ipv6_hoplimit              tcps_propinfo_tbl[45].prop_cur_uval
  480 #define tcps_mss_def_ipv6               tcps_propinfo_tbl[46].prop_cur_uval
  481 #define tcps_mss_max_ipv6               tcps_propinfo_tbl[47].prop_cur_uval
  482 #define tcps_rev_src_routes             tcps_propinfo_tbl[48].prop_cur_bval
  483 #define tcps_local_dack_interval        tcps_propinfo_tbl[49].prop_cur_uval
  484 #define tcps_local_dacks_max            tcps_propinfo_tbl[50].prop_cur_uval
  485 #define tcps_ecn_permitted              tcps_propinfo_tbl[51].prop_cur_uval
  486 #define tcps_rst_sent_rate_enabled      tcps_propinfo_tbl[52].prop_cur_bval
  487 #define tcps_rst_sent_rate              tcps_propinfo_tbl[53].prop_cur_uval
  488 #define tcps_push_timer_interval        tcps_propinfo_tbl[54].prop_cur_uval
  489 #define tcps_use_smss_as_mss_opt        tcps_propinfo_tbl[55].prop_cur_bval
  490 #define tcps_keepalive_abort_interval_high \
  491                                         tcps_propinfo_tbl[56].prop_max_uval
  492 #define tcps_keepalive_abort_interval \
  493                                         tcps_propinfo_tbl[56].prop_cur_uval
  494 #define tcps_keepalive_abort_interval_low \
  495                                         tcps_propinfo_tbl[56].prop_min_uval
  496 #define tcps_wroff_xtra                 tcps_propinfo_tbl[57].prop_cur_uval
  497 #define tcps_dev_flow_ctl               tcps_propinfo_tbl[58].prop_cur_bval
  498 #define tcps_reass_timeout              tcps_propinfo_tbl[59].prop_cur_uval
  499 
  500 extern struct qinit tcp_rinitv4, tcp_rinitv6;   /* NOTE(review): presumably the read-side qinits for IPv4 / IPv6 -- confirm */
  501 extern boolean_t do_tcp_fusion;                 /* NOTE(review): presumably the global switch for TCP fusion -- confirm */
  502 
  503 /*
  504  * Object to represent database of options to search passed to
  505  * {sock,tpi}optcom_req() interface routine to take care of option
  506  * management and associated methods.
  507  */
  508 extern optdb_obj_t      tcp_opt_obj;
  509 extern uint_t           tcp_max_optsize;
  510 
  511 extern int tcp_squeue_flag;
  512 
  513 extern uint_t tcp_free_list_max_cnt;    /* NOTE(review): presumably the cap for tcp_free_list_cnt in tcp_squeue_priv_t -- confirm */
  514 
  515 /*
  516  * Functions in tcp.c.  Prototypes only; the definitions live in tcp.c.
  517  */
  518 extern void     tcp_acceptor_hash_insert(t_uscalar_t, tcp_t *);
  519 extern tcp_t    *tcp_acceptor_hash_lookup(t_uscalar_t, tcp_stack_t *);
  520 extern void     tcp_acceptor_hash_remove(tcp_t *);
  521 extern mblk_t   *tcp_ack_mp(tcp_t *);
  522 extern int      tcp_build_hdrs(tcp_t *);
  523 extern void     tcp_cleanup(tcp_t *);
  524 extern int      tcp_clean_death(tcp_t *, int);
      /* Same four-argument shape as tcp_eager_kill(), tcp_output() and tcp_input_data(). */
  525 extern void     tcp_clean_death_wrapper(void *, mblk_t *, void *,
  526                     ip_recv_attr_t *);
  527 extern void     tcp_close_common(conn_t *, int);
  528 extern void     tcp_close_detached(tcp_t *);
  529 extern void     tcp_close_mpp(mblk_t **);
  530 extern void     tcp_closei_local(tcp_t *);
  531 extern sock_lower_handle_t tcp_create(int, int, int, sock_downcalls_t **,
  532                     uint_t *, int *, int, cred_t *);
  533 extern conn_t   *tcp_create_common(cred_t *, boolean_t, boolean_t, int *);
  534 extern void     tcp_disconnect(tcp_t *, mblk_t *);
      /* The char argument is the format selector: DISP_PORT_ONLY or DISP_ADDR_AND_PORT. */
  535 extern char     *tcp_display(tcp_t *, char *, char);
  536 extern int      tcp_do_bind(conn_t *, struct sockaddr *, socklen_t, cred_t *,
  537                     boolean_t);
  538 extern int      tcp_do_connect(conn_t *, const struct sockaddr *, socklen_t,
  539                     cred_t *, pid_t);
  540 extern int      tcp_do_listen(conn_t *, struct sockaddr *, socklen_t, int,
  541                     cred_t *, boolean_t);
  542 extern int      tcp_do_unbind(conn_t *);
  543 extern boolean_t        tcp_eager_blowoff(tcp_t *, t_scalar_t);
  544 extern void     tcp_eager_cleanup(tcp_t *, boolean_t);
  545 extern void     tcp_eager_kill(void *, mblk_t *, void *, ip_recv_attr_t *);
  546 extern void     tcp_eager_unlink(tcp_t *);
  547 extern int      tcp_getpeername(sock_lower_handle_t, struct sockaddr *,
  548                     socklen_t *, cred_t *);
  549 extern int      tcp_getsockname(sock_lower_handle_t, struct sockaddr *,
  550                     socklen_t *, cred_t *);
  551 extern void     tcp_init_values(tcp_t *, tcp_t *);
  552 extern void     tcp_ipsec_cleanup(tcp_t *);
  553 extern int      tcp_maxpsz_set(tcp_t *, boolean_t);
  554 extern void     tcp_mss_set(tcp_t *, uint32_t);
  555 extern void     tcp_reinput(conn_t *, mblk_t *, ip_recv_attr_t *, ip_stack_t *);
  556 extern void     tcp_rsrv(queue_t *);
  557 extern uint_t   tcp_rwnd_reopen(tcp_t *);
  558 extern int      tcp_rwnd_set(tcp_t *, uint32_t);
  559 extern int      tcp_set_destination(tcp_t *);
  560 extern void     tcp_set_ws_value(tcp_t *);
  561 extern void     tcp_stop_lingering(tcp_t *);
  562 extern void     tcp_update_pmtu(tcp_t *, boolean_t);
  563 extern mblk_t   *tcp_zcopy_backoff(tcp_t *, mblk_t *, boolean_t);
  564 extern boolean_t        tcp_zcopy_check(tcp_t *);
  565 extern void     tcp_zcopy_notify(tcp_t *);
  566 extern void     tcp_get_proto_props(tcp_t *, struct sock_proto_props *);
  567 
  568 /*
  569  * Bind related functions in tcp_bind.c.  Prototypes only.
  570  */
  571 extern int      tcp_bind_check(conn_t *, struct sockaddr *, socklen_t,
  572                     cred_t *, boolean_t);
      /* NOTE(review): the tf_t is presumably a bind hash bucket selected with TCP_BIND_HASH() -- confirm */
  573 extern void     tcp_bind_hash_insert(tf_t *, tcp_t *, int);
  574 extern void     tcp_bind_hash_remove(tcp_t *);
  575 extern in_port_t        tcp_bindi(tcp_t *, in_port_t, const in6_addr_t *,
  576                             int, boolean_t, boolean_t, boolean_t);
  577 extern in_port_t        tcp_update_next_port(in_port_t, const tcp_t *,
  578                             boolean_t);
  579 
  580 /*
  581  * Fusion related functions in tcp_fusion.c.  Prototypes only.
  582  */
  583 extern void     tcp_fuse(tcp_t *, uchar_t *, tcpha_t *);
  584 extern void     tcp_unfuse(tcp_t *);
  585 extern boolean_t tcp_fuse_output(tcp_t *, mblk_t *, uint32_t);
  586 extern void     tcp_fuse_output_urg(tcp_t *, mblk_t *);
  587 extern boolean_t tcp_fuse_rcv_drain(queue_t *, tcp_t *, mblk_t **);
  588 extern size_t   tcp_fuse_set_rcv_hiwat(tcp_t *, size_t);
  589 extern int      tcp_fuse_maxpsz(tcp_t *);
  590 extern void     tcp_fuse_backenable(tcp_t *);
      /* NOTE(review): by its name this is ISS-key related rather than fusion related; its home file is not visible here -- confirm */
  591 extern void     tcp_iss_key_init(uint8_t *, int, tcp_stack_t *);
  592 
  593 /*
  594  * Output related functions in tcp_output.c.  Prototypes only.  Several of
      * them share the (void *, mblk_t *, void *, ip_recv_attr_t *) shape.
  595  */
  596 extern void     tcp_close_output(void *, mblk_t *, void *, ip_recv_attr_t *);
  597 extern void     tcp_output(void *, mblk_t *, void *, ip_recv_attr_t *);
  598 extern void     tcp_output_urgent(void *, mblk_t *, void *, ip_recv_attr_t *);
  599 extern void     tcp_rexmit_after_error(tcp_t *);
  600 extern void     tcp_sack_rexmit(tcp_t *, uint_t *);
  601 extern void     tcp_send_data(tcp_t *, mblk_t *);
  602 extern void     tcp_send_synack(void *, mblk_t *, void *, ip_recv_attr_t *);
  603 extern void     tcp_shutdown_output(void *, mblk_t *, void *, ip_recv_attr_t *);
  604 extern void     tcp_ss_rexmit(tcp_t *);
  605 extern void     tcp_update_xmit_tail(tcp_t *, uint32_t);
  606 extern void     tcp_wput(queue_t *, mblk_t *);
  607 extern void     tcp_wput_data(tcp_t *, mblk_t *, boolean_t);
  608 extern void     tcp_wput_sock(queue_t *, mblk_t *);
  609 extern void     tcp_wput_fallback(queue_t *, mblk_t *);
  610 extern void     tcp_xmit_ctl(char *, tcp_t *, uint32_t, uint32_t, int);
      /* NOTE(review): "i" is the only named parameter in this group; the name has no effect on the prototype. */
  611 extern void     tcp_xmit_listeners_reset(mblk_t *, ip_recv_attr_t *,
  612                     ip_stack_t *i, conn_t *);
  613 extern mblk_t   *tcp_xmit_mp(tcp_t *, mblk_t *, int32_t, int32_t *,
  614                     mblk_t **, uint32_t, boolean_t, uint32_t *, boolean_t);
  615 
  616 /*
  617  * Input related functions in tcp_input.c.
       *
       * tcp_icmp_input(), tcp_input_data() and tcp_input_listener_unbound()
       * share the (void *, mblk_t *, void *, ip_recv_attr_t *) argument list.
       *
       * tcp_verifyicmp() accepts both an icmph_t * and an icmp6_t *.
       * NOTE(review): presumably only the one matching the packet's IP version
       * is consulted -- confirm against the definition in tcp_input.c.
  618  */
  619 extern void     tcp_icmp_input(void *, mblk_t *, void *, ip_recv_attr_t *);
  620 extern void     tcp_input_data(void *, mblk_t *, void *, ip_recv_attr_t *);
  621 extern void     tcp_input_listener_unbound(void *, mblk_t *, void *,
  622                     ip_recv_attr_t *);
  623 extern boolean_t        tcp_paws_check(tcp_t *, tcpha_t *, tcp_opt_t *);
  624 extern uint_t   tcp_rcv_drain(tcp_t *);
  625 extern void     tcp_rcv_enqueue(tcp_t *, mblk_t *, uint_t, cred_t *);
  626 extern boolean_t        tcp_verifyicmp(conn_t *, void *, icmph_t *, icmp6_t *,
  627                             ip_recv_attr_t *);
  628 
  629 /*
  630  * Kernel socket related functions in tcp_socket.c.
       *
       * tcp_fallback() takes a so_proto_quiesced_cb_t together with a
       * sock_quiesce_arg_t *.
       * NOTE(review): presumably the latter is the argument handed to the
       * former -- confirm in tcp_socket.c.
       *
       * The meaning of the int and boolean_t results is not stated in this
       * header; see the definitions.
  631  */
  632 extern int      tcp_fallback(sock_lower_handle_t, queue_t *, boolean_t,
  633                     so_proto_quiesced_cb_t, sock_quiesce_arg_t *);
  634 extern boolean_t tcp_newconn_notify(tcp_t *, ip_recv_attr_t *);
  635 
  636 /*
  637  * Timer related functions in tcp_timers.c.
       *
       * tcp_ack_timer(), tcp_close_linger_timeout(), tcp_keepalive_timer(),
       * tcp_push_timer(), tcp_reass_timer() and tcp_timer() all have the
       * void (*)(void *) shape that tcp_timeout() accepts as its second
       * argument.
       *
       * tcp_timeout() takes its third argument as an hrtime_t and returns a
       * timeout_id_t, which is the type tcp_timeout_cancel() takes as its
       * second argument.
  638  */
  639 extern void     tcp_ack_timer(void *);
  640 extern void     tcp_close_linger_timeout(void *);
  641 extern void     tcp_keepalive_timer(void *);
  642 extern void     tcp_push_timer(void *);
  643 extern void     tcp_reass_timer(void *);
  644 extern mblk_t   *tcp_timermp_alloc(int);
  645 extern void     tcp_timermp_free(tcp_t *);
  646 extern timeout_id_t tcp_timeout(conn_t *, void (*)(void *), hrtime_t);
  647 extern clock_t  tcp_timeout_cancel(conn_t *, timeout_id_t);
  648 extern void     tcp_timer(void *);
  649 extern void     tcp_timers_stop(tcp_t *);
  650 
  651 /*
  652  * TCP TPI related functions in tcp_tpi.c.
       *
       * Most entries take the tcp_t directly.  tcp_tpi_accept(),
       * tcp_tpi_close(), tcp_tpi_close_accept(), tcp_tpi_opt_get() and
       * tcp_tpi_opt_set() take a queue_t * instead.
       *
       * tcp_send_conn_ind() takes three arguments (void *, mblk_t *, void *),
       * whereas tcp_send_pending() takes the same three followed by an
       * ip_recv_attr_t *.
       *
       * Apart from its first argument (queue_t * rather than conn_t *),
       * tcp_tpi_opt_set() has the same argument list as tcp_opt_set().
  653  */
  654 extern void     tcp_addr_req(tcp_t *, mblk_t *);
  655 extern void     tcp_capability_req(tcp_t *, mblk_t *);
  656 extern boolean_t        tcp_conn_con(tcp_t *, uchar_t *, mblk_t *,
  657                             mblk_t **, ip_recv_attr_t *);
  658 extern void     tcp_err_ack(tcp_t *, mblk_t *, int, int);
  659 extern void     tcp_err_ack_prim(tcp_t *, mblk_t *, int, int, int);
  660 extern void     tcp_info_req(tcp_t *, mblk_t *);
  661 extern void     tcp_send_conn_ind(void *, mblk_t *, void *);
  662 extern void     tcp_send_pending(void *, mblk_t *, void *, ip_recv_attr_t *);
  663 extern void     tcp_tpi_accept(queue_t *, mblk_t *);
  664 extern void     tcp_tpi_bind(tcp_t *, mblk_t *);
  665 extern int      tcp_tpi_close(queue_t *, int);
  666 extern int      tcp_tpi_close_accept(queue_t *);
  667 extern void     tcp_tpi_connect(tcp_t *, mblk_t *);
  668 extern int      tcp_tpi_opt_get(queue_t *, t_scalar_t, t_scalar_t, uchar_t *);
  669 extern int      tcp_tpi_opt_set(queue_t *, uint_t, int, int, uint_t, uchar_t *,
  670                     uint_t *, uchar_t *, void *, cred_t *);
  671 extern void     tcp_tpi_unbind(tcp_t *, mblk_t *);
  672 extern void     tcp_tli_accept(tcp_t *, mblk_t *);
  673 extern void     tcp_use_pure_tpi(tcp_t *);
  674 extern void     tcp_do_capability_ack(tcp_t *, struct T_capability_ack *,
  675                     t_uscalar_t);
  676 
  677 /*
  678  * TCP option processing related functions in tcp_opt_data.c.
       *
       * tcp_opt_get() and tcp_opt_set() take a conn_t * as their first
       * argument, whereas tcp_opt_default() takes a queue_t *.  All three
       * return an int.
       *
       * NOTE(review): the meaning of the int result (presumably 0 or an errno
       * value) is not stated in this header -- confirm in tcp_opt_data.c.
  679  */
  680 extern int      tcp_opt_default(queue_t *, t_scalar_t, t_scalar_t, uchar_t *);
  681 extern int      tcp_opt_get(conn_t *, int, int, uchar_t *);
  682 extern int      tcp_opt_set(conn_t *, uint_t, int, int, uint_t, uchar_t *,
  683                     uint_t *, uchar_t *, void *, cred_t *);
  684 
  685 /*
  686  * TCP time wait processing related functions in tcp_time_wait.c.
       *
       * tcp_time_wait_collector() takes a single void * and returns nothing,
       * i.e. it has the void (*)(void *) shape.
       * NOTE(review): presumably it is invoked as a timer/timeout callback --
       * confirm in tcp_time_wait.c.
       *
       * tcp_time_wait_remove() is the only entry in this group that returns a
       * result (a boolean_t); it also takes a tcp_squeue_priv_t * in addition
       * to the tcp_t.
  687  */
  688 extern void             tcp_time_wait_append(tcp_t *);
  689 extern void             tcp_time_wait_collector(void *);
  690 extern boolean_t        tcp_time_wait_remove(tcp_t *, tcp_squeue_priv_t *);
  691 extern void             tcp_time_wait_processing(tcp_t *, mblk_t *, uint32_t,
  692                             uint32_t, int, tcpha_t *, ip_recv_attr_t *);
  693 
  694 /*
  695  * Miscellaneous functions in tcp_misc.c.
       *
       * tcp_find_listener_conf(), tcp_listener_conf_cleanup() and
       * tcp_stack_cpu_add() all take the tcp_stack_t * as their first
       * argument; tcp_ioctl_abort_conn() takes (queue_t *, mblk_t *) instead.
       *
       * tcp_find_listener_conf() takes an in_port_t and returns a uint32_t.
       * NOTE(review): what that value represents is not visible from this
       * header -- see the definition.
  696  */
  697 extern uint32_t tcp_find_listener_conf(tcp_stack_t *, in_port_t);
  698 extern void     tcp_ioctl_abort_conn(queue_t *, mblk_t *);
  699 extern void     tcp_listener_conf_cleanup(tcp_stack_t *);
  700 extern void     tcp_stack_cpu_add(tcp_stack_t *, processorid_t);
  701 
  702 #endif  /* _KERNEL */
  703 
  704 #ifdef  __cplusplus
  705 }
  706 #endif
  707 
  708 #endif  /* _INET_TCP_IMPL_H */