<?xml version='1.0' encoding='utf-8'?>

<!-- [rfced] Change log section removed from draft-ietf-tcpm-2140bis-11-manual.txt -->

<!DOCTYPE rfc SYSTEM "rfc2629.dtd" [
<!ENTITY RFC0793 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.0793.xml">
<!ENTITY RFC1122 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.1122.xml">
<!ENTITY RFC1191 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.1191.xml">
<!ENTITY RFC2119 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.2119.xml">
<!ENTITY RFC4821 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.4821.xml">
<!ENTITY RFC5681 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.5681.xml">
<!ENTITY RFC6298 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.6298.xml">
<!ENTITY RFC7413 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.7413.xml">
<!ENTITY RFC8174 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.8174.xml">
<!ENTITY RFC8201 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.8201.xml">
<!ENTITY I-D.allman-tcpm-bump-initcwnd SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml3/reference.I-D.draft-allman-tcpm-bump-initcwnd-00.xml">
<!ENTITY I-D.ietf-tcpm-generalized-ecn SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml3/reference.I-D.draft-ietf-tcpm-generalized-ecn-07.xml">
<!ENTITY I-D.hughes-restart SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml3/reference.I-D.draft-hughes-restart-00.xml">
<!ENTITY RFC1644 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.1644.xml">
<!ENTITY RFC1379 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.1379.xml">
<!ENTITY RFC2001 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.2001.xml">
<!ENTITY RFC2140 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.2140.xml">
<!ENTITY RFC2414 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.2414.xml">
<!ENTITY RFC2663 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.2663.xml">
<!ENTITY RFC3390 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.3390.xml">
<!ENTITY RFC3124 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.3124.xml">
<!ENTITY RFC4340 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.4340.xml">
<!ENTITY RFC4960 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.4960.xml">
<!ENTITY RFC5925 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.5925.xml">
<!ENTITY RFC6437 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.6437.xml">
<!ENTITY RFC6691 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.6691.xml">
<!ENTITY RFC6928 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.6928.xml">
<!ENTITY RFC7231 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.7231.xml">
<!ENTITY RFC7323 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.7323.xml">
<!ENTITY RFC7424 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.7424.xml">
<!ENTITY RFC7540 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.7540.xml">
<!ENTITY RFC7661 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.7661.xml">
<!ENTITY RFC8684 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.8684.xml">
]>

<rfc xmlns:xi="http://www.w3.org/2001/XInclude"
     docName="draft-ietf-tcpm-2140bis-11"
     number="9040"
     submissionType="IETF"
     category="info"
     consensus="true"
     obsoletes="2140"
     updates=""
     ipr="trust200902"
     xml:lang="en"
     symRefs="true"
     sortRefs="true"
     tocInclude="true"
     version="3">
	<!-- Generated by id2xml 1.5.0 on 2021-05-03T23:46:00Z -->
	<?rfc strict="yes"?>
	<?rfc compact="yes"?>
	<?rfc subcompact="no"?>
	<?rfc symrefs="yes"?>
	<?rfc sortrefs="no"?>
	<?rfc text-list-symbols="o*+-"?>
	<?rfc toc="yes"?>

	<front>
    <title>TCP Control Block Interdependence</title>
    <seriesInfo name="RFC" value="9040"/>
    <author initials="J." surname="Touch" fullname="Joe Touch">
      <organization abbrev="Independent"/>
      <address>
        <postal>
          <street/>
          <city>Manhattan Beach</city>
          <region>CA</region>
          <code>90266</code>
          <country>United States of America</country>
        </postal>
        <phone>+1 (310) 560-0334</phone>
        <email>touch@strayalpha.com</email>
      </address>
    </author>
    <author initials="M." surname="Welzl" fullname="Michael Welzl">
      <organization>University of Oslo</organization>
      <address>
        <postal>
          <street>PO Box 1080 Blindern</street>
          <city>Oslo</city>
          <region/>
          <code>N-0316</code>
          <country>Norway</country>
        </postal>
        <phone>+47 22 85 24 20</phone>
        <email>michawe@ifi.uio.no</email>
      </address>
    </author>
    <author initials="S." surname="Islam" fullname="Safiqul Islam">
      <organization>University of Oslo</organization>
      <address>
        <postal>
          <street>PO Box 1080 Blindern</street>
          <street>Oslo  N-0316</street>
          <street>Norway</street>
        </postal>
        <phone>+47 22 84 08 37</phone>
        <email>safiquli@ifi.uio.no</email>
      </address>
    </author>
    <date year="2021" month="July"/>
    <workgroup>TCPM WG</workgroup>

<!-- [rfced] Please insert any keywords (beyond those that appear in
the title) for use on https://www.rfc-editor.org/search. -->

<keyword>example</keyword>

    <abstract>
      <t>
   This memo provides guidance to TCP implementers that is intended to
   help improve connection convergence to steady-state operation
   without affecting interoperability. It updates and replaces RFC
   2140's description of sharing TCP state, as typically represented in
   TCP Control Blocks, among similar concurrent or consecutive
   connections.</t>
    </abstract>
  </front>
  <middle>
    <section anchor="sect-1" numbered="true" toc="default">
      <name>Introduction</name>

      <t>
   TCP is a connection-oriented reliable transport protocol layered over IP
   <xref target="RFC0793" format="default"/>. Each TCP connection maintains
   state, usually in a data structure called the "TCP Control Block (TCB)". The
   TCB contains information about the connection state, its associated local
   process, and feedback parameters about the connection's transmission
   properties. As originally specified and usually implemented, most TCB
   information is maintained on a per-connection basis. Some implementations
   share certain TCB information across connections to the same host <xref
   target="RFC2140" format="default"/>. Such sharing is intended to lead to
   better overall transient performance, especially for numerous short-lived
   and simultaneous connections, as can be used in the World Wide Web and
   other applications <xref target="Be94" format="default"/> <xref
   target="Br02" format="default"/>. This sharing of state is intended to help
   TCP connections converge to long-term behavior (assuming stable application
   load, i.e., so-called "steady-state") more quickly without affecting TCP
   interoperability.</t>

      <t>
   This document updates RFC 2140's discussion of TCB state sharing and
   provides a complete replacement for that document. This state sharing
   affects only TCB initialization <xref target="RFC2140" format="default"/>
   and thus has no effect on the long-term behavior of TCP after a connection
   has been established or on interoperability. Path information shared
   across SYN destination port numbers assumes that TCP segments having the
   same host-pair experience the same path properties, i.e., that traffic is
   not routed differently based on port numbers or other connection parameters
   (also addressed further in <xref target="sect-8.1" format="default"/>). The
   observations about TCB sharing in this document apply similarly to any
   protocol with congestion state, including the Stream Control Transmission
   Protocol (SCTP) <xref target="RFC4960" format="default"/> and the Datagram
   Congestion Control Protocol (DCCP) <xref target="RFC4340"
   format="default"/>, as well as to individual subflows in Multipath TCP
   <xref target="RFC8684" format="default"/>.</t>
    </section>

    <section anchor="sect-2" numbered="true" toc="default">
      <name>Conventions Used in This Document</name>

        <t>
    The key words "<bcp14>MUST</bcp14>", "<bcp14>MUST NOT</bcp14>",
    "<bcp14>REQUIRED</bcp14>", "<bcp14>SHALL</bcp14>", "<bcp14>SHALL
    NOT</bcp14>", "<bcp14>SHOULD</bcp14>", "<bcp14>SHOULD NOT</bcp14>",
    "<bcp14>RECOMMENDED</bcp14>", "<bcp14>NOT RECOMMENDED</bcp14>",
    "<bcp14>MAY</bcp14>", and "<bcp14>OPTIONAL</bcp14>" in this document are
    to be interpreted as described in BCP&#160;14 <xref target="RFC2119"/>
    <xref target="RFC8174"/> when, and only when, they appear in all capitals,
    as shown here.
        </t>

      <t>
   The core of this document describes behavior that is already permitted by
   TCP standards. As a result, this document provides informative guidance but
   does not use normative language except when quoting other documents.
   Normative language is used in <xref target="sect-c"/> as examples of
   requirements for future consideration.</t>
    </section>

    <section anchor="sect-3" numbered="true" toc="default">
      <name>Terminology</name>

      <t>
   The following terminology is used frequently in this document.  Items
   preceded with a "+" may be part of the state maintained as TCP connection
   state in the TCB of associated connections and are the focus of sharing as
   described in this document.  Note that terms are used as originally
   introduced where possible; in some cases, direction is indicated with a
   suffix (_S for send, _R for receive) and in other cases spelled out
   (sendcwnd).
      </t>

      <dl newline="false" spacing="normal" indent="6">
        <dt>+cwnd:</dt>
        <dd>TCP congestion window size <xref target="RFC5681" format="default"/></dd>
        <dt>host:</dt>
        <dd>a source or sink of TCP segments associated with a single IP
        address</dd>
        <dt>host-pair:</dt>
        <dd>a pair of hosts and their corresponding IP addresses</dd>
        <dt>ISN:</dt>
        <dd>Initial Sequence Number</dd>
        <dt>+MMS_R:</dt>
        <dd>maximum message size that can be received, the largest
        received transport payload of an IP datagram <xref target="RFC1122" format="default"/></dd>
        <dt>+MMS_S:</dt>
        <dd>maximum message size that can be sent, the largest
        transmitted transport payload of an IP datagram <xref target="RFC1122" format="default"/></dd>
        <dt>path:</dt>
        <dd>an Internet path between the IP addresses of two hosts</dd>
        <dt>PCB:</dt>
        <dd>protocol control block, the data associated with a protocol as
        maintained by an endpoint; a TCP PCB is called a "TCB"</dd>
        <dt>PLPMTUD:</dt>
        <dd>packetization-layer path MTU discovery, a mechanism that
        uses transport packets to discover the Path Maximum Transmission Unit (PMTU) <xref
        target="RFC4821" format="default"/></dd>
        <dt>+PMTU:</dt>
        <dd>largest IP datagram that can traverse a path
        <xref target="RFC1191" format="default"/> <xref target="RFC8201" format="default"/></dd>
        <dt>PMTUD:</dt>
        <dd>path-layer MTU discovery, a mechanism that
        relies on ICMP error messages to discover the PMTU <xref
        target="RFC1191" format="default"/> <xref target="RFC8201" format="default"/></dd>
        <dt>+RTT:</dt>
        <dd>round-trip time of a TCP packet exchange <xref
        target="RFC0793" format="default"/></dd>
        <dt>+RTTVAR:</dt>
        <dd>variation of round-trip times of a TCP packet
        exchange <xref target="RFC6298" format="default"/></dd>
        <dt>+rwnd:</dt>
        <dd>TCP receive window size <xref
        target="RFC5681" format="default"/></dd>
        <dt>+sendcwnd:</dt>
        <dd>TCP send-side congestion window (cwnd) size
        <xref target="RFC5681" format="default"/></dd>
        <dt>+sendMSS:</dt>
        <dd>TCP maximum segment size, a value
        transmitted in a TCP option that represents the largest TCP user data
        payload that can be received <xref target="RFC6691" format="default"/></dd>
        <dt>+ssthresh:</dt>
        <dd>TCP slow-start threshold <xref
        target="RFC5681" format="default"/></dd>
        <dt>TCB:</dt>
        <dd>TCP Control Block, the data associated with a TCP
        connection as maintained by an endpoint</dd>
        <dt>TCP-AO:</dt>
        <dd>TCP Authentication Option <xref
        target="RFC5925" format="default"/></dd>
        <dt>TFO:</dt>
        <dd>TCP Fast Open option <xref target="RFC7413" format="default"/></dd>
        <dt>+TFO_cookie:</dt>
        <dd>TCP Fast Open cookie, state that is used
        as part of the TFO mechanism, when TFO is supported <xref
        target="RFC7413" format="default"/></dd>
        <dt>+TFO_failure:</dt>
        <dd>an indication of when TFO option
        negotiation failed, when TFO is supported</dd>
        <dt>+TFOinfo:</dt>
        <dd>information cached when a TFO connection is
        established, which includes the TFO_cookie <xref
        target="RFC7413" format="default"/></dd>
      </dl>
    </section>
    <section anchor="sect-4" numbered="true" toc="default">
      <name>The TCP Control Block (TCB)</name>
      <t>
   A TCB describes the data associated with each connection, i.e., with
   each association of a pair of applications across the network. The
   TCB contains at least the following information <xref target="RFC0793" format="default"/>:</t>

<ul empty="true">
        <li><t>Local process state</t>
            <ul empty="true" spacing="compact">
            <li>pointers to send and receive buffers</li>
            <li>pointers to retransmission queue and current segment</li>
            <li>pointers to Internet Protocol (IP) PCB</li>
            </ul>
        </li>
        <li><t>Per-connection shared state</t>
            <ul empty="true" spacing="compact">
            <li><t>macro-state</t>
                <ul empty="true" spacing="compact">
                <li>connection state</li>
                <li>timers</li>
                <li>flags</li>
                <li>local and remote host numbers and ports</li>
                <li>TCP option state</li>
                </ul>
            </li>
            <li><t>micro-state</t>
                <ul empty="true" spacing="compact">
                <li>send and receive window state (size*, current number)</li>
                <li>congestion window size (sendcwnd)*</li>
                <li>congestion window size threshold (ssthresh)*</li>
                <li>max window size seen*</li>
                <li>sendMSS#</li>
                <li>MMS_S#</li>
                <li>MMS_R#</li>
                <li>PMTU#</li>
                <li>round-trip time and its variation#</li>
                </ul>
            </li>
            </ul>
        </li>
</ul>

      <t>
   The per-connection information is shown as split into macro-state and
   micro-state, terminology borrowed from <xref target="Co91"
   format="default"/>. Macro-state describes the protocol for establishing the
   initial shared state about the connection; we include the endpoint numbers
   and components (timers, flags) required upon commencement that are later
   used to help maintain that state. Micro-state describes the protocol after
   a connection has been established, to maintain the reliability and
   congestion control of the data transferred in the connection.</t>

      <t>
   We distinguish two other classes of shared micro-state that are associated
   more with host-pairs than with application pairs.

   One class is clearly host-pair dependent (shown above as "#", e.g.,
   sendMSS, MMS_R, MMS_S, PMTU, RTT), because these parameters are defined by
   the endpoint or endpoint pair (of the given example: sendMSS, MMS_R, MMS_S,
   RTT) or are already cached and shared on that basis (of the given example:
   PMTU <xref target="RFC1191" format="default"/> <xref target="RFC4821"
   format="default"/>).

The other is host-pair dependent in its aggregate (shown above as "*", e.g.,
congestion window information, current window sizes, etc.) because they depend
on the total capacity between the two endpoints.</t>
      <t>
   Not all of the TCB state is necessarily shareable. In particular,
   some TCP options are negotiated only upon request by the application
   layer, so their use may not be correlated across connections. Other
   options negotiate connection-specific parameters, which are
   similarly not shareable. These are discussed further in <xref target="sect-b"/>.</t>
      <t>
   Finally, we exclude rwnd from further discussion because its value
   should depend on the send window size, so it is already addressed by
   send window sharing and is not independently affected by sharing.</t>
    </section>
    <section anchor="sect-5" numbered="true" toc="default">
      <name>TCB Interdependence</name>
      <t>
   There are two cases of TCB interdependence. Temporal sharing occurs
   when the TCB of an earlier (now CLOSED) connection to a host is used
   to initialize some parameters of a new connection to that same host,
   i.e., in sequence. Ensemble sharing occurs when a currently active
   connection to a host is used to initialize another (concurrent)
   connection to that host.</t>
    </section>
    <section anchor="sect-6" numbered="true" toc="default">
      <name>Temporal Sharing</name>

      <t>
   The TCB data cache is accessed in two ways: it is read to initialize
   new TCBs and written when more current per-host state is available.</t>
      <section anchor="sect-6.1" numbered="true" toc="default">
        <name>Initialization of a New TCB</name>
        <t>
   TCBs for new connections can be initialized using cached context
   from past connections as follows:</t>

<table anchor="TCB_initialization">
  <name>Temporal Sharing - TCB Initialization</name>
  <thead>
    <tr>
      <th>Cached TCB</th>
      <th>New TCB</th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <td>old_MMS_S</td>
      <td>old_MMS_S or not cached (2)</td>
    </tr>
    <tr>
      <td>old_MMS_R</td>
      <td>old_MMS_R or not cached (2)</td>
    </tr>
    <tr>
      <td>old_sendMSS</td>
      <td>old_sendMSS</td>
    </tr>
    <tr>
      <td>old_PMTU</td>
      <td>old_PMTU (1)</td>
    </tr>
    <tr>
      <td>old_RTT</td>
      <td>old_RTT</td>
    </tr>
    <tr>
      <td>old_RTTVAR</td>
      <td>old_RTTVAR</td>
    </tr>
    <tr>
      <td>old_option</td>
      <td>(option specific)</td>
    </tr>
    <tr>
      <td>old_ssthresh</td>
      <td>old_ssthresh</td>
    </tr>
    <tr>
      <td>old_sendcwnd</td>
      <td>old_sendcwnd</td>
    </tr>
  </tbody>
</table>

<dl>
<dt>(1)</dt><dd>Note that PMTU is cached at the IP layer <xref target="RFC1191" format="default"/> <xref target="RFC4821" format="default"/>.
</dd>
<dt>(2)</dt><dd>Note that some values are not cached when they are computed locally
   (MMS_R) or indicated in the connection itself (MMS_S in the SYN).</dd>
</dl>
        <t>
   <xref target="Option_Info_Initialization"/> gives an overview of
   option-specific information that can be shared. Additional information on
   some specific TCP options and sharing is provided in <xref
   target="sect-b"/>.</t>

<table anchor="Option_Info_Initialization">
  <name>Temporal Sharing - Option Info Initialization</name>
  <thead>
    <tr>
      <th>Cached</th>
      <th>New</th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <td>old_TFO_cookie</td>
      <td>old_TFO_cookie</td>
    </tr>
    <tr>
      <td>old_TFO_failure</td>
      <td>old_TFO_failure</td>
    </tr>
  </tbody>
</table>

      </section>
      <section anchor="sect-6.2" numbered="true" toc="default">
        <name>Updates to the TCB Cache</name>
        <t>
   During a connection, the TCB cache can be updated based on events of
   current connections and their TCBs as they progress over time, as shown in
   <xref target="Cache_Updates"/>.</t>

<table anchor="Cache_Updates">
  <name>Temporal Sharing - Cache Updates</name>
  <thead>
    <tr>
      <th>Cached TCB</th>
      <th>Current TCB</th>
      <th>When?</th>
      <th>New Cached TCB</th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <td>old_MMS_S</td>
      <td>curr_MMS_S</td>
      <td>OPEN</td>
      <td>curr_MMS_S</td>
    </tr>
    <tr>
      <td>old_MMS_R</td>
      <td>curr_MMS_R</td>
      <td>OPEN</td>
      <td>curr_MMS_R</td>
    </tr>
    <tr>
      <td>old_sendMSS</td>
      <td>curr_sendMSS</td>
      <td>MSSopt</td>
      <td>curr_sendMSS</td>
    </tr>
    <tr>
      <td>old_PMTU</td>
      <td>curr_PMTU</td>
      <td>PMTUD (1) / PLPMTUD (1)</td>
      <td>curr_PMTU</td>
    </tr>
    <tr>
      <td>old_RTT</td>
      <td>curr_RTT</td>
      <td>CLOSE</td>
      <td>merge(curr,old)</td>
    </tr>
    <tr>
      <td>old_RTTVAR</td>
      <td>curr_RTTVAR</td>
      <td>CLOSE</td>
      <td>merge(curr,old)</td>
    </tr>
    <tr>
      <td>old_option</td>
      <td>curr_option</td>
      <td>ESTAB</td>
      <td>(depends on option)</td>
    </tr>
    <tr>
      <td>old_ssthresh</td>
      <td>curr_ssthresh</td>
      <td>CLOSE</td>
      <td>merge(curr,old)</td>
    </tr>
    <tr>
      <td>old_sendcwnd</td>
      <td>curr_sendcwnd</td>
      <td>CLOSE</td>
      <td>merge(curr,old)</td>
    </tr>
  </tbody>
</table>

<dl>
<dt>(1)</dt><dd>Note that PMTU is cached at the IP layer <xref target="RFC1191"
format="default"/> <xref target="RFC4821" format="default"/>.</dd>
</dl>
        <t>
   Merge() is the function that combines the current and previous (old)
   values and may vary for each parameter of the TCB cache. The
   particular function is not specified in this document; examples
   include windowed averages (mean of the past N values, for some N)
   and exponential decay (new = (1-alpha)*old + alpha *new, where alpha
   is in the range [0..1]).</t>
        <t>
   <xref target="Option_Info_Updates"/> gives an overview of option-specific
   information that can be similarly shared. The TFO cookie is maintained
   until the client explicitly requests it be updated as a separate event.</t>

<table anchor="Option_Info_Updates">
  <name>Temporal Sharing - Option Info Updates</name>
  <thead>
    <tr>
      <th>Cached</th>
      <th>Current</th>
      <th>When?</th>
      <th>New Cached</th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <td>old_TFO_cookie</td>
      <td>old_TFO_cookie</td>
      <td>ESTAB</td>
      <td>old_TFO_cookie</td>
    </tr>
    <tr>
      <td>old_TFO_failure</td>
      <td>old_TFO_failure</td>
      <td>ESTAB</td>
      <td>old_TFO_failure</td>
    </tr>
  </tbody>
</table>

      </section>
      <section anchor="sect-6.3" numbered="true" toc="default">
        <name>Discussion</name>
        <t>
   As noted, there is no particular benefit to caching MMS_S and MMS_R as
   these are reported by the local IP stack. Caching sendMSS and PMTU is
   trivial; reported values are cached (PMTU at the IP layer), and the most
   recent values are used. The cache is updated when the MSS option is
   received in a SYN or after PMTUD (i.e., when an ICMPv4 Fragmentation Needed
   <xref target="RFC1191" format="default"/> or ICMPv6 Packet Too Big message
   is received <xref target="RFC8201" format="default"/> or the equivalent is
   inferred, e.g., as from PLPMTUD <xref target="RFC4821" format="default"/>),
   respectively, so the cache always has the most recent values from any
   connection. For sendMSS, the cache is consulted only at connection
   establishment and not otherwise updated, which means that MSS options do
   not affect current connections. The default sendMSS is never saved; only
   reported MSS values update the cache, so an explicit override is required
   to reduce the sendMSS. Cached sendMSS affects only data sent in the SYN
   segment, i.e., during client connection initiation or during simultaneous
   open; the MSS of all other segments is constrained by the value updated as
   included in the SYN.
</t>

        <t>
   RTT values are updated by formulae that merge the old and new values, as
   noted in <xref target="sect-6.2" format="default"/>. Dynamic RTT estimation
   requires a sequence of RTT measurements. As a result, the cached RTT (and
   its variation) is an average of its previous value with the contents of the
   currently active TCB for that host, when a TCB is closed. RTT values are
   updated only when a connection is closed. The method for merging old and
   current values needs to attempt to reduce the transient effects of the new
   connections.</t>
        <t>
   The updates for RTT, RTTVAR, and ssthresh rely on existing
   information, i.e., old values. Should no such values exist, the
   current values are cached instead.</t>
        <t>
   TCP options are copied or merged depending on the details of each
   option. For example, TFO state is updated when a connection is established
   and read before establishing a new connection.</t>

        <t>
Sections <xref target="sect-8" format="counter"/> and <xref target="sect-9"
format="counter"/> discuss compatibility issues and implications of sharing
the specific information listed above. <xref target="sect-10"
format="default"/> gives an overview of known implementations.</t>
        <t>
   Most cached TCB values are updated when a connection closes. The exceptions
   are MMS_R and MMS_S, which are reported by IP <xref target="RFC1122"
   format="default"/>; PMTU, which is updated after Path MTU Discovery and
   also reported by IP <xref target="RFC1191" format="default"/> <xref
   target="RFC4821" format="default"/> <xref target="RFC8201"
   format="default"/>; and sendMSS, which is updated if the MSS option is
   received in the TCP SYN header.</t>
        <t>
   Sharing sendMSS information affects only data in the SYN of the next
   connection, because sendMSS information is typically included in
   most TCP SYN segments. Caching PMTU can accelerate the efficiency of
   PMTUD but can also result in black-holing until corrected if in
   error. Caching MMS_R and MMS_S may be of little direct value as they
   are reported by the local IP stack anyway.</t>

        <t>
   The way in which state related to other TCP options can be shared depends on the
   details of that option. For example, TFO state includes the TCP Fast Open
   cookie <xref target="RFC7413" format="default"/> or, in case TFO fails, a negative TCP Fast Open
   response. RFC 7413 states, </t>

<blockquote>The client <bcp14>MUST</bcp14> cache negative responses from the server in order to avoid potential connection failures. Negative responses include the server not acknowledging the data in the SYN, ICMP error messages, and (most importantly) no response (SYN-ACK) from the server at all, i.e., connection timeout.
</blockquote>
<t>TFOinfo is cached when a connection is established.</t>
        <t>
   State related to other TCP options might not be as readily cached. For
   example, TCP-AO <xref target="RFC5925" format="default"/> success or
   failure between a host-pair for a single SYN destination port might be
   usefully cached. TCP-AO success or failure to other SYN destination ports
   on that host-pair is never useful to cache because TCP-AO security
   parameters can vary per service.</t>
      </section>
    </section>
    <section anchor="sect-7" numbered="true" toc="default">
      <name>Ensemble Sharing</name>
      <t>
   Sharing cached TCB data across concurrent connections requires
   attention to the aggregate nature of some of the shared state. For
   example, although MSS and RTT values can be shared by copying, it
   may not be appropriate to simply copy congestion window or ssthresh
   information; instead, the new values can be a function (f) of the
   cumulative values and the number of connections (N).</t>
      <section anchor="sect-7.1" numbered="true" toc="default">
        <name>Initialization of a New TCB</name>
        <t>
   TCBs for new connections can be initialized using cached context
   from concurrent connections as follows:</t>

<table anchor="TCB_Initialization">
  <name>Ensemble Sharing - TCB Initialization</name>
  <thead>
    <tr>
      <th>Cached TCB</th>
      <th>New TCB</th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <td>old_MMS_S</td>
      <td>old_MMS_S</td>
    </tr>
    <tr>
      <td>old_MMS_R</td>
      <td>old_MMS_R</td>
    </tr>
    <tr>
      <td>old_sendMSS</td>
      <td>old_sendMSS</td>
    </tr>
    <tr>
      <td>old_PMTU</td>
      <td>old_PMTU (1)</td>
    </tr>
    <tr>
      <td>old_RTT</td>
      <td>old_RTT</td>
    </tr>
    <tr>
      <td>old_RTTVAR</td>
      <td>old_RTTVAR</td>
    </tr>
    <tr>
      <td>sum(old_ssthresh)</td>
      <td>f(sum(old_ssthresh), N)</td>
    </tr>
    <tr>
      <td>sum(old_sendcwnd)</td>
      <td>f(sum(old_sendcwnd), N)</td>
    </tr>
    <tr>
      <td>old_option</td>
      <td>(option specific)</td>
    </tr>
  </tbody>
</table>

<dl>
<dt>(1)</dt>
<dd>Note that PMTU is cached at the IP layer <xref target="RFC1191" format="default"/> <xref target="RFC4821" format="default"/>.</dd>
</dl>
        <t>
   In <xref target="TCB_Initialization"/>, the cached sum() is a total across all active
   connections because these parameters act in aggregate; similarly, f()
   is a function that updates that sum based on the new connection's
   values, represented as "N".</t>
        <t>
   <xref target="Ensemble_Option_Info_Initialization"/> gives an overview of
   option-specific information that can be similarly shared. Again, the
   TFO_cookie is updated upon explicit client request, which is a separate
   event.</t>

<table anchor="Ensemble_Option_Info_Initialization">
  <name>Ensemble Sharing - Option Info Initialization</name>
  <thead>
    <tr>
      <th>Cached</th>
      <th>New</th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <td>old_TFO_cookie</td>
      <td>old_TFO_cookie</td>
    </tr>
    <tr>
      <td>old_TFO_failure</td>
      <td>old_TFO_failure</td>
    </tr>
  </tbody>
</table>

      </section>
      <section anchor="sect-7.2" numbered="true" toc="default">
        <name>Updates to the TCB Cache</name>
        <t>
   During a connection, the TCB cache can be updated based on changes to
   concurrent connections and their TCBs, as shown below:</t>

<table anchor="Ensemble_Cache_Updates">
  <name>Ensemble Sharing - Cache Updates</name>
  <thead>
    <tr>
      <th>Cached TCB</th>
      <th>Current TCB</th>
      <th>When?</th>
      <th>New Cached TCB</th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <td>old_MMS_S</td>
      <td>curr_MMS_S</td>
      <td>OPEN</td>
      <td>curr_MMS_S</td>
    </tr>
    <tr>
      <td>old_MMS_R</td>
      <td>curr_MMS_R</td>
      <td>OPEN</td>
      <td>curr_MMS_R</td>
    </tr>
    <tr>
      <td>old_sendMSS</td>
      <td>curr_sendMSS</td>
      <td>MSSopt</td>
      <td>curr_sendMSS</td>
    </tr>
    <tr>
      <td>old_PMTU</td>
      <td>curr_PMTU</td>
      <td>PMTUD+ / PLPMTUD+</td>
      <td>curr_PMTU</td>
    </tr>
    <tr>
      <td>old_RTT</td>
      <td>curr_RTT</td>
      <td>update</td>
      <td>rtt_update(old, curr)</td>
    </tr>
    <tr>
      <td>old_RTTVAR</td>
      <td>curr_RTTVAR</td>
      <td>update</td>
      <td>rtt_update(old, curr)</td>
    </tr>
    <tr>
      <td>old_ssthresh</td>
      <td>curr_ssthresh</td>
      <td>update</td>
      <td>adjust sum as appropriate</td>
    </tr>
    <tr>
      <td>old_sendcwnd</td>
      <td>curr_sendcwnd</td>
      <td>update</td>
      <td>adjust sum as appropriate</td>
    </tr>
    <tr>
      <td>old_option</td>
      <td>curr_option</td>
      <td>(depends)</td>
      <td>(option specific)</td>
    </tr>
  </tbody>
</table>

<dl>
<dt>+</dt>
<dd>Note that the PMTU is cached at the IP layer <xref target="RFC1191" format="default"/> <xref target="RFC4821" format="default"/>.</dd>
</dl>
        <t>
   In <xref target="Ensemble_Cache_Updates"/>, rtt_update() is the function
   used to combine old and current values, e.g., as a windowed average or
   exponentially decayed average.</t>
        <t>
	  <xref target="Ensemble_Option_Info_Updates"/> gives an overview of option-specific information
   that can be similarly shared.</t>

<table anchor="Ensemble_Option_Info_Updates">
  <name>Ensemble Sharing - Option Info Updates</name>
  <thead>
    <tr>
      <th>Cached</th>
      <th>Current</th>
      <th>When?</th>
      <th>New Cached</th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <td>old_TFO_cookie</td>
      <td>old_TFO_cookie</td>
      <td>ESTAB</td>
      <td>old_TFO_cookie</td>
    </tr>
    <tr>
      <td>old_TFO_failure</td>
      <td>old_TFO_failure</td>
      <td>ESTAB</td>
      <td>old_TFO_failure</td>
    </tr>
  </tbody>
</table>

      </section>
      <section anchor="sect-7.3" numbered="true" toc="default">
        <name>Discussion</name>

        <t>
   For ensemble sharing, TCB information should be cached as early as
   possible, sometimes before a connection is closed. Otherwise,
   opening multiple concurrent connections may not result in TCB data
   sharing if no connection closes before others open. The amount of
   work involved in updating the aggregate average should be minimized,
   but the resulting value should be equivalent to having all values
   measured within a single connection.

The function "rtt_update" in
   <xref target="Ensemble_Cache_Updates"
format="default"/> indicates this operation, which occurs whenever the RTT
would have been updated in the individual TCP connection. As a result, the
cache contains the shared RTT variables, which no longer need to reside in the
TCB.</t>
        <t>
   Congestion window size and ssthresh aggregation are more complicated
   in the concurrent case. When there is an ensemble of connections, we
   need to decide how that ensemble would have shared these variables,
   in order to derive initial values for new TCBs.</t>
        <t>
Sections <xref target="sect-8" format="counter"/> and <xref target="sect-9"
format="counter"/> discuss compatibility issues and implications of sharing
the specific information listed above.</t>
        <t>
   There are several ways to initialize the congestion window in a new TCB
   among an ensemble of current connections to a host. Current TCP
   implementations initialize it to 4 segments as standard <xref
   target="RFC3390" format="default"/> and 10 segments experimentally <xref
   target="RFC6928" format="default"/>. These approaches assume that new
   connections should behave as conservatively as possible.  The algorithm
   described in <xref target="Ba12" format="default"/> adjusts the initial
   cwnd depending on the cwnd values of ongoing connections. It is also
   possible to use sharing mechanisms over long timescales to adapt TCP's
   initial window automatically, as described further in <xref
   target="sect-c"/>.</t>
      </section>
    </section>
    <section anchor="sect-8" numbered="true" toc="default">
      <name>Issues with TCB Information Sharing</name>
      <t>
   Here, we discuss various types of problems that may arise with TCB
   information sharing.</t>
      <t>
   For the congestion and current window information, the initial
   values computed by TCB interdependence may not be consistent with
   the long-term aggregate behavior of a set of concurrent connections
   between the same endpoints.

   Under conventional TCP congestion control, if the congestion window of a
   single existing connection has converged to 40 segments, two newly joining
   concurrent connections will assume initial windows of 10 segments <xref
   target="RFC6928"/> and the
   existing connection's window will not decrease
   to accommodate this additional load. As a consequence, the three
   connections can mutually interfere.

   One example of this is seen on low-bandwidth, high-delay links, where
   concurrent connections supporting Web traffic can collide because their
   initial windows were too large, even when set at 1 segment.</t>
      <t>
   The authors of <xref target="Hu12" format="default"/> recommend caching
   ssthresh for temporal sharing only when flows are long. Some studies
   suggest that sharing ssthresh between short flows can deteriorate the
   performance of individual connections <xref target="Hu12"/> <xref
   target="Du16" format="default"/>, although this may benefit aggregate
   network performance.</t>
      <section anchor="sect-8.1" numbered="true" toc="default">
        <name>Traversing the Same Network Path</name>
        <t>
   TCP is sometimes used in situations where packets of the same host-pair do
   not always take the same path, such as when connection-specific parameters
   are used for routing (e.g., for load balancing).  Multipath routing that
   relies on examining transport headers, such as ECMP and Link Aggregation
   Group (LAG) <xref target="RFC7424" format="default"/>, may not result in
   repeatable path selection when TCP segments are encapsulated, encrypted, or
   altered -- for example, in some Virtual Private Network (VPN) tunnels that
   rely on proprietary encapsulation. Similarly, such approaches cannot
   operate deterministically when the TCP header is encrypted, e.g., when
   using IPsec Encapsulating Security Payload (ESP) (although TCB
   interdependence among the entire set sharing the same endpoint IP addresses
   should work without problems when the TCP header is encrypted). Measures to
   increase the probability that connections use the same path could be
   applied; for example, the connections could be given the same IPv6 flow
   label <xref target="RFC6437" format="default"/>. TCB interdependence can
   also be extended to sets of host IP address pairs that share the same
   network path conditions, such as when a group of addresses is on the same
   LAN (see <xref target="sect-9" format="default"/>).</t>

        <t>
   Traversing the same path is not important for host-specific information
   (e.g., rwnd), TCP option state (e.g., TFOinfo), or for information that is
   already cached per-host (e.g., path MTU).

   When TCB information is shared across different SYN destination
   ports, path-related information can be incorrect; however, the
   impact of this error is potentially diminished if (as discussed
   here) TCB sharing affects only the transient event of a connection
   start or if TCB information is shared only within connections to the
   same SYN destination port.</t>
        <t>
   In the case of temporal sharing, TCB information could also become invalid
   over time, i.e., indicating that although the path remains the same, path
   properties have changed. Because this is similar to the case when a
   connection becomes idle, mechanisms that address idle TCP connections
   (e.g., <xref target="RFC7661" format="default"/>) could also be applied to
   TCB cache management, especially when TCP Fast Open is used <xref
   target="RFC7413" format="default"/>.</t>
      </section>
      <section anchor="sect-8.2" numbered="true" toc="default">
        <name>State Dependence</name>

        <t>
 There may be additional considerations to the way in which TCB
 interdependence rebalances congestion feedback among the current
 connections. For example, it may be appropriate to consider the impact of a
 connection being in Fast Recovery <xref target="RFC5681" format="default"/>
 or some other similar unusual feedback state that could inhibit or affect the
 calculations described herein.
</t>
      </section>
      <section anchor="sect-8.3" numbered="true" toc="default">
        <name>Problems with Sharing Based on IP Address</name>
        <t>

   It can be wrong to share TCB information between TCP connections on the
   same host as identified by the IP address if an IP address is assigned to a
   new host (e.g., IP address spinning, as is used by ISPs to inhibit running
   servers).

   It can be wrong if Network Address
   Translation (NAT) <xref target="RFC2663"
   format="default"/>, Network Address and Port Translation (NAPT) <xref
   target="RFC2663" format="default"/>, or any other IP sharing mechanism is
   used.

   Such mechanisms are less likely to be used with IPv6. Other methods to
   identify a host could also be considered to make correct TCB sharing more
   likely. Moreover, some TCB information is about dominant path properties
   rather than the specific host. IP addresses may differ, yet the relevant
   part of the path may be the same.</t>
      </section>

    </section>
    <section anchor="sect-9" numbered="true" toc="default">
      <name>Implications</name>
      <t>
   There are several implications to incorporating TCB interdependence in TCP
   implementations. First, it may reduce the need for application-layer
   multiplexing for performance enhancement <xref target="RFC7231" format="default"/>. Protocols like HTTP/2
   <xref target="RFC7540" format="default"/> avoid connection re-establishment costs by serializing or
   multiplexing a set of per-host connections across a single TCP
   connection. This avoids TCP's per-connection OPEN handshake and also avoids
   recomputing the MSS, RTT, and congestion window values. By avoiding the
   so-called "slow-start restart", performance can be optimized <xref target="I-D.hughes-restart" format="default"/>. TCB
   interdependence can provide the "slow-start restart avoidance" of
   multiplexing, without requiring a multiplexing mechanism at the application
   layer.</t>
      <t>

   Like the initial version of this document <xref target="RFC2140"
   format="default"/>, this update's approach to TCB interdependence focuses
   on sharing a set of TCBs by updating the TCB state to reduce the impact of
   transients when connections begin, end, or otherwise significantly change
   state.

   Other mechanisms have since been proposed to continuously share information
   between all ongoing communication (including connectionless protocols) and
   update the congestion state during any congestion-related event (e.g.,
   timeout, loss confirmation, etc.)  <xref target="RFC3124"
   format="default"/>.

   By dealing exclusively with transients, the approach in this document is
   more likely to exhibit the "steady-state" behavior as unmodified,
   independent TCP connections.</t>
      <section anchor="sect-9.1" numbered="true" toc="default">
        <name>Layering</name>

        <t>

TCB interdependence pushes some of the TCP implementation from its typical
placement solely within the
   transport layer (in the ISO model) to the
network layer.

   This acknowledges that some components of state are, in fact, per-host-pair
   or can be per-path as indicated solely by that host-pair.

Transport protocols typically manage per-application-pair associations (per
stream), and network protocols manage per-host-pair and path associations
(routing). Round-trip time, MSS, and congestion information could be more
appropriately handled at the network layer, aggregated among concurrent
connections, and shared across connection instances <xref target="RFC3124"
format="default"/>.</t>
        <t>
   An earlier version of RTT sharing suggested implementing RTT state at the
   IP layer rather than at the TCP layer. Our observations describe sharing
   state among TCP connections, which avoids some of the difficulties in an
   IP-layer solution. One such problem of an IP-layer solution is determining
   the correspondence between packet exchanges using IP header information
   alone, where such correspondence is needed to compute RTT. Because TCB
   sharing computes RTTs inside the TCP layer using TCP header information, it
   can be implemented more directly and simply than at the IP layer.  This is
   a case where information should be computed at the transport layer but
   could be shared at the network layer.</t>
      </section>
      <section anchor="sect-9.2" numbered="true" toc="default">
        <name>Other Possibilities</name>
        <t>
   Per-host-pair associations are not the limit of these techniques. It is
   possible that TCBs could be similarly shared between hosts on a subnet or
   within a cluster, because the predominant path can be
   subnet-subnet rather
   than host-host. Additionally, TCB interdependence can be applied to any
   protocol with congestion state, including SCTP <xref target="RFC4960"
   format="default"/> and DCCP <xref target="RFC4340" format="default"/>, as
   well as to individual subflows in Multipath TCP <xref target="RFC8684"
   format="default"/>.</t>
        <t>

   There may be other information that can be shared between concurrent
   connections. For example, knowing that another connection has just
   tried to expand its window size and failed, a connection may not
   attempt to do the same for some period. The idea is that existing
   TCP implementations infer the behavior of all competing connections,
   including those within the same host or subnet. One possible
   optimization is to make that implicit feedback explicit, via
   extended information associated with the endpoint IP address and its
   TCP implementation, rather than per-connection state in the TCB.</t>
        <t>
   This document focuses on sharing TCB information at connection
   initialization. Subsequent to RFC 2140, there have been numerous approaches
   that attempt to coordinate ongoing state across concurrent connections,
   both within TCP and other congestion-reactive protocols, which are
   summarized in <xref target="Is18" format="default"/>. These approaches are
   more complex to implement, and their comparison to steady-state TCP
   equivalence can be more difficult to establish, sometimes intentionally
   (i.e., they sometimes intend to provide a different kind of "fairness" than
   emerges from TCP operation).</t>
      </section>
    </section>

    <section anchor="sect-10" numbered="true" toc="default">
      <name>Implementation Observations</name>
      <t>
   The observation that some TCB state is host-pair specific rather than
   application-pair dependent is not new and is a common engineering decision
   in layered protocol implementations. Although now deprecated, T/TCP <xref
   target="RFC1644" format="default"/> was the first to propose using caches
   in order to maintain TCB states (see <xref target="sect-a"/>).</t>
      <t>
	<xref target="Known_Implementation_Status"/> describes the current
	implementation status for TCB temporal sharing in Windows as of
	December 2020, Apple variants (macOS, iOS, iPadOS, tvOS, and watchOS)
	as of January 2021, Linux kernel version 5.10.3, and FreeBSD
	12. Ensemble sharing is not yet implemented.</t>

<table anchor="Known_Implementation_Status">
  <name>KNOWN IMPLEMENTATION STATUS</name>
  <thead>
    <tr>
      <th>TCB data</th>
      <th>Status</th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <td>old_MMS_S</td>
      <td>Not shared</td>
    </tr>
    <tr>
      <td>old_MMS_R</td>
      <td>Not shared</td>
    </tr>
    <tr>
      <td>old_sendMSS</td>
      <td>Cached and shared in Apple, Linux (MSS)</td>
    </tr>
    <tr>
      <td>old_PMTU</td>
      <td>Cached and shared in Apple, FreeBSD, Windows (PMTU)</td>
    </tr>
    <tr>
      <td>old_RTT</td>
      <td>Cached and shared in Apple, FreeBSD, Linux, Windows</td>
    </tr>
    <tr>
      <td>old_RTTVAR</td>
      <td>Cached and shared in Apple, FreeBSD, Windows</td>
    </tr>
    <tr>
      <td>old_TFOinfo</td>
      <td>Cached and shared in Apple, Linux, Windows</td>
    </tr>
    <tr>
      <td>old_sendcwnd</td>
      <td>Not shared</td>
    </tr>
    <tr>
      <td>old_ssthresh</td>
      <td>Cached and shared in Apple, FreeBSD*, Linux*</td>
    </tr>
    <tr>
      <td>TFO failure</td>
      <td>Cached and shared in Apple</td>
    </tr>
  </tbody>
</table>

<dl>
   <dt>*</dt>
   <dd>Note:

 In FreeBSD, new ssthresh is the mean of curr_ssthresh and its previous value
 if a previous value exists; in Linux, the calculation depends on state and is
 max(curr_cwnd/2, old_ssthresh) in most
   cases.</dd>
</dl>

<t>In <xref target="Known_Implementation_Status"/>, "Apple" refers to all
Apple OSes, i.e., macOS (desktop/laptop), iOS (phone), iPadOS (tablet), tvOS
(video player), and watchOS (smart watch), which all share the same Internet
protocol stack.
</t>

    </section>
    <section anchor="sect-11" numbered="true" toc="default">
      <name>Changes Compared to RFC 2140</name>
      <t>
   This document updates the description of TCB sharing in RFC 2140 and its
   associated impact on existing and new connection state, providing a
   complete replacement for that document <xref target="RFC2140"
   format="default"/>. It clarifies the previous description and terminology
   and extends the mechanism to its impact on new protocols and mechanisms,
   including multipath TCP, Fast Open, PLPMTUD, NAT, and the TCP
   Authentication Option.</t>

<t>
   The detailed impact on TCB state addresses TCB parameters with greater
   specificity. It separates the way MSS is used in both the send and receive
   directions, it separates the way both of these MSS
   values differ from
   sendMSS, it adds both path MTU and ssthresh, and it addresses the impact on
   state associated with TCP options.
</t>

      <t>
   New sections have been added to address compatibility issues and
   implementation observations.

   The relation of this work to T/TCP has been moved to <xref
   target="sect-a"/> (which describes the history of TCB sharing), partly to
   reflect the deprecation of that protocol.

</t>
      <t>
   <xref target="sect-c"/> has been added to discuss the potential to use temporal
   sharing over long timescales to adapt TCP's initial window
   automatically, avoiding the need to periodically revise a single
   global constant value.</t>
      <t>
   Finally, this document updates and significantly expands the
   referenced literature.</t>
    </section>
    <section anchor="sect-12" numbered="true" toc="default">
      <name>Security Considerations</name>
      <t>
   These presented implementation methods do not have additional ramifications
   for direct (connection-aborting or
   information-injecting) attacks on
   individual connections. Individual connections, whether using sharing or
   not, also may be susceptible to denial-of-service attacks that reduce
   performance or completely deny connections and transfers if not otherwise
   secured.</t>
      <t>
   TCB sharing may create additional denial-of-service attacks that affect the
   performance of other connections by polluting the cached information. This
   can occur across any set of connections in which the TCB is shared,
   between connections in a single host, or between hosts if TCB sharing is
   implemented within a subnet (see
   <xref target="sect-9"
   sectionFormat="bare">"Implications"</xref>). Some shared TCB parameters are
   used only to create new TCBs; others are shared among the TCBs of ongoing
   connections. New connections can join the ongoing set, e.g., to optimize
   send window size among a set of connections to the same host. PMTU is
   defined as shared at the IP layer and is already susceptible in this
   way.</t>
      <t>
   Options in client SYNs can be easier to forge than complete, two-way
   connections. As a result, their values may not be safely
   incorporated in shared values until after the three-way handshake
   completes.</t>
      <t>
   Attacks on parameters used only for initialization affect only the
   transient performance of a TCP connection. For short connections, the
   performance ramification can approach that of a denial-of-service
   attack. For example, if an application changes its TCB to have a false and small
   window size, subsequent connections will experience performance degradation
   until their window grows appropriately.</t>
      <t>
   TCB sharing reuses and mixes information from past and current
   connections. Although reusing information could create a potential
   for fingerprinting to identify hosts, the mixing reduces that
   potential. There has been no evidence of fingerprinting based on
   this technique, and it is currently considered safe in that regard.
   Further, information about the performance of a TCP connection has
   not been considered as private.</t>
    </section>
    <section anchor="sect-13" numbered="true" toc="default">
      <name>IANA Considerations</name>
      <t>
   This document has no IANA actions.</t>

    </section>
  </middle>
  <back>

<displayreference target="I-D.allman-tcpm-bump-initcwnd" to="Al10"/>
<displayreference target="I-D.ietf-tcpm-generalized-ecn" to="Ba20"/>
<displayreference target="I-D.hughes-restart" to="Hu01"/>

    <references>
      <name>References</name>
      <references>
        <name>Normative References</name>
        <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.0793.xml"/>
        <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.1122.xml"/>
        <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.1191.xml"/>
        <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.2119.xml"/>
        <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.4821.xml"/>
        <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.5681.xml"/>
        <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.6298.xml"/>
        <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.7413.xml"/>
        <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.8174.xml"/>
        <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.8201.xml"/>
      </references>
      <references>
        <name>Informative References</name>

        <xi:include href="https://datatracker.ietf.org/doc/bibxml3/reference.I-D.allman-tcpm-bump-initcwnd.xml"/>

        <reference anchor="Ba12">
          <front>
            <title>LISA: A linked slow-start algorithm for MPTCP</title>
            <author initials="R." surname="Barik" fullname="Runa Barik"/>
            <author initials="M." surname="Welzl" fullname="Michael Welzl"/>
            <author initials="S." surname="Ferlin" fullname="Simone Ferlin"/>
            <author initials="O." surname="Alay" fullname="Ozgu Alay"/>
            <date month="May" year="2016"/>
          </front>
<refcontent>IEEE ICC</refcontent>
<seriesInfo name="DOI" value="10.1109/ICC.2016.7510786"/>
        </reference>
        <xi:include href="https://datatracker.ietf.org/doc/bibxml3/reference.I-D.ietf-tcpm-generalized-ecn.xml"/>

        <reference anchor="Be94">
          <front>
            <title>The World-Wide Web</title>
            <author initials="T." surname="Berners-Lee" fullname="Tim Berners-Lee"/>
            <author initials="C." surname="Cailliau" fullname="Robert Cailliau"/>
            <author initials="A." surname="Luotonen" fullname="Ari Luotonen"/>
            <author initials="H." surname="Nielsen" fullname="Henrik Frystyk Nielsen"/>
            <author initials="A." surname="Secret" fullname="Arthur Secret"/>
            <date month="August" year="1994"/>
          </front>
<seriesInfo name="DOI" value="10.1145/179606.179671"/>
<refcontent>Communications of the ACM V37, pp. 76-82</refcontent>
        </reference>

        <reference anchor="Br94">
          <front>
            <title>T/TCP -- Transaction TCP: Source Changes for Sun OS 4.1.3</title>
            <author initials="B." surname="Braden" fullname="Bob Braden"/>
            <date month="September" year="1994"/>
          </front>
<refcontent>USC/ISI Release 1.0</refcontent>
        </reference>

        <reference anchor="Br02">
          <front>
            <title>Understanding Internet traffic streams: dragonflies and tortoises</title>
            <author initials="N" surname="Brownlee" fullname="Nevil Brownlee"/>
            <author initials="KC" surname="Claffy" fullname="KC Claffy"/>
            <date year="2002"/>
          </front>
<seriesInfo name="DOI" value="10.1109/MCOM.2002.1039865"/>
          <refcontent>IEEE Communications Magazine, pp. 110-117</refcontent>
        </reference>

        <reference anchor="Co91">
          <front>
            <title>Internetworking with TCP/IP</title>
            <author initials="D" surname="Comer" fullname="Douglas Comer"/>
            <author initials="D" surname="Stevens" fullname="David Stevens"/>
            <date year="1991"/>
          </front>
<seriesInfo name='ISBN 10:' value='0134685059' />
<seriesInfo name='ISBN 13:' value='9780134685052' />
        </reference>

        <reference anchor="Du16">
          <front>
            <title>Research Impacting the Practice of Congestion Control</title>
            <author initials="N" surname="Dukkipati" fullname="Nandita Dukkipati"/>
            <author initials="Y" surname="Cheng" fullname="Yuchung Cheng"/>
            <author initials="A" surname="Vahdat" fullname="Amin Vahdat"/>
            <date month="July" year="2016"/>
          </front>

<refcontent>Computer Communication Review</refcontent>
<refcontent>The ACM SIGCOMM newsletter</refcontent>
        </reference>

        <reference anchor="FreeBSD" target="https://www.freebsd.org/">
          <front>
            <title>The FreeBSD Project</title>
            <author>
<organization>FreeBSD</organization>
	</author>
            <date/>
          </front>
        </reference>
<reference anchor="I-D.hughes-restart">
<front>
<title>Issues in TCP Slow-Start Restart After Idle</title>

<author initials="A" surname="Hughes" fullname="Amy Hughes"/>
<author initials="J" surname="Touch" fullname="Joe Touch"/>
<author initials="J" surname="Heidemann" fullname="John Heidemann"/>

<date month="December" year="2001" />
</front>

<seriesInfo name="Internet-Draft" value="draft-hughes-restart-00" />
</reference>

        <reference anchor="Hu12">
          <front>
            <title>Enhanced metric caching for short TCP flows</title>
            <author initials="P." surname="Hurtig" fullname="Per Hurtig"/>
            <author initials="A." surname="Brunstrom" fullname="Anna Brunstrom"/>
            <date year="2012"/>
          </front>
<seriesInfo name="DOI" value="10.1109/ICC.2012.6364516"/>
<refcontent>IEEE International Conference on Communications</refcontent>
        </reference>

        <reference anchor="IANA" target="https://www.iana.org/assignments/tcp-parameters">
          <front>
            <title>Transmission Control Protocol (TCP) Parameters</title>
            <author>
<organization>IANA</organization>
	</author>
            <date/>
          </front>
        </reference>

        <reference anchor="Is18">
          <front>
            <title>ctrlTCP: Reducing latency through coupled, heterogeneous
            multi-flow TCP congestion control</title>
            <author initials="S." surname="Islam" fullname="Safiqul Islam"/>
            <author initials="M." surname="Welzl" fullname="Michael Welzl"/>
            <author initials="K." surname="Hiorth" fullname="Kristian Hiorth"/>
            <author initials="D." surname="Hayes" fullname="David Hayes"/>
            <author initials="G." surname="Armitage" fullname="Grenville Armitage"/>
            <author initials="S." surname="Gjessing" fullname="Stein Gjessing"/>
            <date month="April" year="2018"/>
          </front>
<seriesInfo name="DOI" value="10.1109/INFCOMW.2018.8406887"/>
          <refcontent>IEEE INFOCOM 2018 - IEEE Conference on Computer
          Communications Workshops (INFOCOM WKSHPS)</refcontent>
        </reference>

        <reference anchor="Ja88">
          <front>
            <title>Congestion Avoidance and Control</title>
            <author initials="V." surname="Jacobson" fullname="Van Jacobson"/>
            <author initials="M." surname="Karels" fullname="Michael Karels"/>
            <date month="November" year="1988"/>
          </front>

	  <refcontent>SIGCOMM Symposium proceedings on Communications
	  architectures and protocols
	  </refcontent>
        </reference>

        <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.1644.xml"/>
        <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.1379.xml"/>
        <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.2001.xml"/>
        <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.2140.xml"/>
        <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.2414.xml"/>
        <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.2663.xml"/>
        <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.3390.xml"/>
        <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.3124.xml"/>
        <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.4340.xml"/>
        <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.4960.xml"/>
        <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.5925.xml"/>
        <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.6437.xml"/>
        <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.6691.xml"/>
        <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.6928.xml"/>
        <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.7231.xml"/>
        <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.7323.xml"/>
        <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.7424.xml"/>
        <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.7540.xml"/>
        <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.7661.xml"/>
        <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.8684.xml"/>
      </references>
    </references>
    <section anchor="sect-a" numbered="true" toc="default">
      <name>TCB Sharing History</name>
      <t>
   T/TCP proposed using caches to maintain TCB information across instances
   (temporal sharing), e.g., smoothed RTT, RTT variation,
   congestion-avoidance
   threshold, and MSS <xref target="RFC1644" format="default"/>. These values
   were in addition to connection counts used by T/TCP to accelerate data
   delivery prior to the full three-way handshake during an OPEN. The goal was
   to aggregate TCB components where they reflect one association -- that of the
   host-pair rather than artificially separating those components by
   connection.</t>
      <t>
   At least one T/TCP implementation saved the MSS and aggregated the
   RTT parameters across multiple connections but omitted caching the
   congestion window information <xref target="Br94" format="default"/>, as originally specified in
   <xref target="RFC1379" format="default"/>. Some T/TCP implementations immediately updated MSS when
   the TCP MSS header option was received <xref target="Br94" format="default"/>, although this was not
   addressed specifically in the concepts or functional specification
   <xref target="RFC1379" format="default"/> <xref target="RFC1644" format="default"/>. In later T/TCP implementations, RTT values were
   updated only after a CLOSE, which does not benefit concurrent
   sessions.</t>
      <t>
   Temporal sharing of cached TCB data was originally implemented in the Sun
   OS 4.1.3 T/TCP extensions <xref target="Br94" format="default"/> and the
   FreeBSD port of same <xref target="FreeBSD" format="default"/>. As
   mentioned before, only the MSS and RTT parameters were cached, as originally
   specified in <xref target="RFC1379" format="default"/>. Later discussion of
   T/TCP suggested including congestion control parameters in this cache; for
   example, <xref target="RFC1644" sectionFormat="of" section="3.1"
   format="default"/> hints at initializing the congestion window to the old
   window size.</t>
    </section>
    <section anchor="sect-b" numbered="true" toc="default">

      <name>TCP Option Sharing and Caching</name>
      <t>
   In addition to the options that can be cached and shared, this memo also
   lists known TCP options <xref target="IANA" format="default"/> for which
   state is unsafe to be kept. This list is not intended to be authoritative
   or exhaustive.</t>

<t>Obsolete (unsafe to keep state):</t>

<ul empty="true">
<li>Echo</li>
<li>Echo Reply</li>
<li>Partial Order Connection Permitted</li>
<li>Partial Order Service Profile</li>
<li>CC</li>
<li>CC.NEW</li>
<li>CC.ECHO</li>
<li>TCP Alternate Checksum Request</li>
<li>TCP Alternate Checksum Data</li>
</ul>

<t>No state to keep:</t>

<ul empty="true">
<li>End of Option List (EOL)</li>
<li>No-Operation (NOP)</li>
<li>Window Scale (WS)</li>
<li>SACK</li>
<li>Timestamps (TS)</li>
<li>MD5 Signature Option</li>
<li>TCP Authentication Option (TCP-AO)</li>
<li>RFC3692-style Experiment 1</li>
<li>RFC3692-style Experiment 2</li>
</ul>

<t>Unsafe to keep state:</t>

<ul empty="true">
<li>Skeeter (DH exchange, known to be vulnerable)</li>
<li>Bubba (DH exchange, known to be vulnerable)</li>
<li>Trailer Checksum Option</li>
<li>SCPS capabilities</li>
<li>Selective Negative Acknowledgements (S-NACK)</li>
<li>Records Boundaries</li>
<li>Corruption experienced</li>
<li>SNAP</li>
<li>TCP Compression Filter</li>
<li>Quick-Start Response</li>
<li>User Timeout Option (UTO)</li>
<li>Multipath TCP (MPTCP) negotiation success (see below for negotiation failure)</li>
<li>TCP Fast Open (TFO) negotiation success (see below for negotiation failure)</li>
</ul>

<t>Safe but optional to keep state:</t>

<ul empty="true">
<li>Multipath TCP (MPTCP) negotiation failure (to avoid negotiation retries)</li>
<li>Maximum Segment Size (MSS)</li>
<li>TCP Fast Open (TFO) negotiation failure (to avoid negotiation retries)</li>
</ul>

<t>Safe and necessary to keep state:</t>

<ul empty="true">
<li>TCP Fast Open (TFO) Cookie (if TFO succeeded in the past)</li>
</ul>

    </section>
    <section anchor="sect-c" numbered="true" toc="default">
      <name>Automating the Initial Window in TCP over Long Timescales</name>
      <section anchor="sect-c.1" numbered="true" toc="default">
        <name>Introduction</name>
        <t>
   Temporal sharing, as described earlier in this document, builds on
   the assumption that multiple consecutive connections between the
   same host-pair are somewhat likely to be exposed to similar
   environment characteristics. The stored information can become less
   accurate over time and suitable precautions should take this aging
   into consideration (this is discussed further in <xref target="sect-8.1"/>).
   However, there are also cases where it can make sense to track these
   values over longer periods, observing properties of TCP connections
   to gradually influence evolving trends in TCP parameters. This
   appendix describes an example of such a case.</t>
        <t>
   TCP's congestion control algorithm uses an initial window value
   (IW) both as a starting point for new connections and as an upper
   limit for restarting after an idle period <xref target="RFC5681" format="default"/> <xref target="RFC7661" format="default"/>. This
   value has evolved over time; it was originally 1 maximum segment size
   (MSS) and increased to the lesser of 4 MSSs or 4,380 bytes
   <xref target="RFC3390" format="default"/> <xref target="RFC5681" format="default"/>. For a typical Internet connection with a maximum
   transmission unit (MTU) of 1500 bytes, this permits 3 segments
   of 1,460 bytes each.</t>
        <t>
   The IW value was originally implied in the original TCP congestion control
   description and documented as a standard in 1997 <xref target="RFC2001"
   format="default"/> <xref target="Ja88" format="default"/>. The value was
   updated in 1998 experimentally and moved to the Standards Track in 2002
   <xref target="RFC2414" format="default"/> <xref target="RFC3390"
   format="default"/>. In 2013, it was experimentally increased to 10 <xref
   target="RFC6928" format="default"/>.</t>
        <t>
   This appendix discusses how TCP can objectively measure when an IW
   is too large and that such feedback should be used over long
   timescales to adjust the IW automatically. The result should be
   safer to deploy and might avoid the need to repeatedly revisit IW
   over time.</t>
        <t>
   Note that this mechanism attempts to make the IW more adaptive over
   time. It can increase the IW beyond that which is currently
   recommended for wide-scale deployment, and so its use should be
   carefully monitored.</t>
      </section>
      <section anchor="sect-c.2" numbered="true" toc="default">
        <name>Design Considerations</name>
        <t>
   TCP's IW value has existed statically for over two decades, so any
   solution to adjusting the IW dynamically should have similarly
   stable, non-invasive effects on the performance and complexity of
   TCP. In order to be fair, the IW should be similar for most machines
   on the public Internet. Finally, a desirable goal is to develop a
   self-correcting algorithm so that IW values that cause network
   problems can be avoided. To that end, we propose the following
   design goals:</t>

        <ul spacing="normal">
          <li>Impart little to no impact to TCP in the absence of loss, i.e.,
      it should not increase the complexity of default packet
      processing in the normal case.</li>
          <li>Adapt to network feedback over long timescales, avoiding values
      that persistently cause network problems.</li>
          <li>Decrease the IW in the presence of sustained loss of IW segments,
          as determined over a number of different connections.</li>
          <li>Increase the IW in the absence of sustained loss of IW segments,
          as determined over a number of different connections.</li>
          <li>Operate conservatively, i.e., tend towards leaving the IW the
      same in the absence of sufficient information, and give greater
      consideration to IW segment loss than IW segment success.</li>
        </ul>
        <t>
   We expect that, without other context, a good IW algorithm will
   converge to a single value, but this is not required. An endpoint
   with additional context or information, or deployed in a constrained
   environment, can always use a different value. In particular,
   information from previous connections, or sets of connections with a
   similar path, can already be used as context for such decisions (as
   noted in the core of this document).</t>
        <t>
   However, if a given IW value persistently causes packet loss during
   the initial burst of packets, it is clearly inappropriate and could
   be inducing unnecessary loss in other competing connections. This
   might happen for sites behind very slow boxes with small buffers,
   which may or may not be the first hop.</t>
      </section>
      <section anchor="sect-c.3" numbered="true" toc="default">
        <name>Proposed IW Algorithm</name>
        <t>
   Below is a simple description of the proposed IW algorithm. It
   relies on the following parameters:</t>

        <ul spacing="normal">
          <li>MinIW = 3 MSS or 4,380 bytes (as per <xref target="RFC3390" format="default"/>)</li>
          <li>MaxIW = 10 MSS (as per <xref target="RFC6928" format="default"/>)</li>
          <li>MulDecr = 0.5</li>
          <li>AddIncr = 2 MSS</li>
          <li>Threshold = 0.05</li>
        </ul>
        <t>
   We assume that the minimum IW (MinIW) should be as currently specified as
   standard <xref target="RFC3390" format="default"/>. The maximum IW (MaxIW) can be
   set to a fixed value (we suggest using the experimental and now somewhat de
   facto standard in <xref target="RFC6928" format="default"/>) or set based
   on a schedule if trusted time references are available <xref
   target="I-D.allman-tcpm-bump-initcwnd" format="default"/>; here, we prefer
   a fixed value.  We also propose to use an Additive Increase Multiplicative
   Decrease (AIMD) algorithm, with increase and decreases as noted.</t>
        <t>
   Although these parameters are somewhat arbitrary, their initial
   values are not important except that the algorithm is AIMD and the
   MaxIW should not exceed that recommended for other systems on the
   Internet (here, we selected the current de facto standard rather than
   the actual standard). Current proposals, including default current
   operation, are degenerate cases of the algorithm below for given
   parameters, notably MulDecr = 1.0 and AddIncr = 0 MSS, thus
   disabling the automatic part of the algorithm.</t>
        <t>
   The proposed algorithm is as follows:</t>

<ol>

<li>
<t>On boot:</t>
<sourcecode type="pseudocode">
   IW = MaxIW; # assume this is in bytes and indicates an integer
               # multiple of 2 MSS (an even number to support
               # ACK compression)
</sourcecode>
</li>

<li><t>Upon starting a new connection:</t>
<sourcecode type="pseudocode">
   CWND = IW;
   conncount++;
   IWnotchecked = 1; # true
</sourcecode>
</li>

<li>
<t>During a connection's SYN-ACK processing, if SYN-ACK includes ECN (as
similarly addressed in Section 5 of ECN++ for TCP <xref
target="I-D.ietf-tcpm-generalized-ecn"/>), treat as if the IW is too large:
</t>
<sourcecode type="pseudocode">
   if (IWnotchecked &amp;&amp; (synackecn == 1)) {
      losscount++;
      IWnotchecked = 0; # never check again
   }
</sourcecode>
</li>

<li><t>During a connection, if retransmission occurs, check the seqno of the
outgoing packet (in bytes) to see if the re-sent segment fixes an IW loss:</t>
<sourcecode type="pseudocode">
   if (Retransmitting &amp;&amp; IWnotchecked &amp;&amp; ((seqno - ISN) &lt; IW)) {
      losscount++;
      IWnotchecked = 0; # never do this entire "if" again
   } else {
      IWnotchecked = 0; # you're beyond the IW so stop checking
   }
</sourcecode>
</li>

<li>
<t>Once every 1000 connections, as a separate process (i.e., not as part of
processing a given connection):
</t>
<sourcecode type="pseudocode">
   if (conncount > 1000) {
      if (losscount/conncount > threshold) {
         # the number of connections with errors is too high
         IW = IW * MulDecr;
      } else {
         IW = IW + AddIncr;
      }
   }
</sourcecode>
</li>

</ol>

        <t>
   As presented, this algorithm can yield a false positive when the sequence
   number wraps around, e.g., the code might increment losscount in step 4
   when no loss occurred or fail to increment losscount when a loss did
   occur. This can be avoided using either
   Protection Against Wrapped
   Sequences (PAWS) <xref target="RFC7323" format="default"/> context or
   internal extended sequence number representations (as in TCP Authentication
   Option (TCP-AO) <xref target="RFC5925" format="default"/>). Alternately,
   false positives can be tolerated because they are expected to be infrequent
   and thus will not significantly impact the algorithm.</t>
        <t>
   A number of additional constraints need to be imposed if this
   mechanism is implemented to ensure that it defaults to values that
   comply with current Internet standards, is conservative in how it
   extends those values, and returns to those values in the absence of
   positive feedback (i.e., success). To that end, we recommend the
   following list of example constraints:</t>

<ul>
   <li> <t>
   &gt;&gt; The automatic IW algorithm <bcp14>MUST</bcp14> initialize MaxIW to a
   value no larger than the currently recommended Internet default in the
   absence of other context information.</t>
        <t>
   Thus, if there are too few connections to make a decision or if
   there is otherwise insufficient information to increase the IW, then
   the MaxIW defaults to the current recommended value.</t></li>

<li>        <t>
   &gt;&gt;
   An implementation <bcp14>MAY</bcp14> allow the MaxIW to grow beyond the
   currently recommended Internet default but not more than 2 segments
   per calendar year.</t>
        <t>
   Thus, if an endpoint has a persistent history of successfully transmitting
   IW segments without loss, then it is allowed to probe the Internet to
   determine if larger IW values have similar success.  This probing is
   limited and requires a trusted time source; otherwise, the MaxIW remains
   constant.</t></li>
<li>
        <t>
   &gt;&gt;
   An implementation <bcp14>MUST</bcp14> adjust the IW based on loss statistics at
   least once every 1000 connections.</t>
        <t>
   An endpoint needs to be sufficiently reactive to IW loss.</t>
</li>
     <li>   <t>
   &gt;&gt;
   An implementation <bcp14>MUST</bcp14> decrease the IW by at least 1 MSS when
   indicated during an evaluation interval.</t>
        <t>
   An endpoint that detects loss needs to decrease its IW by at least
   1 MSS; otherwise, it is not participating in an automatic reactive
   algorithm.</t></li>
<li>
        <t>
   &gt;&gt;
   An implementation <bcp14>MUST</bcp14> increase by no more than 2 MSSs per
   evaluation interval.</t>
        <t>
   An endpoint that does not experience IW loss needs to probe the
   network incrementally.</t>
</li>
<li>
        <t>
   &gt;&gt;
   An implementation <bcp14>SHOULD</bcp14> use an IW that is an integer multiple of
   2 MSSs.</t>
        <t>
   The IW should remain a multiple of 2 MSS segments to enable
   efficient ACK compression without incurring unnecessary timeouts.</t>
</li>

<li>        <t>
   &gt;&gt;
   An implementation <bcp14>MUST</bcp14> decrease the IW if more than 95% of
   connections have IW losses.</t>
        <t>
   Again, this is to ensure an implementation is sufficiently reactive.</t></li>

<li>        <t>
   &gt;&gt;
   An implementation <bcp14>MAY</bcp14> group IW values and statistics within
   subsets of connections. Such grouping <bcp14>MAY</bcp14> use any information about
   connections to form groups except loss statistics.</t>
</li>
</ul>
<t>
   There are some TCP connections that might not be counted at all,
   such as those to/from loopback addresses or those within the same
   subnet as that of a local interface (for which congestion control is
   sometimes disabled anyway). This may also include connections that
   terminate before the IW is full, i.e., as a separate check at the
   time of the connection closing.</t>
        <t>
   The period over which the IW is updated is intended to be a long timescale,
   e.g., a month or so, or 1,000 connections, whichever is longer. An
   implementation might check the IW once a month and simply not update the IW
   or clear the connection counts in months where the number of connections is
   too small.</t>
      </section>
      <section anchor="sect-c.4" numbered="true" toc="default">
        <name>Discussion</name>
        <t>
   There are numerous parameters to the above algorithm that are
   compliant with the given requirements; this is intended to allow
   variation in configuration and implementation while ensuring that
   all such algorithms are reactive and safe.</t>
        <t>
   This algorithm continues to assume segments because that is the
   basis of most TCP implementations. It might be useful to consider
   revising the specifications to allow byte-based congestion given
   sufficient experience.</t>
        <t>
   The algorithm checks for IW losses only during the first IW after a
   connection start; it does not check for IW losses elsewhere the IW
   is used, e.g., during slow-start restarts.</t>

<ul>
     <li> <t>
   &gt;&gt; An implementation <bcp14>MAY</bcp14> detect IW losses during
     slow-start restarts in addition to losses during the first IW of a
     connection. In this case, the implementation <bcp14>MUST</bcp14> count
     each restart as a "connection" for the purposes of connection counts and
     periodic rechecking of the IW value.</t>
</li>
</ul>
        <t>
   False positives can occur during some kinds of segment reordering,
   e.g., that might trigger spurious retransmissions even without a
   true segment loss. These are not expected to be sufficiently common
   to dominate the algorithm and its conclusions.</t>

        <t>
   This mechanism does require additional per-connection state, which is
   currently common in some implementations and is useful for other reasons
   (e.g., the ISN is used in TCP-AO <xref target="RFC5925"
   format="default"/>).

The mechanism in this appendix also benefits from persistent state kept across
reboots, which would also be useful to other state sharing mechanisms (e.g.,
TCP Control Block Sharing per the main body of this document).
</t>

        <t>
   The receive window (rwnd) is not involved in this calculation. The
   size of rwnd is determined by receiver resources and provides space
   to accommodate segment reordering.
   Also, rwnd is not involved with congestion control, which is the focus of
   the way this appendix manages the IW.
</t>
      </section>
      <section anchor="sect-c.5" numbered="true" toc="default">
        <name>Observations</name>
        <t>
   The IW may not converge to a single global value. It also may not
   converge at all but rather may oscillate by a few MSSs as it
   repeatedly probes the Internet for larger IWs and fails. Both
   properties are consistent with TCP behavior during each individual
   connection.</t>
        <t>
   This mechanism assumes that losses during the IW are due to IW size.
   Persistent errors that drop packets for other reasons, e.g., OS
   bugs, can cause false positives. Again, this is consistent with
   TCP's basic assumption that loss is caused by congestion and
   requires backoff. This algorithm treats the IW of new connections as
   a long-timescale backoff system.</t>
      </section>
    </section>
    <section numbered="false" anchor="acknowledgments" toc="default">
      <name>Acknowledgments</name>
      <t>
   The authors would like to thank
   <contact fullname="Praveen Balasubramanian"/> for information regarding
   TCB sharing in Windows; <contact fullname="Christoph Paasch"/> for
   information regarding TCB sharing in Apple OSs;
   <contact fullname="Yuchung Cheng"/>, <contact fullname="Lars Eggert"/>,
   <contact fullname="Ilpo Jarvinen"/>, and
   <contact fullname="Michael Scharf"/> for comments on earlier draft
   versions of this document; as well as members of the TCPM WG.  Earlier
   revisions of this work received funding from a collaborative research
   project between the University of Oslo and Huawei Technologies Co., Ltd.
   and were partly supported by USC/ISI's Postel Center.</t>
      <t>
   This document was prepared using 2-Word-v2.0.template.dot.</t>
    </section>
  </back>

</rfc>