rfc9406xml2.original.xml | rfc9406.xml | |||
---|---|---|---|---|
<?xml version="1.0"?> | <?xml version="1.0" encoding="UTF-8"?> | |||
<!DOCTYPE rfc SYSTEM "rfc2629.dtd"[ | ||||
<!ENTITY rfc2119 SYSTEM "http://xml2rfc.tools.ietf.org/public/rfc/bibxml/refer | <!-- [CS] updated by Chris 03/15/23 --> | |||
ence.RFC.2119.xml"> | ||||
<!ENTITY rfc5681 SYSTEM "http://xml2rfc.tools.ietf.org/public/rfc/bibxml/refer | <!DOCTYPE rfc [ | |||
ence.RFC.5681.xml"> | <!ENTITY nbsp "&#160;"> | |||
<!ENTITY rfc8312 SYSTEM "http://xml2rfc.tools.ietf.org/public/rfc/bibxml/refer | <!ENTITY zwsp "&#8203;"> | |||
ence.RFC.8312.xml"> | <!ENTITY nbhy "&#8209;"> | |||
<!ENTITY rfc9002 SYSTEM "http://xml2rfc.tools.ietf.org/public/rfc/bibxml/refer | <!ENTITY wj "&#8288;"> | |||
ence.RFC.9002.xml"> | ||||
<!ENTITY rfc9260 SYSTEM "http://xml2rfc.tools.ietf.org/public/rfc/bibxml/refer | ||||
ence.RFC.9260.xml"> | ||||
<!ENTITY rfc8174 SYSTEM "http://xml2rfc.tools.ietf.org/public/rfc/bibxml/refer | ||||
ence.RFC.8174.xml"> | ||||
<!ENTITY rfc1191 SYSTEM "http://xml2rfc.tools.ietf.org/public/rfc/bibxml/refer | ||||
ence.RFC.1191.xml"> | ||||
<!ENTITY rfc1122 SYSTEM "http://xml2rfc.tools.ietf.org/public/rfc/bibxml/refer | ||||
ence.RFC.1122.xml"> | ||||
<!ENTITY rfc4821 SYSTEM "http://xml2rfc.tools.ietf.org/public/rfc/bibxml/refer | ||||
ence.RFC.4821.xml"> | ||||
]> | ]> | |||
<?rfc toc='yes' ?> | ||||
<?rfc symrefs='yes' ?> | ||||
<?rfc sortrefs='yes'?> | ||||
<?rfc compact='yes'?> | ||||
<?rfc comments="yes"?> | ||||
<?rfc inline="yes" ?> | ||||
<!-- <?rfc-ext parse-xml-in-artwork='yes' ?> --> | ||||
<!-- <?xml-stylesheet type='text/xsl' href='rfc2629.xslt' ?> --> | ||||
<rfc docName="draft-ietf-tcpm-hystartplusplus-14" category="std" ipr="trust20090 | <rfc xmlns:xi="http://www.w3.org/2001/XInclude" docName="draft-ietf-tcpm-hystart | |||
2"> | plusplus-14" number="9406" submissionType="IETF" category="std" consensus="true" | |||
ipr="trust200902" obsoletes="" updates="" xml:lang="en" tocInclude="true" symRe | ||||
fs="true" sortRefs="true" version="3"> | ||||
<!-- xml2rfc v2v3 conversion 3.16.0 --> | ||||
<front> | <front> | |||
<title abbrev='HyStart++'>HyStart++: Modified Slow Start for& | <title abbrev="HyStart++">HyStart++: Modified Slow Start for TCP</title> | |||
nbsp;TCP</title> | <seriesInfo name="RFC" value="9406"/> | |||
<author initials='P.' surname='Balasubramanian' fullname='Praveen Balasubram | <author initials="P." surname="Balasubramanian" fullname="Praveen Balasubram | |||
anian'> | anian"> | |||
<organization>Confluent</organization> | <organization>Confluent</organization> | |||
<address> | <address> | |||
<postal> | <postal> | |||
<street>899 West Evelyn Ave</street> | <street>899 West Evelyn Ave</street> | |||
<city>Mountain View</city> | <city>Mountain View</city> | |||
<region>CA</region> | <region>CA</region> | |||
<code>94041</code> | <code>94041</code> | |||
<country>USA</country> | <country>United States of America</country> | |||
</postal> | </postal> | |||
<email>pravb.ietf@gmail.com</email> | <email>pravb.ietf@gmail.com</email> | |||
</address> | </address> | |||
</author> | </author> | |||
<author initials='Y.' surname='Huang' fullname='Yi Huang'> | <author initials="Y." surname="Huang" fullname="Yi Huang"> | |||
<organization>Microsoft</organization> | <organization>Microsoft</organization> | |||
<address> | <address> | |||
<postal> | <postal> | |||
<street>One Microsoft Way</street> | <street>One Microsoft Way</street> | |||
<city>Redmond</city> | <city>Redmond</city> | |||
<region>WA</region> | <region>WA</region> | |||
<code>94052</code> | <code>98052</code> | |||
<country>USA</country> | <country>United States of America</country> | |||
</postal> | </postal> | |||
<phone>+1 425 703 0447</phone> | <phone>+1 425 703 0447</phone> | |||
<email>huanyi@microsoft.com</email> | <email>huanyi@microsoft.com</email> | |||
</address> | </address> | |||
</author> | </author> | |||
<author initials='M.' surname='Olson' fullname='Matt Olson'> | <author initials="M." surname="Olson" fullname="Matt Olson"> | |||
<organization>Microsoft</organization> | <organization>Microsoft</organization> | |||
<address> | <address> | |||
<postal> | ||||
<street>One Microsoft Way</street> | ||||
<city>Redmond</city> | ||||
<region>WA</region> | ||||
<code>98052</code> | ||||
<country>United States of America</country> | ||||
</postal> | ||||
<phone>+1 425 538 8598</phone> | <phone>+1 425 538 8598</phone> | |||
<email>maolson@microsoft.com</email> | <email>maolson@microsoft.com</email> | |||
</address> | </address> | |||
</author> | </author> | |||
<date/> | <date year="2023" month="May" /> | |||
<area>Transport</area> | <area>tsv</area> | |||
<workgroup>tcpm</workgroup> | ||||
<keyword>TCP</keyword> | <keyword>TCP</keyword> | |||
<keyword>congestion control</keyword> | <keyword>congestion control</keyword> | |||
<abstract> | <abstract> | |||
<t> This document describes HyStart++, a simple modification to | <t> This document describes HyStart++, a simple modification to | |||
the slow start phase of congestion control algorithms. | the slow start phase of congestion control algorithms. | |||
Slow start can overshoot the ideal send rate | Slow start can overshoot the ideal send rate | |||
in many cases, causing high packet loss and poor performance. | in many cases, causing high packet loss and poor performance. | |||
HyStart++ uses increase in round-trip delay as a heuristic to | HyStart++ uses increase in round-trip delay as a heuristic to | |||
find an exit point before possible overshoot. | find an exit point before possible overshoot. | |||
It also adds a mitigation to prevent jitter from causing | It also adds a mitigation to prevent jitter from causing | |||
skipping to change at line 75 ¶ | skipping to change at line 76 ¶ | |||
the slow start phase of congestion control algorithms. | the slow start phase of congestion control algorithms. | |||
Slow start can overshoot the ideal send rate | Slow start can overshoot the ideal send rate | |||
in many cases, causing high packet loss and poor performance. | in many cases, causing high packet loss and poor performance. | |||
HyStart++ uses increase in round-trip delay as a heuristic to | HyStart++ uses increase in round-trip delay as a heuristic to | |||
find an exit point before possible overshoot. | find an exit point before possible overshoot. | |||
It also adds a mitigation to prevent jitter from causing | It also adds a mitigation to prevent jitter from causing | |||
premature slow start exit. | premature slow start exit. | |||
</t> | </t> | |||
</abstract> | </abstract> | |||
</front> | </front> | |||
<middle> | <middle> | |||
<section title='Introduction'> | <section numbered="true" toc="default"> | |||
<t> <xref target="RFC5681"/> describes the slow start | <name>Introduction</name> | |||
<t> <xref target="RFC5681" format="default"/> describes the slow start | ||||
congestion control algorithm for TCP. The slow start | congestion control algorithm for TCP. The slow start | |||
algorithm is used when the congestion window (cwnd) | algorithm is used when the congestion window (cwnd) | |||
is less than the slow start threshold (ssthresh). | is less than the slow start threshold (ssthresh). | |||
During slow start, in absence of packet loss signals, | During slow start, in the absence of packet loss signals, | |||
TCP increases cwnd exponentially to probe the network capacity. | TCP increases the cwnd exponentially to probe the network capacity. | |||
This fast growth can overshoot the ideal sending rate | This fast growth can overshoot the ideal sending rate | |||
and cause significant packet loss which cannot always | and cause significant packet loss that cannot always | |||
be recovered efficiently. | be recovered efficiently. | |||
</t> | </t> | |||
<t> HyStart++ uses increase in round-trip delay as a signal to exit | <t>HyStart++ builds upon Hybrid Start (HyStart), originally described in | |||
<xref target="HyStart" format="default"/>. HyStart++ uses increase in | ||||
round-trip delay as a signal to exit | ||||
slow start before potential packet loss occurs as a result | slow start before potential packet loss occurs as a result | |||
of overshoot. This is one of two algorithms specified in | of overshoot. This is one of two algorithms specified in | |||
<xref target="HyStart"/>. | <xref target="HyStart" format="default"/> for finding a safe exit point fo | |||
After the slow start exit, a new | r | |||
slow start. After the slow start exit, a new | ||||
Conservative Slow Start (CSS) phase is used to determine | Conservative Slow Start (CSS) phase is used to determine | |||
whether the slow start exit was premature and to resume | whether the slow start exit was premature and to resume | |||
slow start. This mitigation improves performance in | slow start. This mitigation improves performance in the | |||
presence of jitter. | presence of jitter. | |||
HyStart++ reduces packet loss and retransmissions, and | HyStart++ reduces packet loss and retransmissions, and | |||
improves goodput in lab measurements and real world | improves goodput in lab measurements and real-world | |||
deployments. | deployments. | |||
</t> | </t> | |||
<t> While this document describes Hystart++ for TCP, it can | <t> While this document describes HyStart++ for TCP, it can | |||
also be used for other transport protocols which use slow start | also be used for other transport protocols that use slow start, | |||
such as QUIC <xref target="RFC9002"/> | such as QUIC <xref target="RFC9002" format="default"/> | |||
or SCTP <xref target="RFC9260"/>. | or the Stream Control Transmission Protocol (SCTP) <xref target="RFC9260" | |||
</t> | format="default"/>. | |||
</section> | ||||
<section title="Terminology" anchor="term"> | ||||
<t> The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", | ||||
"SHALL NOT", "SHOULD", "SHOULD NOT", "RECOMMENDED", | ||||
"NOT RECOMMENDED", "MAY", | ||||
and "OPTIONAL" in this document are to be interpreted | ||||
as described in BCP 14 | ||||
<xref target="RFC2119"/> <xref target="RFC8174"/> when, | ||||
and only when, they appear in all capitals, as shown here. | ||||
</t> | </t> | |||
</section> | </section> | |||
<section anchor="term" numbered="true" toc="default"> | ||||
<section title='Definitions'> | <name>Terminology</name> | |||
<t> We repeat here some definition from | <t>The key words "<bcp14>MUST</bcp14>", "<bcp14>MUST NOT</bcp14>", | |||
<xref target="RFC5681"/> to aid the reader. | "<bcp14>REQUIRED</bcp14>", "<bcp14>SHALL</bcp14>", | |||
"<bcp14>SHALL NOT</bcp14>", "<bcp14>SHOULD</bcp14>", | ||||
"<bcp14>SHOULD NOT</bcp14>", | ||||
"<bcp14>RECOMMENDED</bcp14>", "<bcp14>NOT RECOMMENDED</bcp14>", | ||||
"<bcp14>MAY</bcp14>", and "<bcp14>OPTIONAL</bcp14>" in this document | ||||
are to be interpreted as described in BCP 14 | ||||
<xref target="RFC2119"/> <xref target="RFC8174"/> when, and only | ||||
when, they appear in all capitals, as shown here.</t> | ||||
</section> | ||||
<section numbered="true" toc="default"> | ||||
<name>Definitions</name> | ||||
<t> To aid the reader, we repeat some definitions from | ||||
<xref target="RFC5681" format="default"/>: | ||||
</t> | </t> | |||
<t> SENDER MAXIMUM SEGMENT SIZE (SMSS): | <dl spacing="normal" newline="false"> | |||
The SMSS is the size of the | <dt> SENDER MAXIMUM SEGMENT SIZE (SMSS):</dt> | |||
<dd>The size of the | ||||
largest segment that the sender can transmit. This value can be | largest segment that the sender can transmit. This value can be | |||
based on the maximum transmission unit of the network, the path | based on the maximum transmission unit of the network, the Path | |||
MTU discovery <xref target="RFC1191"/>, | MTU Discovery algorithm <xref target="RFC1191" format="default"/> | |||
<xref target="RFC4821"/> algorithm, RMSS (see next item), | <xref target="RFC4821" format="default"/>, RMSS (see next item), | |||
or other factors. The size does not include the TCP/IP headers | or other factors. The size does not include the TCP/IP headers | |||
and options. | and options.</dd> | |||
</t> | ||||
<t> RECEIVER MAXIMUM SEGMENT SIZE (RMSS): The RMSS is the | <dt> RECEIVER MAXIMUM SEGMENT SIZE (RMSS):</dt><dd>The | |||
size of the largest segment the receiver is willing to accept. | size of the largest segment that the receiver is willing to accept. | |||
This is the value specified in the MSS option sent by the | This is the value specified in the MSS option sent by the | |||
receiver during connection startup. Or, if the MSS option | receiver during connection startup. Or, if the MSS option | |||
is not used, it is 536 bytes <xref target="RFC1122"/>. | is not used, it is 536 bytes <xref target="RFC1122" format="default"/>. | |||
The size does not include the TCP/IP headers and | The size does not include the TCP/IP headers and | |||
options. | options.</dd> | |||
</t> | ||||
<t> RECEIVER WINDOW (rwnd): The most recently advertised | <dt> RECEIVER WINDOW (rwnd):</dt><dd>The most recently advertised | |||
receiver window. | receiver window.</dd> | |||
</t> | ||||
<t> CONGESTION WINDOW (cwnd): A TCP state variable that | <dt> CONGESTION WINDOW (cwnd):</dt><dd>A TCP state variable that | |||
limits the amount of data a TCP can send. | limits the amount of data a TCP can send. | |||
At any given time, a TCP MUST NOT send | At any given time, a TCP <bcp14>MUST NOT</bcp14> send | |||
data with a sequence number higher than the sum of the highest | data with a sequence number higher than the sum of the highest | |||
acknowledged sequence number and the minimum of cwnd and rwnd. | acknowledged sequence number and the minimum of the cwnd and rwnd.</dd> | |||
</t> | </dl> | |||
</section> | </section> | |||
<section numbered="true" toc="default"> | ||||
<section title='HyStart++ Algorithm'> | <name>HyStart++ Algorithm</name> | |||
<section numbered="true" toc="default"> | ||||
<section title='Summary'> | <name>Summary</name> | |||
<t> <xref target="HyStart"/> specifies two algorithms | <t> <xref target="HyStart" format="default"/> specifies two algorithms | |||
(a "Delay Increase" algorithm and an "Inter-Packet Arrival" | (a "Delay Increase" algorithm and an "Inter-Packet Arrival" | |||
algorithm) to be run in parallel to detect that the sending | algorithm) to be run in parallel to detect that the sending | |||
rate has reached capacity. In practice, the Inter-Packet | rate has reached capacity. In practice, the Inter-Packet | |||
Arrival algorithm does not perform well and is not able | Arrival algorithm does not perform well and is not able | |||
to detect congestion early, primarily due to ACK compression. | to detect congestion early, primarily due to ACK compression. | |||
The idea of the Delay Increase algorithm is to look for | The idea of the Delay Increase algorithm is to look for | |||
spikes in RTT (round-trip time), which suggest that the | spikes in RTT (round-trip time), which suggest that the | |||
bottleneck buffer is filling up. | bottleneck buffer is filling up. | |||
</t> | </t> | |||
<t> In HyStart++, a TCP sender uses traditional slow start | <t> In HyStart++, a TCP sender uses standard slow start | |||
and then uses the "Delay Increase" algorithm to trigger an | and then uses the Delay Increase algorithm to trigger an | |||
exit from slow start. But instead of going straight from | exit from slow start. But instead of going straight from | |||
slow start to congestion avoidance, the sender spends a | slow start to congestion avoidance, the sender spends a | |||
number of RTTs in a Conservative Slow Start (CSS) phase | number of RTTs in a Conservative Slow Start (CSS) phase | |||
to determine whether the exit from slow start was premature. | to determine whether the exit from slow start was premature. | |||
During CSS, the congestion window is grown exponentially like | During CSS, the congestion window is grown exponentially in a | |||
in regular slow start, but with a smaller exponential base, | fashion similar to regular slow start, but with a smaller exponential ba | |||
se, | ||||
resulting in less aggressive growth. | resulting in less aggressive growth. | |||
If the RTT reduces during CSS, it's concluded that the RTT | If the RTT reduces during CSS, it's concluded that the RTT | |||
spike was not related to congestion caused by the connection | spike was not related to congestion caused by the connection | |||
sending at a rate greater than the ideal send rate, and the | sending at a rate greater than the ideal send rate, and the | |||
connection resumes slow start. If the RTT inflation | connection resumes slow start. If the RTT inflation | |||
persists throughout CSS, the connection enters congestion | persists throughout CSS, the connection enters congestion | |||
avoidance. | avoidance. | |||
</t> | </t> | |||
</section> | </section> | |||
<section numbered="true" toc="default"> | ||||
<section title='Algorithm Details'> | <name>Algorithm Details</name> | |||
<t> The following pseudocode uses a limit, L, to control the | <t> The following pseudocode uses a limit, L, to control the | |||
aggressiveness of the cwnd increase during both standard slow | aggressiveness of the cwnd increase during both standard slow | |||
start and CSS. While an arriving ACK may newly acknowledge an | start and CSS. While an arriving ACK may newly acknowledge an | |||
arbitrary number of bytes, the Hystart++ algorithm limits the | arbitrary number of bytes, the HyStart++ algorithm limits the | |||
number of those bytes applied to increase the cwnd to L*SMSS bytes. </t> | number of those bytes applied to increase the cwnd to L*SMSS bytes. </t> | |||
<t> lastRoundMinRTT and currentRoundMinRTT are initialized | <t> lastRoundMinRTT and currentRoundMinRTT are initialized | |||
to infinity at the initialization time. currRTT is the RTT | to infinity at the initialization time. currRTT is the RTT | |||
sampled from the latest incoming ACK and initialized to | sampled from the latest incoming ACK and initialized to | |||
infinity. </t> | infinity. </t> | |||
<sourcecode type="pseudocode"> | <sourcecode> | |||
lastRoundMinRTT = infinity | lastRoundMinRTT = infinity | |||
currentRoundMinRTT = infinity | currentRoundMinRTT = infinity | |||
currRTT = infinity | currRTT = infinity | |||
</sourcecode> | </sourcecode> | |||
<t>HyStart++ measures rounds using sequence numbers, as | ||||
<t>Hystart++ measures rounds using sequence numbers, as | follows:</t> | |||
follows: | <ul spacing="normal"> | |||
Define windowEnd as a sequence number initialized to SND.NXT. | <li>Define windowEnd as a sequence number initialized to SND.NXT.</li> | |||
When windowEnd is ACKed, the current round ends and windowEnd | <li>When windowEnd is ACKed, the current round ends and windowEnd | |||
is set to SND.NXT.</t> | is set to SND.NXT.</li> | |||
</ul> | ||||
<t> At the start of each round during standard slow start | <t> At the start of each round during standard slow start | |||
(<xref target="RFC5681"/>) and CSS, initialize the variables | <xref target="RFC5681" format="default"/> and CSS, initialize the variab | |||
used to compute last round and current round's minimum RTT: | les | |||
<sourcecode type="pseudocode"> | used to compute the last round's and current round's minimum RTT: | |||
</t> | ||||
<sourcecode> | ||||
lastRoundMinRTT = currentRoundMinRTT | lastRoundMinRTT = currentRoundMinRTT | |||
currentRoundMinRTT = infinity | currentRoundMinRTT = infinity | |||
rttSampleCount = 0 | rttSampleCount = 0 | |||
</sourcecode> | </sourcecode> | |||
</t> | ||||
<t> For each arriving ACK in slow start, where N is the | <t> For each arriving ACK in slow start, where N is the | |||
number of previously unacknowledged bytes acknowledged | number of previously unacknowledged bytes acknowledged | |||
in the arriving ACK: </t> | in the arriving ACK: </t> | |||
<t>Update the cwnd: </t> | <t>Update the cwnd: </t> | |||
<sourcecode type="pseudocode"> | <sourcecode> | |||
cwnd = cwnd + min(N, L * SMSS) | cwnd = cwnd + min(N, L * SMSS) | |||
</sourcecode> | </sourcecode> | |||
<t> Keep track of minimum observed RTT: </t> | <t> Keep track of the minimum observed RTT: </t> | |||
<sourcecode type="pseudocode"> | <sourcecode> | |||
currentRoundMinRTT = min(currentRoundMinRTT, currRTT) | currentRoundMinRTT = min(currentRoundMinRTT, currRTT) | |||
rttSampleCount += 1 | rttSampleCount += 1 | |||
</sourcecode> | </sourcecode> | |||
<t>For rounds where at least N_RTT_SAMPLE RTT samples have been | <t>For rounds where at least N_RTT_SAMPLE RTT samples have been | |||
obtained and currentRoundMinRTT and lastRoundMinRTT are valid, | obtained and currentRoundMinRTT and lastRoundMinRTT are valid, | |||
check if delay increase triggers slow start exit:</t> | check to see if delay increase triggers slow start exit:</t> | |||
<sourcecode type="pseudocode"> | <sourcecode> | |||
if ((rttSampleCount >= N_RTT_SAMPLE) AND | if ((rttSampleCount >= N_RTT_SAMPLE) AND | |||
(currentRoundMinRTT != infinity) AND | (currentRoundMinRTT != infinity) AND | |||
(lastRoundMinRTT != infinity)) | (lastRoundMinRTT != infinity)) | |||
Compute a RTT Threshold clamped between MIN_RTT_THRESH and MAX_RTT_THRESH | RttThresh = max(MIN_RTT_THRESH, | |||
RttThresh = max(MIN_RTT_THRESH, min(lastRoundMinRTT / MIN_RTT_DIVISOR, MAX_RTT | min(lastRoundMinRTT / MIN_RTT_DIVISOR, MAX_RTT_THRESH)) | |||
_THRESH)) | ||||
if (currentRoundMinRTT >= (lastRoundMinRTT + RttThresh)) | if (currentRoundMinRTT >= (lastRoundMinRTT + RttThresh)) | |||
cssBaselineMinRtt = currentRoundMinRTT | cssBaselineMinRtt = currentRoundMinRTT | |||
exit slow start and enter CSS | exit slow start and enter CSS | |||
</sourcecode> | </sourcecode> | |||
<t> For each arriving ACK in CSS, where N is the number | <t> For each arriving ACK in CSS, where N is the number | |||
of previously unacknowledged bytes acknowledged in | of previously unacknowledged bytes acknowledged in | |||
the arriving ACK:</t> | the arriving ACK:</t> | |||
<t> Update the cwnd: </t> | <t> Update the cwnd: </t> | |||
<sourcecode type="pseudocode"> | <sourcecode> | |||
cwnd = cwnd + (min(N, L * SMSS) / CSS_GROWTH_DIVISOR) | cwnd = cwnd + (min(N, L * SMSS) / CSS_GROWTH_DIVISOR) | |||
</sourcecode> | </sourcecode> | |||
<t> Keep track of minimum observed RTT: </t> | <t> Keep track of the minimum observed RTT: </t> | |||
<sourcecode type="pseudocode"> | <sourcecode> | |||
currentRoundMinRTT = min(currentRoundMinRTT, currRTT) | currentRoundMinRTT = min(currentRoundMinRTT, currRTT) | |||
rttSampleCount += 1 | rttSampleCount += 1 | |||
</sourcecode> | </sourcecode> | |||
<t> For CSS rounds where at least N_RTT_SAMPLE RTT | <t> For CSS rounds where at least N_RTT_SAMPLE RTT | |||
samples have been obtained, check if current round's | samples have been obtained, check to see if the current round's | |||
minRTT drops below baseline indicating that HyStart | minRTT drops below baseline (cssBaselineMinRtt) indicating that | |||
exit was spurious: | slow start exit was spurious: | |||
</t> | </t> | |||
<sourcecode type="pseudocode"> | <sourcecode> | |||
if (currentRoundMinRTT &lt; cssBaselineMinRtt) | if (currentRoundMinRTT &lt; cssBaselineMinRtt) | |||
cssBaselineMinRtt = infinity | cssBaselineMinRtt = infinity | |||
resume slow start including HyStart++ | resume slow start including HyStart++ | |||
</sourcecode> | </sourcecode> | |||
<t> CSS lasts at most CSS_ROUNDS rounds. If the transition | <t> CSS lasts at most CSS_ROUNDS rounds. If the transition | |||
into CSS happens in the middle of a round, that partial | into CSS happens in the middle of a round, that partial | |||
round counts towards the limit. </t> | round counts towards the limit. </t> | |||
<t> If CSS_ROUNDS rounds are complete, | <t> If CSS_ROUNDS rounds are complete, | |||
enter congestion avoidance by setting ssthresh to current cwnd. </t> | enter congestion avoidance by setting the ssthresh to the current cwnd. | |||
<sourcecode type="pseudocode"> | </t> | |||
<sourcecode> | ||||
ssthresh = cwnd | ssthresh = cwnd | |||
</sourcecode> | </sourcecode> | |||
<t> If loss or Explicit Congestion Notification (ECN) marking is observe | ||||
<t> If loss or ECN-marking is observed anytime during | d at any time during | |||
standard slow start or CSS, enter congestion avoidance | standard slow start or CSS, enter congestion avoidance | |||
by setting ssthresh to current cwnd. </t> | by setting the ssthresh to the current cwnd. | |||
<sourcecode type="pseudocode"> | </t> | |||
<sourcecode> | ||||
ssthresh = cwnd | ssthresh = cwnd | |||
</sourcecode> | </sourcecode> | |||
</section> | </section> | |||
<section numbered="true" toc="default"> | ||||
<section title='Tuning constants and other considerations'> | <name>Tuning Constants and Other Considerations</name> | |||
<t> It is RECOMMENDED that a HyStart++ implementation use | <t> It is <bcp14>RECOMMENDED</bcp14> that a HyStart++ implementation use | |||
the following constants: | the following constants: | |||
<sourcecode type="pseudocode"> | </t> | |||
<sourcecode> | ||||
MIN_RTT_THRESH = 4 msec | MIN_RTT_THRESH = 4 msec | |||
MAX_RTT_THRESH = 16 msec | MAX_RTT_THRESH = 16 msec | |||
MIN_RTT_DIVISOR = 8 | MIN_RTT_DIVISOR = 8 | |||
N_RTT_SAMPLE = 8 | N_RTT_SAMPLE = 8 | |||
CSS_GROWTH_DIVISOR = 4 | CSS_GROWTH_DIVISOR = 4 | |||
CSS_ROUNDS = 5 | CSS_ROUNDS = 5 | |||
L = infinity if paced, L = 8 if non-paced | L = infinity if paced, L = 8 if non-paced | |||
</sourcecode> | </sourcecode> | |||
</t> | <t> These constants have been determined with lab measurements | |||
<t> These constants have been determined with lab measurements | and real-world deployments. An implementation <bcp14>MAY</bcp14> tune them | |||
and real world deployments. An implementation MAY tune them for | for | |||
different network characteristics. | different network characteristics. | |||
</t> | </t> | |||
<t> The delay increase sensitivity is determined | <t> The delay increase sensitivity is determined | |||
by MIN_RTT_THRESH and MAX_RTT_THRESH. Smaller values of | by MIN_RTT_THRESH and MAX_RTT_THRESH. Smaller values of | |||
MIN_RTT_THRESH may cause spurious exits from slow start. Larger | MIN_RTT_THRESH may cause spurious exits from slow start. Larger | |||
values of MAX_RTT_THRESH may result in slow start not exiting | values of MAX_RTT_THRESH may result in slow start not exiting | |||
until loss is encountered for connections on large RTT paths. | until loss is encountered for connections on large RTT paths. | |||
</t> | </t> | |||
<t>MIN_RTT_DIVISOR is a fraction of RTT to compute delay threshold. | <t>MIN_RTT_DIVISOR is a fraction of RTT to compute the delay threshold. | |||
A smaller value would mean a bigger threshold and thus less sensitive to | A smaller value would mean a larger threshold and thus less sensitivity to | |||
delay increase, and vice versa. | delay increase, and vice versa. | |||
</t> | </t> | |||
<t> While all TCP implementations are REQUIRED to take at least one RTT | <t> While all TCP implementations are <bcp14>REQUIRED</bcp14> to take at | |||
sample each round, implementations of HyStart++ are RECOMMENDED to take | least one RTT | |||
sample each round, implementations of HyStart++ are <bcp14>RECOMMENDED</bc | ||||
p14> to take | ||||
at least N_RTT_SAMPLE RTT samples. Using lower values of N_RTT_SAMPLE will | at least N_RTT_SAMPLE RTT samples. Using lower values of N_RTT_SAMPLE will | |||
lower the accuracy of the measured RTT for the round; | lower the accuracy of the measured RTT for the round; | |||
higher values will improve accuracy at the cost of more | higher values will improve accuracy at the cost of more | |||
processing. | processing. | |||
</t> | </t> | |||
<t> The minimum value of CSS_GROWTH_DIVISOR MUST be at least 2. | <t> The minimum value of CSS_GROWTH_DIVISOR <bcp14>MUST</bcp14> be at le | |||
ast 2. | ||||
A value of 1 results in the same aggressive behavior as regular | A value of 1 results in the same aggressive behavior as regular | |||
slow start. Values larger than 4 | slow start. Values larger than 4 | |||
will cause the algorithm to be less aggressive and may be less | will cause the algorithm to be less aggressive and may be less | |||
performant. | performant. | |||
</t> | </t> | |||
<t> Smaller values of CSS_ROUNDS may miss detecting jitter | <t> Smaller values of CSS_ROUNDS may miss detecting jitter, | |||
and larger values may limit performance. | and larger values may limit performance. | |||
</t> | </t> | |||
<t> Packet pacing <xref target="ASA00"/> is a possible mechanism to | <t> Packet pacing <xref target="ASA00" format="default"/> is a possible | |||
avoid large bursts and their associated harm. A paced TCP implementation S | mechanism to | |||
HOULD | avoid large bursts and their associated harm. A paced TCP implementation < | |||
use L = infinity. Burst concerns are mitigated by pacing and this | bcp14>SHOULD</bcp14> | |||
use L = infinity. Burst concerns are mitigated by pacing, and this | ||||
setting allows for optimal cwnd growth on modern networks. | setting allows for optimal cwnd growth on modern networks. | |||
</t> | </t> | |||
<t> For TCP implementations that pace to mitigate burst concerns, L | <t> For TCP implementations that pace to mitigate burst concerns, L | |||
values smaller than INFINITY may suffer performance problems due to slow | values smaller than infinity may suffer performance problems due to slow | |||
cwnd growth in high speed networks. For non-paced TCP implementations, L v | cwnd growth in high-speed networks. For non-paced TCP implementations, L v | |||
alues | alues | |||
smaller than 8 may suffer performance problems due to slow cwnd growth in | smaller than 8 may suffer performance problems due to slow cwnd growth in | |||
high | high-speed networks; L values larger than 8 may cause an increase in burstiness | |||
speed networks; L values larger than 8 may cause an increase in burstiness | ||||
and thereby loss rates, and result in poor performance. | and thereby loss rates, and result in poor performance. | |||
</t> | </t> | |||
<t> An implementation SHOULD use HyStart++ only for the | <t> An implementation <bcp14>SHOULD</bcp14> use HyStart++ only for the | |||
initial slow start (when ssthresh is at its initial value | initial slow start (when the ssthresh is at its initial value | |||
of arbitrarily high per <xref target="RFC5681"/>) and fall | of arbitrarily high per <xref target="RFC5681" format="default"/>) and fal | |||
back to using traditional slow start for the remainder of | l | |||
back to using standard slow start for the remainder of | ||||
the connection lifetime. This is acceptable because subsequent | the connection lifetime. This is acceptable because subsequent | |||
slow starts will use the discovered ssthresh value to exit slow | slow starts will use the discovered ssthresh value to exit slow | |||
start and avoid the overshoot problem. An implementation MAY | start and avoid the overshoot problem. An implementation <bcp14>MAY</bcp14 > | |||
use HyStart++ to grow the restart window | use HyStart++ to grow the restart window | |||
(<xref target="RFC5681"/>) after a long idle period. | <xref target="RFC5681" format="default"/> after a long idle period. | |||
</t> | </t> | |||
<t> | <t> | |||
In application limited scenarios, the amount of data in | In application-limited scenarios, the amount of data in | |||
flight could fall below the bandwidth-delay product (BDP) and | flight could fall below the bandwidth-delay product (BDP) and | |||
result in smaller RTT samples which can trigger an exit back to | result in smaller RTT samples, which can trigger an exit back to | |||
slow start. It is expected that a connection might oscillate | slow start. It is expected that a connection might oscillate | |||
between CSS and slow start in such scenarios. But this behavior | between CSS and slow start in such scenarios. But this behavior | |||
will neither result in a connection prematurely entering | will neither result in a connection prematurely entering | |||
congestion avoidance nor cause overshooting compared to | congestion avoidance nor cause overshooting compared to | |||
slow start. | slow start. | |||
</t> | </t> | |||
</section> | </section> | |||
</section> | </section> | |||
<section numbered="true" toc="default"> | ||||
<section title='Deployments and Performance Evaluations'> | <name>Deployments and Performance Evaluations</name> | |||
<t> At the time of this writing, HyStart++ as described | ||||
<t> As of February 2023, HyStart++ as described | ||||
in this document has been default enabled for all TCP | in this document has been default enabled for all TCP | |||
connections in the Windows operating system for over | connections in the Windows operating system for over | |||
two years with pacing disabled and an actual L = 8. | two years with pacing disabled and an actual L = 8. | |||
</t> | </t> | |||
<t> In lab measurements with Windows TCP, HyStart++ shows | <t> In lab measurements with Windows TCP, HyStart++ shows | |||
both goodput improvements as well as reductions in packet | goodput improvements as well as reductions in packet | |||
loss and retransmissions compared to traditional slow start. | loss and retransmissions compared to standard slow start. | |||
For example, across a variety of tests on a 100 Mbps link | For example, across a variety of tests on a 100 Mbps link | |||
with a bottleneck buffer size of bandwidth-delay product, | with a bottleneck buffer size of bandwidth-delay product, | |||
HyStart++ reduces bytes retransmitted by 50% and | HyStart++ reduces bytes retransmitted by 50% and | |||
retransmission timeouts (RTOs) by 36%. | retransmission timeouts (RTOs) by 36%. | |||
</t> | </t> | |||
<t> In an A/B test where we compare HyStart++ draft 01 to | <t> In an A/B test where we compared an implementation of HyStart++ | |||
traditional slow start across a large Windows device | (based on an earlier draft version of this document) to | |||
standard slow start across a large Windows device | ||||
population, out of 52 billion TCP connections, 0.7% of | population, out of 52 billion TCP connections, 0.7% of | |||
connections move from 1 RTO to 0 RTOs and another 0.7% | connections move from 1 RTO to 0 RTOs and another 0.7% of | |||
connections move from 2 RTOs to 1 RTO with HyStart++. | connections move from 2 RTOs to 1 RTO with HyStart++. | |||
This test did not focus on send-heavy connections and | This test did not focus on send-heavy connections, and | |||
the impact on send-heavy connections is likely much | the impact on send-heavy connections is likely much | |||
higher. We plan to conduct more such production | higher. We plan to conduct more such production | |||
experiments to gather more data in the future. | experiments to gather more data in the future. | |||
</t> | </t> | |||
</section> | </section> | |||
<section numbered="true" toc="default"> | ||||
<section title='Security Considerations'> | <name>Security Considerations</name> | |||
<t> HyStart++ enhances slow start and inherits the general | <t> HyStart++ enhances slow start and inherits the general | |||
security considerations discussed in <xref target="RFC5681"/>. | security considerations discussed in <xref target="RFC5681" format="default"/>. | |||
</t> | </t> | |||
<t>An attacker can cause HyStart++ to exit slow start prematurely | ||||
<t>An attacker can cause Hystart++ to exit slow start prematurely | ||||
and impair the performance of a TCP connection by, for example, | and impair the performance of a TCP connection by, for example, | |||
dropping data packets or their acknowledgements.</t> | dropping data packets or their acknowledgments.</t> | |||
<t>The ACK division attack outlined in <xref target="SCWA99"/> does not affect | <t>The ACK division attack outlined in <xref target="SCWA99" format="default"/> does not affect | |||
Hystart++ because the congestion window increase in Hystart++ is based | HyStart++ because the congestion window increase in HyStart++ is based | |||
on the number of bytes newly acknowledged in each arriving ACK rather than by | on the number of bytes newly acknowledged in each arriving ACK rather than by | |||
a particular constant on each arriving ACK. | a particular constant on each arriving ACK. | |||
</t> | </t> | |||
</section> | </section> | |||
<section numbered="true" toc="default"> | ||||
<section title='IANA Considerations'> | <name>IANA Considerations</name> | |||
<t> This document has no actions for IANA. | <t>This document has no IANA actions.</t> | |||
</t> | ||||
</section> | ||||
<section title='Acknowledgements'> | ||||
<t> During the discussions of this work on the TCPM mailing list, in working group meetings, | |||
helpful comments, critiques, and reviews were received from (listed alphabetically by last name): | |||
Mark Allman, Bob Briscoe, Neal Cardwell, Yuchung Cheng, Junho Choi, Martin Duke, Reese Enghardt, | |||
Christian Huitema, Ilpo Järvinen, Yoshifumi Nishida, Randall Stewart, and Michael Tuexen. | |||
</t> | ||||
</section> | </section> | |||
</middle> | </middle> | |||
<back> | <back> | |||
<references title='Normative References'> | <references> | |||
&rfc2119; | <name>References</name> | |||
&rfc5681; | <references> | |||
&rfc8174; | <name>Normative References</name> | |||
</references> | <xi:include href="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.2119.xml"/> | |||
<xi:include href="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.5681.xml"/> | |||
<xi:include href="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.8174.xml"/> | |||
</references> | ||||
<references> | ||||
<name>Informative References</name> | ||||
<references title='Informative References'> | <reference anchor="HyStart" target="https://doi.org/10.1016/j.comnet.201 | |||
<reference anchor='HyStart' target='https://doi.org/10.1016/j.comnet.2011. | 1.01.014"> | |||
01.014'> | <front> | |||
<front> | <title>Taming the elephants: New TCP slow start</title> | |||
<title>Taming the elephants: New TCP slow start</title> | <author initials="S." surname="Ha"> | |||
<author initials="S." surname="Ha"> | ||||
</author> | </author> | |||
<author initials="I." surname="Ree"> | <author initials="I." surname="Rhee"> | |||
</author> | </author> | |||
<date year="2011"/> | <date month="June" year="2011"/> | |||
</front> | </front> | |||
<seriesInfo name="" value="Computer Networks vol. 55, no. 9, pp. 2092-2110"/> | <refcontent>Computer Networks vol. 55, no. 9, pp. 2092-2110</refcontent> | |||
<seriesInfo name="DOI" value="10.1016/j.comnet.2011.01.014"/> | <seriesInfo name="DOI" value="10.1016/j.comnet.2011.01.014"/> | |||
</reference> | </reference> | |||
&rfc9002; | ||||
&rfc9260; | <xi:include href="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.9 | |||
&rfc1191; | 002.xml"/> | |||
&rfc4821; | <xi:include href="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.9 | |||
&rfc1122; | 260.xml"/> | |||
<reference anchor='SCWA99' target='https://doi.org/10.1145/505696.505704'> | <xi:include href="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.1 | |||
<front> | 191.xml"/> | |||
<title>TCP congestion control with a misbehaving receiver</title> | <xi:include href="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.4 | |||
<author initials="S." surname="Savage"> | 821.xml"/> | |||
<xi:include href="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.1 | ||||
122.xml"/> | ||||
<reference anchor="SCWA99" target="https://doi.org/10.1145/505696.505704 | ||||
"> | ||||
<front> | ||||
<title>TCP congestion control with a misbehaving receiver</title> | ||||
<author initials="S." surname="Savage"> | ||||
</author> | </author> | |||
<author initials="N." surname="Cardwell"> | <author initials="N." surname="Cardwell"> | |||
</author> | </author> | |||
<author initials="D." surname="Wetherall"> | <author initials="D." surname="Wetherall"> | |||
</author> | </author> | |||
<author initials="T." surname="Anderson"> | <author initials="T." surname="Anderson"> | |||
</author> | </author> | |||
<date year="1999"/> | <date month="October" year="1999"/> | |||
</front> | </front> | |||
<seriesInfo name="" value="ACM Computer Communication Review, 29(5)"/> | <refcontent>ACM SIGCOMM Computer Communication Review, vol. 29, issue | |||
<seriesInfo name="DOI" value="10.1145/505696.505704"/> | 5, pp. 71-78</refcontent> | |||
</reference> | <seriesInfo name="DOI" value="10.1145/505696.505704"/> | |||
<reference anchor='ASA00' target='https://doi.org/10.1109/INFCOM.2000.8324 | </reference> | |||
83'> | ||||
<front> | <reference anchor="ASA00" target="https://doi.org/10.1109/INFCOM.2000.83 | |||
<title>Understanding the Performance of TCP Pacing</title> | 2483"> | |||
<author initials="A." surname="Aggarwal"> | <front> | |||
<title>Understanding the performance of TCP pacing</title> | ||||
<author initials="A." surname="Aggarwal"> | ||||
</author> | </author> | |||
<author initials="S." surname="Savage"> | <author initials="S." surname="Savage"> | |||
</author> | </author> | |||
<author initials="T." surname="Anderson"> | <author initials="T." surname="Anderson"> | |||
</author> | </author> | |||
<date year="2000"/> | <date month="March" year="2000"/> | |||
</front> | </front> | |||
<seriesInfo name="" value="Proceedings IEEE INFOCOM 2000"/> | <refcontent>Proceedings IEEE INFOCOM 2000</refcontent> | |||
<seriesInfo name="DOI" value="10.1109/INFCOM.2000.832483"/> | <seriesInfo name="DOI" value="10.1109/INFCOM.2000.832483"/> | |||
</reference> | </reference> | |||
</references> | ||||
</references> | </references> | |||
<section numbered="false" toc="default"> | ||||
<name>Acknowledgments</name> | ||||
<t> During the discussions of this work on the TCPM mailing list and in working group meetings, | |||
helpful comments, critiques, and reviews were received from (listed alphabetically by last name) | |||
<contact fullname="Mark Allman"/>, <contact fullname="Bob Briscoe"/>, <contact fullname="Neal Cardwell"/>, | |||
<contact fullname="Yuchung Cheng"/>, <contact fullname="Junho Choi"/>, <contact fullname="Martin Duke"/>, | |||
<contact fullname="Reese Enghardt"/>, | |||
<contact fullname="Christian Huitema"/>, <contact fullname="Ilpo Järvinen"/>, <contact fullname="Yoshifumi Nishida"/>, | |||
<contact fullname="Randall Stewart"/>, and <contact fullname="Michael Tüxen"/>. | |||
</t> | ||||
</section> | ||||
</back> | </back> | |||
</rfc> | </rfc> | |||
End of changes. 92 change blocks. | ||||
281 lines changed or deleted | 297 lines changed or added | |||
This html diff was produced by rfcdiff 1.48. |