rfc9293xml2.original.xml | rfc9293.xml | |||
---|---|---|---|---|
<?xml version="1.0" encoding="UTF-8"?> | <?xml version='1.0' encoding='utf-8'?> | |||
<!DOCTYPE rfc SYSTEM "rfc2629.dtd" [ | <!DOCTYPE rfc [ | |||
<!-- A set of on-line citation libraries are maintained on the xml2rfc web site. | <!ENTITY nbsp "&#160;"> | |||
The next line defines an entity named RFC2629, which contains the necessary | <!ENTITY zwsp "&#8203;"> | |||
XML | <!ENTITY nbhy "&#8209;"> | |||
for the reference element, and is used much later in the file. This XML co | <!ENTITY wj "&#8288;"> | |||
ntains an | ||||
anchor (also RFC2629) which can be used to cross-reference this item in the | ||||
text. | ||||
You can also use local file names instead of a URI. The environment variab | ||||
le | ||||
XML_LIBRARY provides a search path of directories to look at to locate a | ||||
relative path name for the file. There has to be one entity for each item t | ||||
o be | ||||
referenced. --> | ||||
<!ENTITY RFC0791 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC | ||||
.0791.xml"> | ||||
<!ENTITY RFC0793 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC | ||||
.0793.xml"> | ||||
<!ENTITY RFC0879 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC | ||||
.0879.xml"> | ||||
<!ENTITY RFC0896 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC | ||||
.0896.xml"> | ||||
<!ENTITY RFC1011 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC | ||||
.1011.xml"> | ||||
<!ENTITY RFC1122 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC | ||||
.1122.xml"> | ||||
<!ENTITY RFC1191 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC | ||||
.1191.xml"> | ||||
<!ENTITY RFC1349 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC | ||||
.1349.xml"> | ||||
<!ENTITY RFC1644 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC | ||||
.1644.xml"> | ||||
<!ENTITY RFC2018 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC | ||||
.2018.xml"> | ||||
<!ENTITY RFC2119 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC | ||||
.2119.xml"> | ||||
<!ENTITY RFC2474 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC | ||||
.2474.xml"> | ||||
<!ENTITY RFC2525 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC | ||||
.2525.xml"> | ||||
<!ENTITY RFC2675 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC | ||||
.2675.xml"> | ||||
<!ENTITY RFC2873 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC | ||||
.2873.xml"> | ||||
<!ENTITY RFC2883 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC | ||||
.2883.xml"> | ||||
<!ENTITY RFC2914 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC | ||||
.2914.xml"> | ||||
<!ENTITY RFC2923 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC | ||||
.2923.xml"> | ||||
<!ENTITY RFC3168 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC | ||||
.3168.xml"> | ||||
<!ENTITY RFC3449 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC | ||||
.3449.xml"> | ||||
<!ENTITY RFC3465 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC | ||||
.3465.xml"> | ||||
<!ENTITY RFC4727 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC | ||||
.4727.xml"> | ||||
<!ENTITY RFC4821 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC | ||||
.4821.xml"> | ||||
<!ENTITY RFC4953 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC | ||||
.4953.xml"> | ||||
<!ENTITY RFC4987 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC | ||||
.4987.xml"> | ||||
<!ENTITY RFC5033 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC | ||||
.5033.xml"> | ||||
<!ENTITY RFC5044 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC | ||||
.5044.xml"> | ||||
<!ENTITY RFC5461 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC | ||||
.5461.xml"> | ||||
<!ENTITY RFC5570 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC | ||||
.5570.xml"> | ||||
<!ENTITY RFC5681 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC | ||||
.5681.xml"> | ||||
<!ENTITY RFC5795 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC | ||||
.5795.xml"> | ||||
<!ENTITY RFC5925 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC | ||||
.5925.xml"> | ||||
<!ENTITY RFC5961 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC | ||||
.5961.xml"> | ||||
<!ENTITY RFC6093 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC | ||||
.6093.xml"> | ||||
<!ENTITY RFC6191 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC | ||||
.6191.xml"> | ||||
<!ENTITY RFC6298 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC | ||||
.6298.xml"> | ||||
<!ENTITY RFC6429 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC | ||||
.6429.xml"> | ||||
<!ENTITY RFC6528 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC | ||||
.6528.xml"> | ||||
<!ENTITY RFC6633 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC | ||||
.6633.xml"> | ||||
<!ENTITY RFC6691 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC | ||||
.6691.xml"> | ||||
<!ENTITY RFC6864 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC | ||||
.6864.xml"> | ||||
<!ENTITY RFC6994 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC | ||||
.6994.xml"> | ||||
<!ENTITY RFC7094 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC | ||||
.7094.xml"> | ||||
<!ENTITY RFC7323 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC | ||||
.7323.xml"> | ||||
<!ENTITY RFC7413 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC | ||||
.7413.xml"> | ||||
<!ENTITY RFC7414 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC | ||||
.7414.xml"> | ||||
<!ENTITY RFC7657 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC | ||||
.7657.xml"> | ||||
<!ENTITY RFC8087 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC | ||||
.8087.xml"> | ||||
<!ENTITY RFC8095 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC | ||||
.8095.xml"> | ||||
<!ENTITY RFC8174 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC | ||||
.8174.xml"> | ||||
<!ENTITY RFC8200 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC | ||||
.8200.xml"> | ||||
<!ENTITY RFC8201 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC | ||||
.8201.xml"> | ||||
<!ENTITY RFC8303 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC | ||||
.8303.xml"> | ||||
<!ENTITY RFC8504 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC | ||||
.8504.xml"> | ||||
<!ENTITY RFC8546 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC | ||||
.8546.xml"> | ||||
<!ENTITY RFC8548 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC | ||||
.8548.xml"> | ||||
<!ENTITY RFC8558 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC | ||||
.8558.xml"> | ||||
<!ENTITY RFC8684 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC | ||||
.8684.xml"> | ||||
<!ENTITY RFC8961 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC | ||||
.8961.xml"> | ||||
<!ENTITY RFC9000 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC | ||||
.9000.xml"> | ||||
<!ENTITY RFC9065 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC | ||||
.9065.xml"> | ||||
<!-- There is also a library of current Internet Draft citations. It isn't a go | ||||
od idea to | ||||
actually use one for the template because it might have disappeared when yo | ||||
u come to test | ||||
this template. This is the form of the entity definition | ||||
<!ENTITY I-D.mrose-writing-rfcs SYSTEM | ||||
"http://xml.resource.org/public/rfc/bibxml3/reference.I-D.mrose-writing-rfc | ||||
s.xml"> | ||||
corresponding to a draft filename draft-mrose-writing-rfcs-nn.txt. The cita | ||||
tion will be | ||||
to the most recent draft in the sequence, and is updated roughly hourly on | ||||
the web site. | ||||
For working group drafts, the same principle applies: file name starts draf | ||||
t-ietf-wgname-.. | ||||
and entity file is reference.I-D.ietf-wgname-... The corresponding entity | ||||
name is | ||||
I-D.ietf-wgname-... (I-D.mrose-writing-rfcs for the other example). Of cou | ||||
rse this doesn't | ||||
change when the draft version changes. | ||||
--> | ||||
<!ENTITY I-D.gont-tcpm-tcp-seccomp-prec SYSTEM "https://xml2rfc.tools.ietf.org/p | ||||
ublic/rfc/bibxml3/reference.I-D.draft-gont-tcpm-tcp-seccomp-prec-00.xml"> | ||||
<!ENTITY I-D.gont-tcpm-tcp-seq-validation SYSTEM "https://xml2rfc.tools.ietf.org | ||||
/public/rfc/bibxml3/reference.I-D.draft-gont-tcpm-tcp-seq-validation-04.xml"> | ||||
<!ENTITY I-D.ietf-tcpm-tcp-edo SYSTEM "https://xml2rfc.tools.ietf.org/public/rfc | ||||
/bibxml3/reference.I-D.draft-ietf-tcpm-tcp-edo-10.xml"> | ||||
<!ENTITY I-D.mcquistin-augmented-ascii-diagrams SYSTEM "https://xml2rfc.tools.ie | ||||
tf.org/public/rfc/bibxml3/reference.I-D.draft-mcquistin-augmented-ascii-diagrams | ||||
-08.xml"> | ||||
<!ENTITY I-D.iab-use-it-or-lose-it SYSTEM "https://xml2rfc.tools.ietf.org/public | ||||
/rfc/bibxml3/reference.I-D.draft-iab-use-it-or-lose-it-02.xml"> | ||||
<!-- Fudge for XMLmind which doesn't have this built in --> | ||||
<!ENTITY nbsp "&#160;"> | ||||
]> | ]> | |||
<rfc xmlns:xi="http://www.w3.org/2001/XInclude" category="std" obsoletes="793, 8 | ||||
<!-- Extra statement used by XSLT processors to control the output style. --> | 79, 2873, 6093, 6429, 6528, 6691" updates="1011, 1122, 5961" ipr="pre5378Trust20 | |||
<?xml-stylesheet type='text/xsl' href='rfc2629.xslt' ?> | 0902" number="9293" docName="draft-ietf-tcpm-rfc793bis-28" submissionType="IETF" | |||
consensus="true" xml:lang="en" tocInclude="true" tocDepth="3" symRefs="false" s | ||||
<!-- Processing Instructions can be placed here but if you are editing | ortRefs="true" version="3"> | |||
with XMLmind (and maybe other XML editors) they are better placed | ||||
after the rfc element start tag as shown below. --> | ||||
<!-- Information about the document. | ||||
category values: std, bcp, info, exp, and historic | ||||
For Internet-Drafts, specify attribute "ipr". | ||||
(ipr values are: full3667, noModification3667, noDerivatives3667), | ||||
Also for Internet-Drafts, can specify values for | ||||
attributes "docName" and, if relevant, "iprExtract". Note | ||||
that the value for iprExtract is the anchor attribute | ||||
value of a section (such as a MIB specification) that can be | ||||
extracted for separate publication, and is only | ||||
useful when the value of "ipr" is not "full3667". --> | ||||
<rfc | ||||
category="std" | ||||
obsoletes="793, 879, 2873, 6093, 6429, 6528, 6691" | ||||
updates="5961, 1011, 1122" | ||||
ipr="pre5378Trust200902" | ||||
docName="draft-ietf-tcpm-rfc793bis-28" > | ||||
<!-- Processing Instructions- PIs (for a complete list and description, | ||||
see file http://xml.resource.org/authoring/README.html and below... -- | ||||
> | ||||
<!-- Some of the more generally applicable PIs that most I-Ds might want to | ||||
use --> | ||||
<!-- Try to enforce the ID-nits conventions and DTD validity --> | ||||
<?rfc strict="yes" ?> | ||||
<!-- Items used when reviewing the document --> | ||||
<?rfc comments="no" ?> <!-- Controls display of <cref> elements --> | ||||
<?rfc inline="no" ?> <!-- When no, put comments at end in comments sectio | ||||
n, | ||||
otherwise, put inline --> | ||||
<?rfc editing="no" ?> <!-- When yes, insert editing marks: editing marks c | ||||
onsist of a | ||||
string such as <29> printed in the blank line a | ||||
t the | ||||
beginning of each paragraph of text. --> | ||||
<!-- Create Table of Contents (ToC) and set some options for it. | ||||
Note the ToC may be omitted for very short documents, but idnits insists | ||||
on a ToC | ||||
if the document has more than 15 pages. --> | ||||
<?rfc toc="yes"?> | ||||
<?rfc tocompact="yes"?> <!-- If "yes" eliminates blank lines before main sect | ||||
ion entries. --> | ||||
<?rfc tocdepth="3"?> <!-- Sets the number of levels of sections/subsection | ||||
s... in ToC --> | ||||
<!-- Choose the options for the references. | ||||
Some like symbolic tags in the references (and citations) and others pr | ||||
efer | ||||
numbers. The RFC Editor always uses symbolic tags. | ||||
The tags used are the anchor attributes of the references. --> | ||||
<?rfc symrefs="no"?> | ||||
<?rfc sortrefs="yes" ?> <!-- If "yes", causes the references to be sorted in | ||||
order of tags. | ||||
This doesn't have any effect unless symrefs is | ||||
"yes" also. --> | ||||
<!-- These two save paper: Just setting compact to "yes" makes savings by no | ||||
t starting each | ||||
main section on a new page but does not omit the blank lines between li | ||||
st items. | ||||
If subcompact is also "yes" the blank lines between list items are also | ||||
omitted. --> | ||||
<?rfc compact="yes" ?> | ||||
<?rfc subcompact="no" ?> | ||||
<!-- end of list of popular I-D processing instructions --> | ||||
<!-- ***** FRONT MATTER ***** --> | ||||
<front> | <front> | |||
<!-- The abbreviated title is used in the page header - it is only necessary | <title abbrev="TCP">Transmission Control Protocol (TCP)</title> | |||
if the | <seriesInfo name="STD" value="7" /> | |||
full title is longer than 42 characters --> | <seriesInfo name="RFC" value="9293" /> | |||
<title abbrev="TCP Specification">Transmission Control Protocol (TCP) Specif | <author fullname="Wesley M. Eddy" initials="W." surname="Eddy" role="editor" | |||
ication</title> | > | |||
<!-- add 'role="editor"' below for the editors if appropriate --> | ||||
<author | ||||
fullname="Wesley M. Eddy" | ||||
initials="W." | ||||
surname="Eddy" | ||||
role="editor"> | ||||
<!-- abbrev not needed but can be used for the header | ||||
if the full organization name is too long --> | ||||
<organization abbrev="MTI Systems">MTI Systems</organization> | <organization abbrev="MTI Systems">MTI Systems</organization> | |||
<address> | <address> | |||
<postal> | <postal> | |||
<!-- I've omitted my street address here --> | <country>United States of America</country> | |||
<street/> | </postal> | |||
<city/> | ||||
<!-- | ||||
The IETF seems to meet once a year in Minneapolis, | ||||
so that's practically my US address. If so, I would | ||||
add the following elements: | ||||
<region>MN</region> | ||||
<code>55403</code> | ||||
However, if I lived in France, the <code> comes before the city. | ||||
xml2rfc | ||||
preserves the order of <city>, <region>, <code> and <country> el | ||||
ements in | ||||
output so that they can reflect any possible national scheme | ||||
--> | ||||
<!-- The country element is supposed to contain an ISO3166 two l | ||||
etter country | ||||
code. --> | ||||
<country>US</country> | ||||
</postal> | ||||
<email>wes@mti-systems.com</email> | <email>wes@mti-systems.com</email> | |||
<!-- | ||||
If I had a phone, fax machine, and a URI, I could add the following: | ||||
<phone>+1-408-555-1234</phone> | ||||
<facsimile>+1-555-911-9111</facsimile> | ||||
<uri>http://www.example.com/</uri> | ||||
--> | ||||
</address> | ||||
</author> | ||||
<!-- | ||||
<author | ||||
fullname="Andre Oppermann" | ||||
initials="A." | ||||
surname="Oppermann"> | ||||
<organization>FreeBSD</organization> | ||||
<address> | ||||
<email>andre@freebsd.org</email> | ||||
</address> | </address> | |||
</author> | </author> | |||
<date year="2022"/> <!-- month="March" is no longer necessary | <date year="2022" month="August"/> | |||
note also, day="30" is optional --> | ||||
<!-- WARNING: If the month and year are the current ones, xml2rfc will fill | ||||
in the day for | ||||
you. If only the year is specified, xml2rfc will fill in the current da | ||||
y and month | ||||
irrespective of the day. This silliness should be fixed in v1.31. --> | ||||
<!-- Meta-data Declarations --> | ||||
<!-- Notice the use of &amp; as an escape for & which would otherwise | ||||
start an entity declaration, whereas we want a literal &. --> | ||||
<area>Transport</area> | <area>Transport</area> | |||
<workgroup>TCPM</workgroup> | ||||
<!-- WG name at the upperleft corner of the doc, | <keyword>TCP</keyword> | |||
IETF fine for individual submissions. You can also | <keyword>TCPM</keyword> | |||
omit this element in which case it defaults to "Network Working Group" | <keyword>transport layer</keyword> | |||
- | <keyword>internet transport</keyword> | |||
a hangover from the ancient history of the IETF! --> | ||||
<workgroup>Internet Engineering Task Force</workgroup> | ||||
<!-- The DTD allows multiple area and workgroup elements but only the first | ||||
one has any | ||||
effect on output. --> | ||||
<!-- You can add <keyword/> elements here. They will be incorporated into H | ||||
TML output | ||||
files in a meta tag but they have no effect on text or nroff output. -- | ||||
> | ||||
<abstract> | <abstract> | |||
<t>This document specifies the Transmission Control Protocol (TCP). TCP | <t>This document specifies the Transmission Control Protocol (TCP). TCP i | |||
is an important transport layer protocol in the Internet protocol stack, and ha | s an important transport-layer protocol in the Internet protocol stack, and it h | |||
s continuously evolved over decades of use and growth of the Internet. Over thi | as continuously evolved over decades of use and growth of the Internet. Over th | |||
s time, a number of changes have been made to TCP as it was specified in RFC 793 | is time, a number of changes have been made to TCP as it was specified in RFC 79 | |||
, though these have only been documented in a piecemeal fashion. This document | 3, though these have only been documented in a piecemeal fashion. This document | |||
collects and brings those changes together with the protocol specification from | collects and brings those changes together with the protocol specification from | |||
RFC 793. This document obsoletes RFC 793, as well as RFCs 879, 2873, 6093, 6429 | RFC 793. This document obsoletes RFC 793, as well as RFCs 879, 2873, 6093, 642 | |||
, 6528, and 6691 that updated parts of RFC 793. It updates RFCs 1011 and 1122, | 9, 6528, and 6691 that updated parts of RFC 793. It updates RFCs 1011 and 1122, | |||
and should be considered as a replacement for the portions of those document dea | and it should be considered as a replacement for the portions of those document | |||
ling with TCP requirements. It also updates RFC 5961 by adding a small clarific | s dealing with TCP requirements. It also updates RFC 5961 by adding a small cla | |||
ation in reset handling while in the SYN-RECEIVED state. The TCP header control | rification in reset handling while in the SYN-RECEIVED state. The TCP header co | |||
bits from RFC 793 have also been updated based on RFC 3168.</t> | ntrol bits from RFC 793 have also been updated based on RFC 3168.</t> | |||
<t>RFC EDITOR NOTE: If approved for publication as an RFC, this should b | ||||
e marked additionally as "STD: 7" and replace RFC 793 in that role.</t> | ||||
</abstract> | </abstract> | |||
</front> | ||||
</front> | <middle> | |||
<section numbered="true" toc="default"> | ||||
<middle> | <name>Purpose and Scope</name> | |||
<section title="Purpose and Scope"> | <t> | |||
<t> | In 1981, <xref target="RFC0793" format="default">RFC 793</xref> was rele | |||
In 1981, <xref target="RFC0793">RFC 793</xref> was released, documenting | ased, documenting the Transmission Control Protocol (TCP) and replacing earlier | |||
the Transmission Control Protocol (TCP), and replacing earlier specifications f | published specifications for TCP. | |||
or TCP that had been published in the past. | </t> | |||
</t> | <t> | |||
<t> | Since then, TCP has been widely implemented, and it has been used as a t | |||
Since then, TCP has been widely implemented, and has been used as a tran | ransport protocol for numerous applications on the Internet. | |||
sport protocol for numerous applications on the Internet. | </t> | |||
</t> | <t> | |||
<t> | For several decades, RFC 793 plus a number of other documents have combi | |||
For several decades, RFC 793 plus a number of other documents have combi | ned to serve as the core specification for TCP <xref target="RFC7414" format="de | |||
ned to serve as the core specification for TCP <xref target="RFC7414"></xref>. | fault"/>. Over time, a number of errata have been filed against RFC 793. There | |||
Over time, a number of errata have been filed against RFC 793. There have also | have also been deficiencies found and resolved in security, performance, and ma | |||
been deficiencies found and resolved in security, performance, and many other as | ny other aspects. The number of enhancements has grown over time across many se | |||
pects. The number of enhancements has grown over time across many separate docu | parate documents. These were never accumulated together into a comprehensive up | |||
ments. These were never accumulated together into a comprehensive update to the | date to the base specification. | |||
base specification. | </t> | |||
</t> | <t> | |||
<t> | The purpose of this document is to bring together all of the IETF Standa | |||
The purpose of this document is to bring together all of the IETF Standa | rds Track changes and other clarifications that have been made to the base TCP f | |||
rds Track changes and other clarifications that have been made to the base TCP f | unctional specification (RFC 793) and to unify them into an updated version of t | |||
unctional specification and unify them into an updated version of RFC 793. | he specification. | |||
</t> | </t> | |||
<t> | <t> | |||
Some companion documents are referenced for important algorithms that are | Some companion documents are referenced for important algorithms that are | |||
used by TCP (e.g. for congestion control), but have not been completely include | used by TCP (e.g., for congestion control) but have not been completely include | |||
d in this document. This is a conscious choice, as this base specification can | d in this document. This is a conscious choice, as this base specification can | |||
be used with multiple additional algorithms that are developed and incorporated | be used with multiple additional algorithms that are developed and incorporated | |||
separately. This document focuses on the common basis all TCP implementations mu | separately. This document focuses on the common basis that all TCP implementatio | |||
st support in order to interoperate. Since some additional TCP features have be | ns must support in order to interoperate. Since some additional TCP features ha | |||
come quite complicated themselves (e.g. advanced loss recovery and congestion co | ve become quite complicated themselves (e.g., advanced loss recovery and congest | |||
ntrol), future companion documents may attempt to similarly bring these together | ion control), future companion documents may attempt to similarly bring these to | |||
. | gether. | |||
</t> | </t> | |||
<t> | <t> | |||
In addition to the protocol specification that describes the TCP segment | In addition to the protocol specification that describes the TCP segment | |||
format, generation, and processing rules that are to be implemented in code, RF | format, generation, and processing rules that are to be implemented in code, RF | |||
C 793 and other updates also contain informative and descriptive text for reader | C 793 and other updates also contain informative and descriptive text for reader | |||
s to understand aspects of the protocol design and operation. This document doe | s to understand aspects of the protocol design and operation. This document doe | |||
s not attempt to alter or update this informative text, and is focused only on u | s not attempt to alter or update this informative text and is focused only on up | |||
pdating the normative protocol specification. This document preserves reference | dating the normative protocol specification. This document preserves references | |||
s to the documentation containing the important explanations and rationale, wher | to the documentation containing the important explanations and rationale, where | |||
e appropriate. | appropriate. | |||
</t> | </t> | |||
<t> | <t> | |||
This document is intended to be useful both in checking existing TCP imp lementations for conformance purposes, as well as in writing new implementations . | This document is intended to be useful both in checking existing TCP imp lementations for conformance purposes, as well as in writing new implementations . | |||
</t> | </t> | |||
</section> | </section> | |||
<section title="Introduction"> | <section numbered="true" toc="default"> | |||
<t>RFC 793 contains a discussion of the TCP design goals and provides ex | <name>Introduction</name> | |||
amples of its operation, including examples of connection establishment, connect | <t>RFC 793 contains a discussion of the TCP design goals and provides exam | |||
ion termination, and packet retransmission to repair losses. | ples of its operation, including examples of connection establishment, connectio | |||
</t> | n termination, and packet retransmission to repair losses. | |||
<t> | </t> | |||
This document describes the basic functionality expected in modern TCP i | <t> | |||
mplementations, and replaces the protocol specification in RFC 793. It does not | This document describes the basic functionality expected in modern TCP i | |||
replicate or attempt to update the introduction and philosophy content in Secti | mplementations and replaces the protocol specification in RFC 793. It does not | |||
ons 1 and 2 of RFC 793. Other documents are referenced to provide explanation o | replicate or attempt to update the introduction and philosophy content in Sectio | |||
f the theory of operation, rationale, and detailed discussion of design decision | ns 1 and 2 of RFC 793. Other documents are referenced to provide explanations o | |||
s. This document only focuses on the normative behavior of the protocol. | f the theory of operation, rationale, and detailed discussion of design decision | |||
</t> | s. This document only focuses on the normative behavior of the protocol. | |||
<t> | </t> | |||
The "TCP Roadmap" <xref target="RFC7414"/> provides a more exte | <t> | |||
nsive guide to the RFCs that define TCP and describe various important algorithm | The "TCP Roadmap" <xref target="RFC7414" format="default"/> provides a mo | |||
s. The TCP Roadmap contains sections on strongly encouraged enhancements that im | re extensive guide to the RFCs that define TCP and describe various important al | |||
prove performance and other aspects of TCP beyond the basic operation specified | gorithms. The TCP Roadmap contains sections on strongly encouraged enhancements | |||
in this document. As one example, implementing congestion control (e.g. <xref t | that improve performance and other aspects of TCP beyond the basic operation spe | |||
arget="RFC5681"/>) is a TCP requirement, but is a complex topic on its own, and | cified in this document. As one example, implementing congestion control (e.g., | |||
not described in detail in this document, as there are many options and possibil | <xref target="RFC5681" format="default"/>) is a TCP requirement, but it is a co | |||
ities that do not impact basic interoperability. Similarly, most TCP implementa | mplex topic on its own and not described in detail in this document, as there ar | |||
tions today include the high-performance extensions in <xref target="RFC7323"/>, | e many options and possibilities that do not impact basic interoperability. Sim | |||
but these are not strictly required or discussed in this document. Multipath c | ilarly, most TCP implementations today include the high-performance extensions i | |||
onsiderations for TCP are also specified separately in <xref target="RFC8684"/>. | n <xref target="RFC7323" format="default"/>, but these are not strictly required | |||
</t> | or discussed in this document. Multipath considerations for TCP are also speci | |||
fied separately in <xref target="RFC8684" format="default"/>. | ||||
</t> | ||||
<t> | ||||
A list of changes from RFC 793 is contained in <xref target="changes" fo | ||||
rmat="default"/>. | ||||
</t> | ||||
<section numbered="true" toc="default"> | ||||
<name>Requirements Language</name> | ||||
<t> | <t> | |||
A list of changes from RFC 793 is contained in <xref target="changes"/>. | ||||
</t> | ||||
<section title="Requirements Language"> | The key words "<bcp14>MUST</bcp14>", "<bcp14>MUST NOT</bcp14>", "<bcp14>REQUIRED | |||
<t> | </bcp14>", | |||
"<bcp14>SHALL</bcp14>", "<bcp14>SHALL NOT</bcp14>", "<bcp14>SHOULD</bcp14>", "<b | ||||
The key words "MUST", "MUST NOT", "REQUIRED", | cp14>SHOULD | |||
"SHALL", "SHALL NOT", "SHOULD", "SHOULD | NOT</bcp14>", "<bcp14>RECOMMENDED</bcp14>", "<bcp14>NOT RECOMMENDED</bcp14>", | |||
NOT", "RECOMMENDED", "NOT RECOMMENDED", | "<bcp14>MAY</bcp14>", and "<bcp14>OPTIONAL</bcp14>" in this document are to be | |||
"MAY", and "OPTIONAL" in this document are to be | interpreted as described in BCP 14 <xref target="RFC2119"/> <xref target="R | |||
interpreted as described in BCP 14 <xref target="RFC2119"/><xref | FC8174"/> when, and only when, they appear in all capitals, as shown | |||
target="RFC8174"/> when, and only when, they appear in all capitals, as shown | ||||
here. | here. | |||
</t> | </t> | |||
<t> | <t> | |||
Each use of RFC 2119 keywords in the document is individually labeled and | Each use of RFC 2119 keywords in the document is individually labeled and | |||
referenced in <xref target="reqs"/> that summarizes implementation | referenced in <xref target="reqs" format="default"/>, which summarizes implement ation | |||
requirements. | requirements. | |||
</t> | </t> | |||
<t> | <t> | |||
Sentences using "MUST" are labeled as "MUST-X" with X being | Sentences using "<bcp14>MUST</bcp14>" are labeled as "MUST-X" with X being | |||
a numeric identifier enabling the requirement to be located easily when | a numeric identifier enabling the requirement to be located easily when | |||
referenced from <xref target="reqs"/>. | referenced from <xref target="reqs" format="default"/>. | |||
</t> | </t> | |||
<t> | <t> | |||
Similarly, sentences using "SHOULD" are labeled with | Similarly, sentences using "<bcp14>SHOULD</bcp14>" are labeled with | |||
"SHLD-X", "MAY" with "MAY-X", and | "SHLD-X", "<bcp14>MAY</bcp14>" with "MAY-X", and | |||
"RECOMMENDED" with "REC-X". | "<bcp14>RECOMMENDED</bcp14>" with "REC-X". | |||
</t> | </t> | |||
<t> | <t> | |||
For the purposes of this labeling, "SHOULD NOT" and "MUST | For the purposes of this labeling, "<bcp14>SHOULD NOT</bcp14>" and "<bcp14>MUST | |||
NOT" are labeled the same as "SHOULD" and "MUST" | NOT</bcp14>" are labeled the same as "<bcp14>SHOULD</bcp14>" and "<bcp14>MUST</b | |||
cp14>" | ||||
instances. | instances. | |||
</t> | </t> | |||
</section> | ||||
</section> | <section numbered="true" toc="default"> | |||
<name>Key TCP Concepts</name> | ||||
<section title="Key TCP Concepts"> | <t>TCP provides a reliable, in-order, byte-stream service to application | |||
s.</t> | ||||
<t>TCP provides a reliable, in-order, byte-stream service to applications.</t> | <t>The application byte-stream is conveyed over the network via TCP segm | |||
ents, | ||||
<t>The application byte-stream is conveyed over the network via TCP segments, | ||||
with each TCP segment sent as an Internet Protocol (IP) datagram.</t> | with each TCP segment sent as an Internet Protocol (IP) datagram.</t> | |||
<t>TCP reliability consists of detecting packet losses (via sequence num | ||||
<t>TCP reliability consists of detecting packet losses (via sequence numbers) | bers) | |||
and errors (via per-segment checksums), as well as correction | and errors (via per-segment checksums), as well as correction | |||
via retransmission.</t> | via retransmission.</t> | |||
<t>TCP supports unicast delivery of data. There are anycast application | ||||
<t>TCP supports unicast delivery of data. Anycast applications exist that | s that | |||
successfully use TCP without modifications, though there is some risk of | can successfully use TCP without modifications, though there is some risk of | |||
instability due to changes of lower-layer forwarding behavior <xref target="RFC7 | instability due to changes of lower-layer forwarding behavior <xref target="RFC7 | |||
094"/>.</t> | 094" format="default"/>.</t> | |||
<t>TCP is connection oriented, though it does not inherently include a l | ||||
<t>TCP is connection-oriented, though does not inherently include a liveness | iveness | |||
detection capability.</t> | detection capability.</t> | |||
<t>Data flow is supported bidirectionally over TCP connections, though | ||||
<t>Data flow is supported bidirectionally over TCP connections, though | ||||
applications are free to send data only unidirectionally, if they so | applications are free to send data only unidirectionally, if they so | |||
choose.</t> | choose.</t> | |||
<t>TCP uses port numbers to identify application services and to multipl | ||||
<t>TCP uses port numbers to identify application services and to multiplex | ex | |||
distinct flows between hosts.</t> | distinct flows between hosts.</t> | |||
<t>A more detailed description of TCP features compared to other transpo | ||||
<t>A more detailed description of TCP features compared to other transport | rt | |||
protocols can be found in Section 3.1 of <xref target="RFC8095"/>. Further | protocols can be found in <xref target="RFC8095" section="3.1" sectionFormat="of | |||
" format="default"/>. Further | ||||
description of the motivations for developing TCP and its role in the Internet | description of the motivations for developing TCP and its role in the Internet | |||
protocol stack can be found in Section 2 of <xref target="RFC0793"/> and earlier versions | protocol stack can be found in <xref target="RFC0793" section="2" sectionFormat= "of" format="default"/> and earlier versions | |||
of the TCP specification.</t> | of the TCP specification.</t> | |||
</section> | ||||
</section> | ||||
</section> | </section> | |||
<section numbered="true" toc="default"> | ||||
<section title="Functional Specification"> | <name>Functional Specification</name> | |||
<section numbered="true" toc="default"> | ||||
<section title="Header Format"> | <name>Header Format</name> | |||
<t> | ||||
<t> | ||||
TCP segments are sent as internet datagrams. The Internet Protocol (IP) | TCP segments are sent as internet datagrams. The Internet Protocol (IP) | |||
header carries several information fields, including the source and | header carries several information fields, including the source and | |||
destination host addresses <xref target="RFC0791"/> <xref target="RFC8200"/>. | destination host addresses <xref target="RFC0791" format="default"/> <xref tar | |||
A TCP header follows the IP | get="RFC8200" format="default"/>. A TCP header follows the IP | |||
headers, supplying information specific to the TCP protocol. This | headers, supplying information specific to TCP. This | |||
division allows for the existence of host level protocols other than | division allows for the existence of host-level protocols other than | |||
TCP. In early development of the Internet suite of protocols, the IP header f | TCP. In the early development of the Internet suite of protocols, the IP head | |||
ields had been a part of TCP. | er fields had been a part of TCP. | |||
</t> | </t> | |||
<t> | <t> | |||
This document describes the TCP protocol. The TCP protocol uses TCP Headers. | This document describes TCP, which uses TCP headers. | |||
</t> | </t> | |||
<t>A TCP Header, followed by any user data in the segment, is formatted as follo | <t>A TCP header, followed by any user data in the segment, is formatted | |||
ws, using the style from <xref target="I-D.mcquistin-augmented-ascii-diagrams"/> | as follows, using the style from <xref target="I-D.mcquistin-augmented-ascii-dia | |||
:</t> | grams" format="default"/>:</t> | |||
<figure anchor="header_format" title="TCP Header Format"> | <figure anchor="header_format"> | |||
<artwork> | <name>TCP Header Format</name> | |||
<artwork name="" type="" align="left" alt=""><![CDATA[ | ||||
0 1 2 3 | 0 1 2 3 | |||
0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 | 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 | |||
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
| Source Port | Destination Port | | | Source Port | Destination Port | | |||
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
| Sequence Number | | | Sequence Number | | |||
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
| Acknowledgment Number | | | Acknowledgment Number | | |||
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
| Data | |C|E|U|A|P|R|S|F| | | | Data | |C|E|U|A|P|R|S|F| | | |||
skipping to change at line 399 ¶ | skipping to change at line 180 ¶ | |||
| Checksum | Urgent Pointer | | | Checksum | Urgent Pointer | | |||
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
| [Options] | | | [Options] | | |||
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
| : | | : | |||
: Data : | : Data : | |||
: | | : | | |||
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
Note that one tick mark represents one bit position. | Note that one tick mark represents one bit position. | |||
</artwork> | ]]></artwork> | |||
</figure> | </figure> | |||
<t> | <t> | |||
where: | where: | |||
<list style="hanging" hangIndent="2"> | </t> | |||
<t hangText="Source Port: 16 bits."> | <dl newline="false" spacing="normal" indent="2"> | |||
<vspace /> | <dt>Source Port:</dt> | |||
<vspace /> | <dd> | |||
<t> | ||||
16 bits | ||||
</t> | ||||
<t> | ||||
The source port number. | The source port number. | |||
</t> | </t> | |||
<t hangText="Destination Port: 16 bits."> | </dd> | |||
<vspace /> | <dt>Destination Port:</dt> | |||
<vspace /> | <dd> | |||
<t> | ||||
16 bits | ||||
</t> | ||||
<t> | ||||
The destination port number. | The destination port number. | |||
</t> | </t> | |||
<t hangText="Sequence Number: 32 bits."> | </dd> | |||
<vspace /> | <dt>Sequence Number:</dt> | |||
<vspace /> | <dd> | |||
<t> | ||||
32 bits | ||||
</t> | ||||
<t> | ||||
The sequence number of the first data octet in this segment (except | The sequence number of the first data octet in this segment (except | |||
when the SYN flag is set). If SYN is set the sequence number is the | when the SYN flag is set). If SYN is set, the sequence number is the | |||
initial sequence number (ISN) and the first data octet is ISN+1. | initial sequence number (ISN) and the first data octet is ISN+1. | |||
</t> | </t> | |||
<t hangText="Acknowledgment Number: 32 bits."> | </dd> | |||
<vspace /> | <dt>Acknowledgment Number:</dt> | |||
<vspace /> | <dd> | |||
<t> | ||||
32 bits | ||||
</t> | ||||
<t> | ||||
If the ACK control bit is set, this field contains the value of the | If the ACK control bit is set, this field contains the value of the | |||
next sequence number the sender of the segment is expecting to | next sequence number the sender of the segment is expecting to | |||
receive. Once a connection is established, this is always sent. | receive. Once a connection is established, this is always sent. | |||
</t> | </t> | |||
<t hangText="Data Offset (DOffset): 4 bits."> | </dd> | |||
<vspace /> | <dt>Data Offset (DOffset):</dt> | |||
<vspace /> | <dd> | |||
The number of 32 bit words in the TCP Header. This indicates where | <t> | |||
4 bits | ||||
</t> | ||||
<t> | ||||
The number of 32-bit words in the TCP header. This indicates where | ||||
the data begins. The TCP header (even one including options) is an | the data begins. The TCP header (even one including options) is an | |||
integer multiple of 32 bits long. | integer multiple of 32 bits long. | |||
</t> | </t> | |||
<t hangText="Reserved (Rsrvd): 4 bits."> | </dd> | |||
<vspace /> | <dt>Reserved (Rsrvd):</dt> | |||
<vspace /> | <dd> | |||
A set of control bits reserved for future use. Must be zero in generated se | <t> | |||
gments and must be ignored in received segments, if corresponding future feature | 4 bits | |||
s are unimplemented by the sending or receiving host. | </t> | |||
</t> | <t> | |||
<t> | A set of control bits reserved for future use. Must be zero in generated se | |||
The control bits are also known as "flags". Assignment is managed | gments and must be ignored in received segments if the corresponding future feat | |||
by IANA from the "TCP Header Flags" registry <xref target="header-flag | ures are not implemented by the sending or receiving host. | |||
s-registry"/>. The currently assigned control bits are CWR, ECE, URG, ACK, PSH, | </t> | |||
RST, SYN, and FIN. | </dd> | |||
</t> | <dt>Control bits:</dt> | |||
<t hangText="CWR: 1 bit."> | <dd> | |||
<vspace /> | <t> | |||
<vspace /> | The control bits are also known as "flags". Assignment is managed by IANA fr | |||
Congestion Window Reduced (see <xref target="RFC3168"/>). | om the "TCP Header Flags" registry <xref target="TCP-parameters-registry" format | |||
</t> | ="default"/>. The currently assigned control bits are CWR, ECE, URG, ACK, PSH, | |||
<t hangText="ECE: 1 bit."> | RST, SYN, and FIN. | |||
<vspace /> | </t> | |||
<vspace /> | <dl newline="false" spacing="normal" indent="4"> | |||
ECN-Echo (see <xref target="RFC3168"/>). | <dt>CWR:</dt> | |||
</t> | <dd> | |||
<t hangText="URG: 1 bit."> | <t> | |||
<vspace /> | 1 bit | |||
<vspace /> | </t> | |||
Urgent Pointer field is significant. | <t> | |||
</t> | Congestion Window Reduced (see <xref target="RFC3168" format="default"/>). | |||
<t hangText="ACK: 1 bit."> | </t> | |||
<vspace /> | </dd> | |||
<vspace /> | <dt>ECE:</dt> | |||
<dd> | ||||
<t> | ||||
1 bit | ||||
</t> | ||||
<t> | ||||
ECN-Echo (see <xref target="RFC3168" format="default"/>). | ||||
</t> | ||||
</dd> | ||||
<dt>URG:</dt> | ||||
<dd> | ||||
<t> | ||||
1 bit | ||||
</t> | ||||
<t> | ||||
Urgent pointer field is significant. | ||||
</t> | ||||
</dd> | ||||
<dt>ACK:</dt> | ||||
<dd> | ||||
<t> | ||||
1 bit | ||||
</t> | ||||
<t> | ||||
Acknowledgment field is significant. | Acknowledgment field is significant. | |||
</t> | </t> | |||
<t hangText="PSH: 1 bit."> | </dd> | |||
<vspace /> | <dt>PSH:</dt> | |||
<vspace /> | <dd> | |||
Push Function (see the Send Call description in <xref target="user-api"/>). | <t> | |||
</t> | 1 bit | |||
<t hangText="RST: 1 bit."> | </t> | |||
<vspace /> | <t> | |||
<vspace /> | Push function (see the Send Call description in <xref target="user-api" form | |||
at="default"/>). | ||||
</t> | ||||
</dd> | ||||
<dt>RST:</dt> | ||||
<dd> | ||||
<t> | ||||
1 bit | ||||
</t> | ||||
<t> | ||||
Reset the connection. | Reset the connection. | |||
</t> | </t> | |||
<t hangText="SYN: 1 bit."> | </dd> | |||
<vspace /> | <dt>SYN:</dt> | |||
<vspace /> | <dd> | |||
<t> | ||||
1 bit | ||||
</t> | ||||
<t> | ||||
Synchronize sequence numbers. | Synchronize sequence numbers. | |||
</t> | </t> | |||
<t hangText="FIN: 1 bit."> | </dd> | |||
<vspace /> | <dt>FIN:</dt> | |||
<vspace /> | <dd> | |||
<t> | ||||
1 bit | ||||
</t> | ||||
<t> | ||||
No more data from sender. | No more data from sender. | |||
</t> | </t> | |||
<t hangText="Window: 16 bits."> | </dd> | |||
<vspace /> | </dl> | |||
<vspace /> | </dd> | |||
<dt>Window:</dt> | ||||
<dd> | ||||
<t> | ||||
16 bits | ||||
</t> | ||||
<t> | ||||
The number of data octets beginning with the one indicated in the | The number of data octets beginning with the one indicated in the | |||
acknowledgment field that the sender of this segment is willing to | acknowledgment field that the sender of this segment is willing to | |||
accept. The value is shifted when the Window Scaling extension is used | accept. The value is shifted when the window scaling extension is used | |||
<xref target="RFC7323"/>. | <xref target="RFC7323" format="default"/>. | |||
<vspace /> | </t> | |||
<vspace /> | <t> | |||
The window size MUST be treated as an unsigned number, or else | The window size <bcp14>MUST</bcp14> be treated as an unsigned number, or els | |||
e | ||||
large window sizes will appear like negative windows and TCP will | large window sizes will appear like negative windows and TCP will | |||
not work (MUST-1). It is RECOMMENDED that implementations will reserve | not work (MUST-1). It is <bcp14>RECOMMENDED</bcp14> that implementations wi ll reserve | |||
32-bit fields for the send and receive window sizes in the connection | 32-bit fields for the send and receive window sizes in the connection | |||
record and do all window computations with 32 bits (REC-1). | record and do all window computations with 32 bits (REC-1). | |||
</t> | </t> | |||
<t hangText="Checksum: 16 bits."> | </dd> | |||
<vspace /> | <dt>Checksum:</dt> | |||
<vspace /> | <dd> | |||
The checksum field is the 16 bit ones' complement of the ones' | <t> | |||
complement sum of all 16 bit words in the header and text. The checksum com | 16 bits | |||
putation needs to ensure the 16-bit alignment of the data being summed. If a | </t> | |||
<t> | ||||
The checksum field is the 16-bit ones' complement of the ones' | ||||
complement sum of all 16-bit words in the header and text. The checksum com | ||||
putation needs to ensure the 16-bit alignment of the data being summed. If a | ||||
segment contains an odd number of header and text octets, alignment can be a chieved by | segment contains an odd number of header and text octets, alignment can be a chieved by | |||
padding the last octet with zeros on its right to | padding the last octet with zeros on its right to | |||
form a 16 bit word for checksum purposes. The pad is not | form a 16-bit word for checksum purposes. The pad is not | |||
transmitted as part of the segment. While computing the checksum, | transmitted as part of the segment. While computing the checksum, | |||
the checksum field itself is replaced with zeros. | the checksum field itself is replaced with zeros. | |||
</t> | </t> | |||
<t> | <t> | |||
The checksum also covers a pseudo header (<xref target="v4pseudo"/>) concept | The checksum also covers a pseudo-header (<xref target="v4pseudo" format="de | |||
ually prefixed to the TCP | fault"/>) conceptually prefixed to the TCP | |||
header. The pseudo header is 96 bits for IPv4 and 320 bits for IPv6. | header. The pseudo-header is 96 bits for IPv4 and 320 bits for IPv6. | |||
Including the pseudo header in the checksum gives the TCP connection | Including the pseudo-header in the checksum gives the TCP connection | |||
protection against misrouted segments. This information is carried in IP he aders | protection against misrouted segments. This information is carried in IP he aders | |||
and is transferred across the TCP/Network interface in the arguments or | and is transferred across the TCP/network interface in the arguments or | |||
results of calls by the TCP implementation on the IP layer. | results of calls by the TCP implementation on the IP layer. | |||
<figure anchor="v4pseudo" title="IPv4 Pseudo Header"><artwork> | </t> | |||
<figure anchor="v4pseudo"> | ||||
<name>IPv4 Pseudo-header</name> | ||||
<artwork name="" type="" align="left" alt=""><![CDATA[ | ||||
+--------+--------+--------+--------+ | +--------+--------+--------+--------+ | |||
| Source Address | | | Source Address | | |||
+--------+--------+--------+--------+ | +--------+--------+--------+--------+ | |||
| Destination Address | | | Destination Address | | |||
+--------+--------+--------+--------+ | +--------+--------+--------+--------+ | |||
| zero | PTCL | TCP Length | | | zero | PTCL | TCP Length | | |||
+--------+--------+--------+--------+ | +--------+--------+--------+--------+ | |||
</artwork></figure> | ]]></artwork> | |||
<list style="hanging" hangIndent="2"> | </figure> | |||
<t hangText="Pseudo header components for IPv4:"> | <dl newline="true" spacing="normal" indent="2"> | |||
<vspace /> | <dt>Pseudo-header components for IPv4:</dt> | |||
<vspace /> | <dd> | |||
<list> | <dl newline="false" spacing="normal"> | |||
<t>Source Address: the IPv4 source address in network byte order</t> | <dt>Source Address:</dt> | |||
<t>Destination Address: the IPv4 destination address in network byte order< | <dd>the IPv4 source address in network byte order</dd> | |||
/t> | <dt>Destination Address:</dt> | |||
<t>zero: bits set to zero</t> | <dd>the IPv4 destination address in network byte order</dd> | |||
<t>PTCL: the protocol number from the IP header</t> | <dt>zero:</dt> | |||
<t>TCP Length: | <dd>bits set to zero</dd> | |||
<dt>PTCL:</dt> | ||||
<dd>the protocol number from the IP header</dd> | ||||
<dt>TCP Length:</dt> | ||||
<dd> | ||||
the TCP header length plus the data length in | the TCP header length plus the data length in | |||
octets (this is not an explicitly transmitted quantity, but is | octets (this is not an explicitly transmitted quantity but is | |||
computed), and it does not count the 12 octets of the pseudo | computed), and it does not count the 12 octets of the pseudo-header.</dd> | |||
header.</t> | </dl> | |||
</list> | </dd> | |||
</t> | </dl> | |||
<t> | <t> | |||
For IPv6, the pseudo header is defined in Section 8.1 of RFC 8200 <xref t | For IPv6, the pseudo-header is defined in Section <xref target="RFC8200" | |||
arget="RFC8200"/>, and contains the IPv6 Source Address and Destination Address, | section="8.1" sectionFormat="bare" format="default"/> of RFC 8200 <xref target=" | |||
an Upper Layer Packet Length (a 32-bit value otherwise equivalent to TCP Length | RFC8200" format="default"/> and contains the IPv6 Source Address and Destination | |||
in the IPv4 pseudo header), three bytes of zero-padding, and a Next Header valu | Address, an Upper-Layer Packet Length (a 32-bit value otherwise equivalent to T | |||
e (differing from the IPv6 header value in the case of extension headers present | CP Length in the IPv4 pseudo-header), three bytes of zero padding, and a Next He | |||
in between IPv6 and TCP). | ader value, which differs from the IPv6 header value if there are extension head | |||
ers present between IPv6 and TCP. | ||||
</t> | </t> | |||
<t> | <t> | |||
The TCP checksum is never optional. The sender MUST generate it (MUST-2) | The TCP checksum is never optional. The sender <bcp14>MUST</bcp14> gener | |||
and the receiver MUST check it (MUST-3). | ate it (MUST-2) | |||
and the receiver <bcp14>MUST</bcp14> check it (MUST-3). | ||||
</t> | </t> | |||
</list> | </dd> | |||
</t> | <dt>Urgent Pointer:</dt> | |||
<dd> | ||||
<t hangText="Urgent Pointer: 16 bits."> | <t> | |||
<vspace /> | 16 bits | |||
<vspace /> | </t> | |||
<t> | ||||
This field communicates the current value of the urgent pointer as a | This field communicates the current value of the urgent pointer as a | |||
positive offset from the sequence number in this segment. The | positive offset from the sequence number in this segment. The | |||
urgent pointer points to the sequence number of the octet following the urge nt data. This field is only to be interpreted in segments with | urgent pointer points to the sequence number of the octet following the urge nt data. This field is only to be interpreted in segments with | |||
the URG control bit set. | the URG control bit set. | |||
</t> | </t> | |||
<t hangText="Options: [TCP Option]; size(Options) == (DOffset-5)*32; present o | </dd> | |||
nly when DOffset > 5. Note that this size expression also includes any padding | <dt>Options:</dt> | |||
trailing the actual options present."> | <dd> | |||
<vspace /> | <t> | |||
<vspace /> | [TCP Option]; size(Options) == (DOffset-5)*32; present only when DOffset > | |||
5. | ||||
Note that this size expression also includes any padding trailing the actual | ||||
options present. | ||||
</t> | ||||
<t> | ||||
Options may occupy space at the end of the TCP header and are a | Options may occupy space at the end of the TCP header and are a | |||
multiple of 8 bits in length. All options are included in the | multiple of 8 bits in length. All options are included in the | |||
checksum. An option may begin on any octet boundary. There are two | checksum. An option may begin on any octet boundary. There are two | |||
cases for the format of an option: | cases for the format of an option: | |||
<list> | </t> | |||
<t>Case 1: A single octet of option-kind.</t> | <dl newline="false" spacing="normal"> | |||
<dt>Case 1:</dt> | ||||
<t>Case 2: An octet of option-kind (Kind), an octet of option-length, and | <dd>A single octet of option-kind.</dd> | |||
the actual option-data octets.</t> | <dt>Case 2:</dt> | |||
</list> | <dd>An octet of option-kind (Kind), an octet of option-length, and | |||
</t> | the actual option-data octets.</dd> | |||
<t> | </dl> | |||
<t> | ||||
The option-length counts the two octets of option-kind and | The option-length counts the two octets of option-kind and | |||
option-length as well as the option-data octets. | option-length as well as the option-data octets. | |||
</t> | </t> | |||
<t> | <t> | |||
Note that the list of options may be shorter than the data offset | Note that the list of options may be shorter than the Data Offset | |||
field might imply. The content of the header beyond the | field might imply. The content of the header beyond the | |||
End-of-Option option MUST be header padding of zeros (MUST-69). | End of Option List Option <bcp14>MUST</bcp14> be header padding of zeros (MU ST-69). | |||
</t> | </t> | |||
<t> | <t> | |||
The list of all currently defined options is managed by IANA <xref target="T | The list of all currently defined options is managed by IANA <xref target="T | |||
CP-parameters-registry"/>, and each option is defined in other RFCs, as indicate | CP-parameters-registry" format="default"/>, and each option is defined in other | |||
d there. That set includes experimental options that can be extended to support | RFCs, as indicated there. That set includes experimental options that can be ex | |||
multiple concurrent usages <xref target="RFC6994"/>.</t> | tended to support multiple concurrent usages <xref target="RFC6994" format="defa | |||
<t> | ult"/>.</t> | |||
A given TCP implementation can support any currently defined options, but the | <t> | |||
following options MUST be supported (MUST-4 - note Maximum Segment Size option | A given TCP implementation can support any currently defined options, but the | |||
support is also part of MUST-19 in <xref target="pmtud"/>):</t> | following options <bcp14>MUST</bcp14> be supported (MUST-4 -- note Maximum Segm | |||
<t> | ent Size Option support is also part of MUST-14 in <xref target="mss" format="de | |||
<figure><artwork> | fault"/>):</t> | |||
Kind Length Meaning | <table> | |||
---- ------ ------- | <name>Mandatory Option Set</name> | |||
0 - End of option list. | <thead> | |||
1 - No-Operation. | <tr> | |||
2 4 Maximum Segment Size. | <th>Kind</th> | |||
</artwork></figure></t> | <th>Length</th> | |||
<t> | <th>Meaning</th> | |||
These options are specified in detail in <xref target="Option-Definitions" / | </tr> | |||
>.<vspace/><vspace/> | </thead> | |||
A TCP implementation MUST be able to receive a TCP option in any segment (MU | <tbody> | |||
ST-5).<vspace /><vspace /> | <tr> | |||
A TCP implementation MUST (MUST-6) ignore without error any TCP option it do | <td>0</td> | |||
es not | <td>-</td> | |||
<td>End of Option List Option.</td> | ||||
</tr> | ||||
<tr> | ||||
<td>1</td> | ||||
<td>-</td> | ||||
<td>No-Operation.</td> | ||||
</tr> | ||||
<tr> | ||||
<td>2</td> | ||||
<td>4</td> | ||||
<td>Maximum Segment Size.</td> | ||||
</tr> | ||||
</tbody> | ||||
</table> | ||||
<t> | ||||
These options are specified in detail in <xref target="Option-Definitions" f | ||||
ormat="default"/>.</t> | ||||
<t> | ||||
A TCP implementation <bcp14>MUST</bcp14> be able to receive a TCP Option in | ||||
any segment (MUST-5).</t> | ||||
<t> | ||||
A TCP implementation <bcp14>MUST</bcp14> (MUST-6) ignore without error any T | ||||
CP Option it does not | ||||
implement, assuming that the option has a length field. All | implement, assuming that the option has a length field. All | |||
TCP options except End of option list and No-Operation MUST have length fiel | TCP Options except End of Option List Option (EOL) and No-Operation (NOP) <b | |||
ds, including all future options (MUST-68). | cp14>MUST</bcp14> have length fields, including all future options (MUST-68). | |||
TCP implementations MUST be prepared to handle an illegal option length | TCP implementations <bcp14>MUST</bcp14> be prepared to handle an illegal opt | |||
ion length | ||||
(e.g., zero); a suggested procedure is to | (e.g., zero); a suggested procedure is to | |||
reset the connection and log the error cause (MUST-7). | reset the connection and log the error cause (MUST-7). | |||
</t> | </t> | |||
<t>Note: There is ongoing work to extend the space available for TCP O | ||||
<t>Note: There is ongoing work to extend the space available for TCP options, | ptions, such as <xref target="I-D.ietf-tcpm-tcp-edo" format="default"/>.</t> | |||
such as <xref target="I-D.ietf-tcpm-tcp-edo"/>.</t> | </dd> | |||
<dt>Data:</dt> | ||||
<t hangText="Data: variable length."> | <dd> | |||
<vspace /> | <t> | |||
<vspace /> | variable length | |||
</t> | ||||
<t> | ||||
User data carried by the TCP segment. | User data carried by the TCP segment. | |||
</t> | </t> | |||
</list> | </dd> | |||
</t> | </dl> | |||
</section> | </section> | |||
<section title="Specific Option Definitions" anchor="Option-Definitions"> | <section anchor="Option-Definitions" numbered="true" toc="default"> | |||
<t> | <name>Specific Option Definitions</name> | |||
A TCP Option, in the mandatory option set, is one of: an End of Option L | <t> | |||
ist Option, a No-Operation Option, or a Maximum Segment Size Option. | A TCP Option, in the mandatory option set, is one of an End of Option Li | |||
</t> | st Option, a No-Operation Option, or a Maximum Segment Size Option. | |||
<t>An End of Option List Option is formatted as follows:</t> | </t> | |||
<figure><artwork> | <t>An End of Option List Option is formatted as follows:</t> | |||
<artwork name="" type="" align="left" alt=""><![CDATA[ | ||||
0 | 0 | |||
0 1 2 3 4 5 6 7 | 0 1 2 3 4 5 6 7 | |||
+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+ | |||
| 0 | | | 0 | | |||
+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+ | |||
</artwork></figure> | ]]></artwork> | |||
<t> | <t> | |||
where: | where: | |||
<list style="hanging" hangIndent="2"> | </t> | |||
<t hangText="Kind: 1 byte; Kind == 0."> | <dl newline="false" spacing="normal" indent="2"> | |||
<vspace /> | <dt>Kind:</dt> | |||
<vspace /> | <dd> | |||
<t> | ||||
1 byte; Kind == 0. | ||||
</t> | ||||
<t> | ||||
This option code indicates the end of the option list. This | This option code indicates the end of the option list. This | |||
might not coincide with the end of the TCP header according to | might not coincide with the end of the TCP header according to | |||
the Data Offset field. This is used at the end of all options, | the Data Offset field. This is used at the end of all options, | |||
not the end of each option, and need only be used if the end of | not the end of each option, and need only be used if the end of | |||
the options would not otherwise coincide with the end of the TCP | the options would not otherwise coincide with the end of the TCP | |||
header. | header. | |||
</t> | </t> | |||
</list> | </dd> | |||
</t> | </dl> | |||
<t>A No-Operation Option is formatted as follows:</t> | ||||
<t>A No-Operation Option is formatted as follows:</t> | <artwork name="" type="" align="left" alt=""><![CDATA[ | |||
<figure><artwork> | ||||
0 | 0 | |||
0 1 2 3 4 5 6 7 | 0 1 2 3 4 5 6 7 | |||
+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+ | |||
| 1 | | | 1 | | |||
+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+ | |||
</artwork></figure> | ]]></artwork> | |||
<t>where: | <t>where: | |||
<list style="hanging" hangIndent="2"> | </t> | |||
<t hangText="Kind: 1 byte; Kind == 1."> | <dl newline="false" spacing="normal" indent="2"> | |||
<vspace /> | <dt>Kind:</dt> | |||
<vspace /> | <dd> | |||
<t> | ||||
1 byte; Kind == 1. | ||||
</t> | ||||
<t> | ||||
This option code can be used between options, for example, to | This option code can be used between options, for example, to | |||
align the beginning of a subsequent option on a word boundary. | align the beginning of a subsequent option on a word boundary. | |||
There is no guarantee that senders will use this option, so | There is no guarantee that senders will use this option, so | |||
receivers MUST be prepared to process options even if they do | receivers <bcp14>MUST</bcp14> be prepared to process options even if the y do | |||
not begin on a word boundary (MUST-64). | not begin on a word boundary (MUST-64). | |||
</t> | </t> | |||
</list> | </dd> | |||
</t> | </dl> | |||
<t>A Maximum Segment Size Option is formatted as follows:</t> | ||||
<t>A Maximum Segment Size Option is formatted as follows:</t> | <artwork name="" type="" align="left" alt=""><![CDATA[ | |||
<figure><artwork> | ||||
0 1 2 3 | 0 1 2 3 | |||
0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 | 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 | |||
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
| 2 | Length | Maximum Segment Size (MSS) | | | 2 | Length | Maximum Segment Size (MSS) | | |||
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
</artwork></figure> | ]]></artwork> | |||
<t>where: | <t>where: | |||
<list style="hanging" hangIndent="2"> | </t> | |||
<t hangText="Kind: 1 byte; Kind == 2."> | <dl newline="false" spacing="normal" indent="2"> | |||
<vspace /> | <dt>Kind:</dt> | |||
<vspace /> | <dd> | |||
<t> | ||||
1 byte; Kind == 2. | ||||
</t> | ||||
<t> | ||||
If this option is present, then it communicates the maximum | If this option is present, then it communicates the maximum | |||
receive segment size at the TCP endpoint that sends this segment. | receive segment size at the TCP endpoint that sends this segment. | |||
This value is limited by the IP reassembly limit. This field may be sen t in the initial connection request | This value is limited by the IP reassembly limit. This field may be sen t in the initial connection request | |||
(i.e., in segments with the SYN control bit set) and MUST NOT | (i.e., in segments with the SYN control bit set) and <bcp14>MUST NOT</bc p14> | |||
be sent in other segments (MUST-65). If this | be sent in other segments (MUST-65). If this | |||
option is not used, any segment size is allowed. | option is not used, any segment size is allowed. | |||
A more complete description of this option is provided in <xref target=" | A more complete description of this option is provided in <xref target=" | |||
mss"/>. | mss" format="default"/>. | |||
</t> | </t> | |||
<t hangText="Length: 1 byte; Length == 4."> | </dd> | |||
<vspace /> | <dt>Length:</dt> | |||
<vspace /> | <dd> | |||
<t> | ||||
1 byte; Length == 4. | ||||
</t> | ||||
<t> | ||||
Length of the option in bytes. | Length of the option in bytes. | |||
</t> | </t> | |||
<t hangText="Maximum Segment Size (MSS): 2 bytes."> | </dd> | |||
<vspace /> | <dt>Maximum Segment Size (MSS):</dt> | |||
<vspace /> | <dd> | |||
<t> | ||||
2 bytes. | ||||
</t> | ||||
<t> | ||||
The maximum receive segment size at the TCP endpoint that sends this segment. | The maximum receive segment size at the TCP endpoint that sends this segment. | |||
</t> | </t> | |||
</list> | </dd> | |||
</t> | </dl> | |||
<section numbered="true" toc="default"> | ||||
<section title="Other Common Options"> | <name>Other Common Options</name> | |||
<t> | <t> | |||
Additional RFCs define some other commonly used options that are recommended to | Additional RFCs define some other commonly used options that are recommended to | |||
implement for high performance, but not necessary for basic TCP interoperability | implement for high performance but are not necessary for basic TCP interoperabil | |||
. These are the TCP Selective Acknowledgement (SACK) option <xref target="RFC20 | ity. These are the TCP Selective Acknowledgment (SACK) Option <xref target="RFC | |||
18"/><xref target="RFC2883"/>, TCP Timestamp (TS) option <xref target="RFC7323"/ | 2018" format="default"/> <xref target="RFC2883" format="default"/>, TCP Timestam | |||
>, and TCP Window Scaling (WS) option <xref target="RFC7323"/>. | p (TS) Option <xref target="RFC7323" format="default"/>, and TCP Window Scale (W | |||
</t> | S) Option <xref target="RFC7323" format="default"/>. | |||
</section> | </t> | |||
</section> | ||||
<section title="Experimental TCP Options"> | <section numbered="true" toc="default"> | |||
<t> | <name>Experimental TCP Options</name> | |||
Experimental TCP option values are defined in <xref target="RFC4727"/>, and <xre | <t> | |||
f target="RFC6994"/> describes the current recommended usage for these experimen | Experimental TCP Option values are defined in <xref target="RFC4727" format="def | |||
tal values. | ault"/>, and <xref target="RFC6994" format="default"/> describes the current rec | |||
</t> | ommended usage for these experimental values. | |||
</section> | </t> | |||
</section> | </section> | |||
<section title="TCP Terminology Overview"> | </section> | |||
<t> | <section numbered="true" toc="default"> | |||
This section includes an overview of key terms needed to understand the detailed | <name>TCP Terminology Overview</name> | |||
protocol operation in the rest of the document. There is a glossary of terms i | <t> | |||
n <xref target="glossary"/>. | This section includes an overview of key terms needed to understand the detailed | |||
protocol operation in the rest of the document. There is a glossary of terms i | ||||
n <xref target="glossary" format="default"/>. | ||||
</t> | </t> | |||
<section title="Key Connection State Variables"> | <section numbered="true" toc="default"> | |||
<t> | <name>Key Connection State Variables</name> | |||
Before we can discuss very much about the operation of the TCP implementation | <t> | |||
we need | Before we can discuss the operation of the TCP implementation in detail, we ne | |||
ed | ||||
to introduce some detailed terminology. The maintenance of a TCP | to introduce some detailed terminology. The maintenance of a TCP | |||
connection requires maintaining state for several variables. We conceive | connection requires maintaining state for several variables. We conceive | |||
of these variables being stored in a connection record called a | of these variables being stored in a connection record called a | |||
Transmission Control Block or TCB. Among the variables stored in the | Transmission Control Block or TCB. Among the variables stored in the | |||
TCB are the local and remote IP addresses and port numbers, the IP security le | TCB are the local and remote IP addresses and port numbers, the IP security le | |||
vel and compartment | vel, and compartment | |||
of the connection (see <xref target="seccomp"/>), pointers to the user's send | of the connection (see <xref target="seccomp" format="default"/>), pointers to | |||
and receive | the user's send and receive | |||
buffers, pointers to the retransmit queue and to the current segment. | buffers, pointers to the retransmit queue and to the current segment. | |||
In addition, several variables relating to the send and receive | In addition, several variables relating to the send and receive | |||
sequence numbers are stored in the TCB. | sequence numbers are stored in the TCB. | |||
</t> | </t> | |||
<t><figure><artwork> | <table> | |||
Send Sequence Variables: | <name>Send Sequence Variables</name> | |||
<thead> | ||||
SND.UNA - send unacknowledged | <tr> | |||
SND.NXT - send next | <th>Variable</th> | |||
SND.WND - send window | <th>Description</th> | |||
SND.UP - send urgent pointer | </tr> | |||
SND.WL1 - segment sequence number used for last window update | </thead> | |||
SND.WL2 - segment acknowledgment number used for last window | <tbody> | |||
update | <tr> | |||
ISS - initial send sequence number | <td>SND.UNA</td> | |||
<td>send unacknowledged</td> | ||||
Receive Sequence Variables: | </tr> | |||
<tr> | ||||
RCV.NXT - receive next | <td>SND.NXT</td> | |||
RCV.WND - receive window | <td>send next</td> | |||
RCV.UP - receive urgent pointer | </tr> | |||
IRS - initial receive sequence number | <tr> | |||
</artwork></figure></t> | <td>SND.WND</td> | |||
<t> | <td>send window</td> | |||
</tr> | ||||
<tr> | ||||
<td>SND.UP</td> | ||||
<td>send urgent pointer</td> | ||||
</tr> | ||||
<tr> | ||||
<td>SND.WL1</td> | ||||
<td>segment sequence number used for last window update</td> | ||||
</tr> | ||||
<tr> | ||||
<td>SND.WL2</td> | ||||
<td>segment acknowledgment number used for last window update</t | ||||
d> | ||||
</tr> | ||||
<tr> | ||||
<td>ISS</td> | ||||
<td>initial send sequence number</td> | ||||
</tr> | ||||
</tbody> | ||||
</table> | ||||
<table> | ||||
<name>Receive Sequence Variables</name> | ||||
<thead> | ||||
<tr> | ||||
<th>Variable</th> | ||||
<th>Description</th> | ||||
</tr> | ||||
</thead> | ||||
<tbody> | ||||
<tr> | ||||
<td>RCV.NXT</td> | ||||
<td>receive next</td> | ||||
</tr> | ||||
<tr> | ||||
<td>RCV.WND</td> | ||||
<td>receive window</td> | ||||
</tr> | ||||
<tr> | ||||
<td>RCV.UP</td> | ||||
<td>receive urgent pointer</td> | ||||
</tr> | ||||
<tr> | ||||
<td>IRS</td> | ||||
<td>initial receive sequence number</td> | ||||
</tr> | ||||
</tbody> | ||||
</table> | ||||
<t> | ||||
The following diagrams may help to relate some of these variables to | The following diagrams may help to relate some of these variables to | |||
the sequence space. | the sequence space. | |||
</t> | </t> | |||
<figure anchor="send_seq_space" title="Send Sequence Space"> | <figure anchor="send_seq_space"> | |||
<artwork> | <name>Send Sequence Space</name> | |||
<artwork name="" type="" align="left" alt=""><![CDATA[ | ||||
1 2 3 4 | 1 2 3 4 | |||
----------|----------|----------|---------- | ----------|----------|----------|---------- | |||
SND.UNA SND.NXT SND.UNA | SND.UNA SND.NXT SND.UNA | |||
+SND.WND | +SND.WND | |||
1 - old sequence numbers that have been acknowledged | 1 - old sequence numbers that have been acknowledged | |||
2 - sequence numbers of unacknowledged data | 2 - sequence numbers of unacknowledged data | |||
3 - sequence numbers allowed for new data transmission | 3 - sequence numbers allowed for new data transmission | |||
4 - future sequence numbers that are not yet allowed | 4 - future sequence numbers that are not yet allowed | |||
</artwork> | ]]></artwork> | |||
</figure> | </figure> | |||
<t> | <t> | |||
The send window is the portion of the sequence space labeled 3 in | The send window is the portion of the sequence space labeled 3 in | |||
<xref target="send_seq_space" />. | <xref target="send_seq_space" format="default"/>. | |||
</t> | </t> | |||
<figure anchor="recv_seq_space" title="Receive Sequence Space"> | <figure anchor="recv_seq_space"> | |||
<artwork> | <name>Receive Sequence Space</name> | |||
<artwork name="" type="" align="left" alt=""><![CDATA[ | ||||
1 2 3 | 1 2 3 | |||
----------|----------|---------- | ----------|----------|---------- | |||
RCV.NXT RCV.NXT | RCV.NXT RCV.NXT | |||
+RCV.WND | +RCV.WND | |||
1 - old sequence numbers that have been acknowledged | 1 - old sequence numbers that have been acknowledged | |||
2 - sequence numbers allowed for new reception | 2 - sequence numbers allowed for new reception | |||
3 - future sequence numbers that are not yet allowed | 3 - future sequence numbers that are not yet allowed | |||
</artwork> | ]]></artwork> | |||
</figure> | </figure> | |||
<t> | <t> | |||
The receive window is the portion of the sequence space labeled 2 in | The receive window is the portion of the sequence space labeled 2 in | |||
<xref target="recv_seq_space" />. | <xref target="recv_seq_space" format="default"/>. | |||
</t> | </t> | |||
<t> | <t> | |||
There are also some variables used frequently in the discussion that | There are also some variables used frequently in the discussion that | |||
take their values from the fields of the current segment. | take their values from the fields of the current segment. | |||
</t> | </t> | |||
<t>Current Segment Variables: | <table> | |||
<figure><artwork> | <name>Current Segment Variables</name> | |||
SEG.SEQ - segment sequence number | <thead> | |||
SEG.ACK - segment acknowledgment number | <tr> | |||
SEG.LEN - segment length | <th>Variable</th> | |||
SEG.WND - segment window | <th>Description</th> | |||
SEG.UP - segment urgent pointer | </tr> | |||
</artwork></figure> | </thead> | |||
</t> | <tbody> | |||
</section> | <tr> | |||
<td>SEG.SEQ</td> | ||||
<section title="State Machine Overview"> | <td>segment sequence number</td> | |||
<t> | </tr> | |||
<tr> | ||||
<td>SEG.ACK</td> | ||||
<td>segment acknowledgment number</td> | ||||
</tr> | ||||
<tr> | ||||
<td>SEG.LEN</td> | ||||
<td>segment length</td> | ||||
</tr> | ||||
<tr> | ||||
<td>SEG.WND</td> | ||||
<td>segment window</td> | ||||
</tr> | ||||
<tr> | ||||
<td>SEG.UP</td> | ||||
<td>segment urgent pointer</td> | ||||
</tr> | ||||
</tbody> | ||||
</table> | ||||
</section> | ||||
<section numbered="true" toc="default"> | ||||
<name>State Machine Overview</name> | ||||
<t> | ||||
A connection progresses through a series of states during its | A connection progresses through a series of states during its | |||
lifetime. The states are: LISTEN, SYN-SENT, SYN-RECEIVED, | lifetime. The states are: LISTEN, SYN-SENT, SYN-RECEIVED, | |||
ESTABLISHED, FIN-WAIT-1, FIN-WAIT-2, CLOSE-WAIT, CLOSING, LAST-ACK, | ESTABLISHED, FIN-WAIT-1, FIN-WAIT-2, CLOSE-WAIT, CLOSING, LAST-ACK, | |||
TIME-WAIT, and the fictional state CLOSED. CLOSED is fictional | TIME-WAIT, and the fictional state CLOSED. CLOSED is fictional | |||
because it represents the state when there is no TCB, and therefore, | because it represents the state when there is no TCB, and therefore, | |||
no connection. Briefly the meanings of the states are: | no connection. Briefly the meanings of the states are: | |||
</t> | </t> | |||
<t><list> | <dl> | |||
<t>LISTEN - represents waiting for a connection request from any remote | <dt>LISTEN -</dt><dd>represents waiting for a connection request fro | |||
TCP peer and port.</t> | m any remote | |||
TCP peer and port.</dd> | ||||
<t>SYN-SENT - represents waiting for a matching connection request | <dt>SYN-SENT -</dt><dd>represents waiting for a matching connection | |||
after having sent a connection request.</t> | request | |||
after having sent a connection request.</dd> | ||||
<t>SYN-RECEIVED - represents waiting for a confirming connection | <dt>SYN-RECEIVED -</dt><dd>represents waiting for a confirming conne | |||
ction | ||||
request acknowledgment after having both received and sent a | request acknowledgment after having both received and sent a | |||
connection request.</t> | connection request.</dd> | |||
<dt>ESTABLISHED -</dt><dd>represents an open connection, data receiv | ||||
<t>ESTABLISHED - represents an open connection, data received can be | ed can be | |||
delivered to the user. The normal state for the data transfer phase | delivered to the user. The normal state for the data transfer phase | |||
of the connection.</t> | of the connection.</dd> | |||
<dt>FIN-WAIT-1 -</dt><dd>represents waiting for a connection termina | ||||
<t>FIN-WAIT-1 - represents waiting for a connection termination request | tion request | |||
from the remote TCP peer, or an acknowledgment of the connection | from the remote TCP peer, or an acknowledgment of the connection | |||
termination request previously sent.</t> | termination request previously sent.</dd> | |||
<dt>FIN-WAIT-2 -</dt><dd>represents waiting for a connection termina | ||||
<t>FIN-WAIT-2 - represents waiting for a connection termination request | tion request | |||
from the remote TCP peer.</t> | from the remote TCP peer.</dd> | |||
<dt>CLOSE-WAIT -</dt><dd>represents waiting for a connection termina | ||||
<t>CLOSE-WAIT - represents waiting for a connection termination request | tion request | |||
from the local user.</t> | from the local user.</dd> | |||
<dt>CLOSING -</dt><dd>represents waiting for a connection terminatio | ||||
<t>CLOSING - represents waiting for a connection termination request | n request | |||
acknowledgment from the remote TCP peer.</t> | acknowledgment from the remote TCP peer.</dd> | |||
<dt>LAST-ACK -</dt><dd>represents waiting for an acknowledgment of t | ||||
<t>LAST-ACK - represents waiting for an acknowledgment of the | he | |||
connection termination request previously sent to the remote TCP peer | connection termination request previously sent to the remote TCP peer | |||
(this termination request sent to the remote TCP peer already included an ac | (this termination request sent to the remote TCP peer already included an ac | |||
knowledgment of the termination request sent from the remote TCP peer).</t> | knowledgment of the termination request sent from the remote TCP peer).</dd> | |||
<dt>TIME-WAIT -</dt><dd>represents waiting for enough time to pass t | ||||
<t>TIME-WAIT - represents waiting for enough time to pass to be sure | o be sure | |||
the remote TCP peer received the acknowledgment of its connection | the remote TCP peer received the acknowledgment of its connection | |||
termination request, and to avoid new connections being impacted by delayed | termination request and to avoid new connections being impacted by delayed | |||
segments from previous connections.</t> | segments from previous connections.</dd> | |||
<dt>CLOSED -</dt><dd>represents no connection state at all.</dd> | ||||
<t>CLOSED - represents no connection state at all.</t> | </dl> | |||
</list></t> | <t> | |||
<t> | ||||
A TCP connection progresses from one state to another in response to | A TCP connection progresses from one state to another in response to | |||
events. The events are the user calls, OPEN, SEND, RECEIVE, CLOSE, | events. The events are the user calls, OPEN, SEND, RECEIVE, CLOSE, | |||
ABORT, and STATUS; the incoming segments, particularly those | ABORT, and STATUS; the incoming segments, particularly those | |||
containing the SYN, ACK, RST and FIN flags; and timeouts. | containing the SYN, ACK, RST, and FIN flags; and timeouts. | |||
</t> | </t> | |||
<t> | <t> | |||
The OPEN call specifies | The OPEN call specifies | |||
whether connection establishment is to be actively pursued, or to | whether connection establishment is to be actively pursued, or to | |||
be passively waited for. | be passively waited for. | |||
</t> | </t> | |||
<t> | <t> | |||
A passive OPEN request means that the process wants to accept incoming | A passive OPEN request means that the process wants to accept incoming | |||
connection requests, in contrast to an active OPEN attempting to initiate a connection. | connection requests, in contrast to an active OPEN attempting to initiate a connection. | |||
</t> | </t> | |||
<t> | <t> | |||
The state diagram in <xref target="conn_states" /> illustrates only state chan | The state diagram in <xref target="conn_states" format="default"/> illustrates | |||
ges, together | only state changes, together | |||
with the causing events and resulting actions, but addresses neither | with the causing events and resulting actions, but addresses neither | |||
error conditions nor actions that are not connected with state | error conditions nor actions that are not connected with state | |||
changes. In a later section, more detail is offered with respect to | changes. In a later section, more detail is offered with respect to | |||
the reaction of the TCP implementation to events. Some state names are abbreviated or hyphenated differently in the diagram from how they appear elsewhere in the document. | the reaction of the TCP implementation to events. Some state names are abbreviated or hyphenated differently in the diagram from how they appear elsewhere in the document. | |||
</t> | </t> | |||
<t> | <dl> | |||
NOTA BENE: This diagram is only a summary and must not be taken as | <dt> | |||
NOTA BENE:</dt><dd>This diagram is only a summary and must not be taken as | ||||
the total specification. Many details are not included. | the total specification. Many details are not included. | |||
</t> | </dd> | |||
<figure anchor="conn_states" title="TCP Connection State Diagram"> | </dl> | |||
<artwork> | <figure anchor="conn_states"> | |||
<name>TCP Connection State Diagram</name> | ||||
<artwork name="" type="" align="left" alt=""><![CDATA[ | ||||
+---------+ ---------\ active OPEN | +---------+ ---------\ active OPEN | |||
| CLOSED | \ ----------- | | CLOSED | \ ----------- | |||
+---------+<---------\ \ create TCB | +---------+<---------\ \ create TCB | |||
| ^ \ \ snd SYN | | ^ \ \ snd SYN | |||
passive OPEN | | CLOSE \ \ | passive OPEN | | CLOSE \ \ | |||
------------ | | ---------- \ \ | ------------ | | ---------- \ \ | |||
create TCB | | delete TCB \ \ | create TCB | | delete TCB \ \ | |||
V | \ \ | V | \ \ | |||
rcv RST (note 1) +---------+ CLOSE | \ | rcv RST (note 1) +---------+ CLOSE | \ | |||
-------------------->| LISTEN | ---------- | | | -------------------->| LISTEN | ---------- | | | |||
/ +---------+ delete TCB | | | / +---------+ delete TCB | | | |||
/ rcv SYN | | SEND | | | / rcv SYN | | SEND | | | |||
/ ----------- | | ------- | V | / ----------- | | ------- | V | |||
+--------+ snd SYN,ACK / \ snd SYN +--------+ | +--------+ snd SYN,ACK / \ snd SYN +--------+ | |||
| |<----------------- ------------------>| | | | |<----------------- ------------------>| | | |||
| SYN | rcv SYN | SYN | | | SYN | rcv SYN | SYN | | |||
| RCVD |<-----------------------------------------------| SENT | | | RCVD |<-----------------------------------------------| SENT | | |||
| | snd SYN,ACK | | | | | snd SYN,ACK | | | |||
| |------------------ -------------------| | | | |------------------ -------------------| | | |||
+--------+ rcv ACK of SYN \ / rcv SYN,ACK +--------+ | +--------+ rcv ACK of SYN \ / rcv SYN,ACK +--------+ | |||
| -------------- | | ----------- | | -------------- | | ----------- | |||
| x | | snd ACK | | x | | snd ACK | |||
| V V | | V V | |||
| CLOSE +---------+ | | CLOSE +---------+ | |||
| ------- | ESTAB | | | ------- | ESTAB | | |||
| snd FIN +---------+ | | snd FIN +---------+ | |||
| CLOSE | | rcv FIN | | CLOSE | | rcv FIN | |||
V ------- | | ------- | V ------- | | ------- | |||
+---------+ snd FIN / \ snd ACK +---------+ | +---------+ snd FIN / \ snd ACK +---------+ | |||
| FIN |<---------------- ------------------>| CLOSE | | | FIN |<---------------- ------------------>| CLOSE | | |||
| WAIT-1 |------------------ | WAIT | | | WAIT-1 |------------------ | WAIT | | |||
+---------+ rcv FIN \ +---------+ | +---------+ rcv FIN \ +---------+ | |||
| rcv ACK of FIN ------- | CLOSE | | | rcv ACK of FIN ------- | CLOSE | | |||
| -------------- snd ACK | ------- | | | -------------- snd ACK | ------- | | |||
V x V snd FIN V | V x V snd FIN V | |||
+---------+ +---------+ +---------+ | +---------+ +---------+ +---------+ | |||
|FINWAIT-2| | CLOSING | | LAST-ACK| | |FINWAIT-2| | CLOSING | | LAST-ACK| | |||
+---------+ +---------+ +---------+ | +---------+ +---------+ +---------+ | |||
| rcv ACK of FIN | rcv ACK of FIN | | | rcv ACK of FIN | rcv ACK of FIN | | |||
| rcv FIN -------------- | Timeout=2MSL -------------- | | | rcv FIN -------------- | Timeout=2MSL -------------- | | |||
| ------- x V ------------ x V | | ------- x V ------------ x V | |||
\ snd ACK +---------+delete TCB +---------+ | \ snd ACK +---------+delete TCB +---------+ | |||
-------------------->|TIME-WAIT|------------------->| CLOSED | | -------------------->|TIME-WAIT|------------------->| CLOSED | | |||
+---------+ +---------+ | +---------+ +---------+ | |||
</artwork> | ]]></artwork> | |||
</figure> | </figure> | |||
<t>The following notes apply to <xref target="conn_states"/>: | <t>The following notes apply to <xref target="conn_states" format="def | |||
<list> | ault"/>: | |||
<t> | ||||
Note 1: The transition from SYN-RECEIVED to LISTEN on receiving a RST is | ||||
conditional on having reached SYN-RECEIVED after a passive open. | ||||
</t> | </t> | |||
<t> | <dl> | |||
Note 2: The figure omits a transition from FIN-WAIT-1 to TIME-WAIT if | <dt> | |||
Note 1:</dt><dd>The transition from SYN-RECEIVED to LISTEN on receiving a RST is | ||||
conditional on having reached SYN-RECEIVED after a passive OPEN. | ||||
</dd> | ||||
<dt> | ||||
Note 2:</dt><dd>The figure omits a transition from FIN-WAIT-1 to TIME-WAIT if | ||||
a FIN is received and the local FIN is also acknowledged. | a FIN is received and the local FIN is also acknowledged. | |||
</t> | </dd> | |||
<t> | <dt> | |||
Note 3: A RST can be sent from any state with a corresponding transition to TIME | Note 3:</dt><dd>A RST can be sent from any state with a corresponding transition | |||
-WAIT (see <xref target="FTY99"/> for rationale). These transitions are not exp | to TIME-WAIT (see <xref target="FTY99" format="default"/> for rationale). Thes | |||
licitly shown, otherwise the diagram would become very difficult to read. Simil | e transitions are not explicitly shown; otherwise, the diagram would become very | |||
arly, receipt of a RST from any state results in a transition to LISTEN or CLOSE | difficult to read. Similarly, receipt of a RST from any state results in a tra | |||
D, though this is also omitted from the diagram for legibility. | nsition to LISTEN or CLOSED, though this is also omitted from the diagram for le | |||
</t> | gibility. | |||
</list></t> | </dd> | |||
</section> | </dl> | |||
</section> | </section> | |||
<section title="Sequence Numbers"> | </section> | |||
<t> | <section anchor="sequence-numbers" numbered="true" toc="default"> | |||
<name>Sequence Numbers</name> | ||||
<t> | ||||
A fundamental notion in the design is that every octet of data sent | A fundamental notion in the design is that every octet of data sent | |||
over a TCP connection has a sequence number. Since every octet is | over a TCP connection has a sequence number. Since every octet is | |||
sequenced, each of them can be acknowledged. The acknowledgment | sequenced, each of them can be acknowledged. The acknowledgment | |||
mechanism employed is cumulative so that an acknowledgment of sequence | mechanism employed is cumulative so that an acknowledgment of sequence | |||
number X indicates that all octets up to but not including X have been | number X indicates that all octets up to but not including X have been | |||
received. This mechanism allows for straight-forward duplicate | received. This mechanism allows for straightforward duplicate | |||
detection in the presence of retransmission. Numbering of octets | detection in the presence of retransmission. The numbering scheme of octets | |||
within a segment is that the first data octet immediately following | within a segment is as follows: the first data octet immediately following | |||
the header is the lowest numbered, and the following octets are | the header is the lowest numbered, and the following octets are | |||
numbered consecutively. | numbered consecutively. | |||
</t> | </t> | |||
<t> | <t> | |||
It is essential to remember that the actual sequence number space is | It is essential to remember that the actual sequence number space is | |||
finite, though large. This space ranges from 0 to 2**32 - 1. | finite, though large. This space ranges from 0 to 2<sup>32</sup> - 1. | |||
Since the space is finite, all arithmetic dealing with sequence | Since the space is finite, all arithmetic dealing with sequence | |||
numbers must be performed modulo 2**32. This unsigned arithmetic | numbers must be performed modulo 2<sup>32</sup>. This unsigned arithmetic | |||
preserves the relationship of sequence numbers as they cycle from | preserves the relationship of sequence numbers as they cycle from | |||
2**32 - 1 to 0 again. There are some subtleties to computer modulo | 2<sup>32</sup> - 1 to 0 again. There are some subtleties to computer modulo | |||
arithmetic, so great care should be taken in programming the | arithmetic, so great care should be taken in programming the | |||
comparison of such values. The symbol "=<" means "less than or equal" | comparison of such values. The symbol "=<" means "less than or equal" | |||
(modulo 2**32). | (modulo 2<sup>32</sup>). | |||
</t> | </t> | |||
<t> | <t> | |||
The typical kinds of sequence number comparisons that the TCP implementation must | The typical kinds of sequence number comparisons that the TCP implementation must | |||
perform include: | perform include: | |||
</t> | </t> | |||
<t><list> | <ol type="(%c)" spacing="normal"> | |||
<t>(a) Determining that an acknowledgment refers to some sequence | <li>Determining that an acknowledgment refers to some sequence | |||
number sent but not yet acknowledged.</t> | number sent but not yet acknowledged.</li> | |||
<li>Determining that all sequence numbers occupied by a segment | ||||
<t>(b) Determining that all sequence numbers occupied by a segment | ||||
have been acknowledged (e.g., to remove the segment from a | have been acknowledged (e.g., to remove the segment from a | |||
retransmission queue).</t> | retransmission queue).</li> | |||
<li>Determining that an incoming segment contains sequence numbers | ||||
<t>(c) Determining that an incoming segment contains sequence numbers | ||||
that are expected (i.e., that the segment "overlaps" the | that are expected (i.e., that the segment "overlaps" the | |||
receive window).</t> | receive window).</li> | |||
</list></t> | </ol> | |||
<t> | <t> | |||
In response to sending data the TCP endpoint will receive acknowledgments. Th | In response to sending data, the TCP endpoint will receive acknowledgments. T | |||
e | he | |||
following comparisons are needed to process the acknowledgments. | following comparisons are needed to process the acknowledgments: | |||
</t> | </t> | |||
<t><list> | <t indent="3"> | |||
<t>SND.UNA = oldest unacknowledged sequence number</t> | SND.UNA = oldest unacknowledged sequence number | |||
</t> | ||||
<t>SND.NXT = next sequence number to be sent</t> | <t indent="3"> | |||
SND.NXT = next sequence number to be sent | ||||
<t>SEG.ACK = acknowledgment from the receiving TCP peer (next sequence | </t> | |||
number expected by the receiving TCP peer)</t> | <t indent="3"> | |||
SEG.ACK = acknowledgment from the receiving TCP peer (next sequence | ||||
<t>SEG.SEQ = first sequence number of a segment</t> | number expected by the receiving TCP peer) | |||
</t> | ||||
<t>SEG.LEN = the number of octets occupied by the data in the segment | <t indent="3"> | |||
(counting SYN and FIN)</t> | SEG.SEQ = first sequence number of a segment | |||
</t> | ||||
<t>SEG.SEQ+SEG.LEN-1 = last sequence number of a segment</t> | <t indent="3"> | |||
</list></t> | SEG.LEN = the number of octets occupied by the data in the segment | |||
<t> | (counting SYN and FIN) | |||
A new acknowledgment (called an "acceptable ack"), is one for which | </t> | |||
<t indent="3"> | ||||
SEG.SEQ+SEG.LEN-1 = last sequence number of a segment | ||||
</t> | ||||
<t> | ||||
A new acknowledgment (called an "acceptable ack") is one for which | ||||
the inequality below holds: | the inequality below holds: | |||
</t> | </t> | |||
<t><list> | <t indent="3"> | |||
<t>SND.UNA < SEG.ACK =< SND.NXT</t> | SND.UNA < SEG.ACK =< SND.NXT | |||
</list></t> | </t> | |||
<t> | <t> | |||
A segment on the retransmission queue is fully acknowledged if the sum | A segment on the retransmission queue is fully acknowledged if the sum | |||
of its sequence number and length is less or equal than the | of its sequence number and length is less than or equal to the | |||
acknowledgment value in the incoming segment. | acknowledgment value in the incoming segment. | |||
</t> | </t> | |||
<t> | <t> | |||
When data is received the following comparisons are needed: | When data is received, the following comparisons are needed: | |||
</t> | </t> | |||
<t><list> | <t indent="3"> | |||
<t>RCV.NXT = next sequence number expected on an incoming segment, and | RCV.NXT = next sequence number expected on an incoming segment, and | |||
is the left or lower edge of the receive window</t> | is the left or lower edge of the receive window | |||
</t> | ||||
<t>RCV.NXT+RCV.WND-1 = last sequence number expected on an incoming | <t indent="3"> | |||
segment, and is the right or upper edge of the receive window</t> | RCV.NXT+RCV.WND-1 = last sequence number expected on an incoming | |||
segment, and is the right or upper edge of the receive window | ||||
<t>SEG.SEQ = first sequence number occupied by the incoming segment</t> | </t> | |||
<t indent="3"> | ||||
<t>SEG.SEQ+SEG.LEN-1 = last sequence number occupied by the incoming | SEG.SEQ = first sequence number occupied by the incoming segment | |||
segment</t> | </t> | |||
</list></t> | <t indent="3"> | |||
<t> | SEG.SEQ+SEG.LEN-1 = last sequence number occupied by the incoming | |||
segment | ||||
</t> | ||||
<t> | ||||
A segment is judged to occupy a portion of valid receive sequence | A segment is judged to occupy a portion of valid receive sequence | |||
space if | space if | |||
</t> | </t> | |||
<t><list> | <t indent="3"> | |||
<t>RCV.NXT =< SEG.SEQ < RCV.NXT+RCV.WND</t> | RCV.NXT =< SEG.SEQ < RCV.NXT+RCV.WND | |||
</list></t> | </t> | |||
<t> | ||||
<t> | ||||
or | or | |||
</t> | </t> | |||
<t><list> | <t indent="3"> | |||
<t>RCV.NXT =< SEG.SEQ+SEG.LEN-1 < RCV.NXT+RCV.WND</t> | RCV.NXT =< SEG.SEQ+SEG.LEN-1 < RCV.NXT+RCV.WND | |||
</list></t> | </t> | |||
<t> | <t> | |||
The first part of this test checks to see if the beginning of the | The first part of this test checks to see if the beginning of the | |||
segment falls in the window, the second part of the test checks to see | segment falls in the window, the second part of the test checks to see | |||
if the end of the segment falls in the window; if the segment passes | if the end of the segment falls in the window; if the segment passes | |||
either part of the test it contains data in the window. | either part of the test, it contains data in the window. | |||
</t> | </t> | |||
<t> | <t> | |||
Actually, it is a little more complicated than this. Due to zero | Actually, it is a little more complicated than this. Due to zero | |||
windows and zero length segments, we have four cases for the | windows and zero-length segments, we have four cases for the | |||
acceptability of an incoming segment: | acceptability of an incoming segment: | |||
</t> | </t> | |||
<t><figure><artwork> | <table> | |||
Segment Receive Test | <name>Segment Acceptability Tests</name> | |||
Length Window | <thead> | |||
------- ------- ------------------------------------------- | <tr> | |||
<th>Segment Length</th> | ||||
0 0 SEG.SEQ = RCV.NXT | <th>Receive Window</th> | |||
<th>Test</th> | ||||
0 >0 RCV.NXT =< SEG.SEQ < RCV.NXT+RCV.WND | </tr> | |||
</thead> | ||||
>0 0 not acceptable | <tbody> | |||
<tr> | ||||
>0 >0 RCV.NXT =< SEG.SEQ < RCV.NXT+RCV.WND | <td>0</td> | |||
or RCV.NXT =< SEG.SEQ+SEG.LEN-1 < RCV.NXT+RCV.WND | <td>0</td> | |||
</artwork></figure></t> | <td>SEG.SEQ = RCV.NXT</td> | |||
<t> | </tr> | |||
<tr> | ||||
<td>0</td> | ||||
<td>>0</td> | ||||
<td>RCV.NXT =< SEG.SEQ < RCV.NXT+RCV.WND</td> | ||||
</tr> | ||||
<tr> | ||||
<td>>0</td> | ||||
<td>0</td> | ||||
<td>not acceptable</td> | ||||
</tr> | ||||
<tr> | ||||
<td>>0</td> | ||||
<td>>0</td> | ||||
<td> | ||||
<t>RCV.NXT =< SEG.SEQ < RCV.NXT+RCV.WND</t> | ||||
<t>or</t> | ||||
<t>RCV.NXT =< SEG.SEQ+SEG.LEN-1 < RCV.NXT+RCV.WND</t> | ||||
</td> | ||||
</tr> | ||||
</tbody> | ||||
</table> | ||||
<t> | ||||
Note that when the receive window is zero no segments should be | Note that when the receive window is zero no segments should be | |||
acceptable except ACK segments. Thus, it is possible for a TCP implementation to | acceptable except ACK segments. Thus, it is possible for a TCP implementation to | |||
maintain a zero receive window while transmitting data and receiving | maintain a zero receive window while transmitting data and receiving | |||
ACKs. A TCP receiver MUST | ACKs. A TCP receiver <bcp14>MUST</bcp14> | |||
process the RST and URG fields of all incoming segments, even when the receive window is zero (MUST-66). | process the RST and URG fields of all incoming segments, even when the receive window is zero (MUST-66). | |||
</t> | </t> | |||
<t> | <t> | |||
We have taken advantage of the numbering scheme to protect certain | We have taken advantage of the numbering scheme to protect certain | |||
control information as well. This is achieved by implicitly including | control information as well. This is achieved by implicitly including | |||
some control flags in the sequence space so they can be retransmitted | some control flags in the sequence space so they can be retransmitted | |||
and acknowledged without confusion (i.e., one and only one copy of the | and acknowledged without confusion (i.e., one and only one copy of the | |||
control will be acted upon). Control information is not physically | control will be acted upon). Control information is not physically | |||
carried in the segment data space. Consequently, we must adopt rules | carried in the segment data space. Consequently, we must adopt rules | |||
for implicitly assigning sequence numbers to control. The SYN and FIN | for implicitly assigning sequence numbers to control. The SYN and FIN | |||
are the only controls requiring this protection, and these controls | are the only controls requiring this protection, and these controls | |||
are used only at connection opening and closing. For sequence number | are used only at connection opening and closing. For sequence number | |||
purposes, the SYN is considered to occur before the first actual data | purposes, the SYN is considered to occur before the first actual data | |||
octet of the segment in which it occurs, while the FIN is considered | octet of the segment in which it occurs, while the FIN is considered | |||
to occur after the last actual data octet in a segment in which it | to occur after the last actual data octet in a segment in which it | |||
occurs. The segment length (SEG.LEN) includes both data and sequence | occurs. The segment length (SEG.LEN) includes both data and sequence | |||
space-occupying controls. When a SYN is present then SEG.SEQ is the | space-occupying controls. When a SYN is present, then SEG.SEQ is the | |||
sequence number of the SYN. | sequence number of the SYN. | |||
</t> | </t> | |||
<section title="Initial Sequence Number Selection"> | <section numbered="true" toc="default"> | |||
<t> | <name>Initial Sequence Number Selection</name> | |||
<t> | ||||
A connection is defined by a pair of | A connection is defined by a pair of | |||
sockets. Connections can be reused. New instances of a connection will be referred to as | sockets. Connections can be reused. New instances of a connection will be referred to as | |||
incarnations of the connection. The problem that arises from this is | incarnations of the connection. The problem that arises from this is | |||
-- "how does the TCP implementation identify duplicate segments from previous | -- "how does the TCP implementation identify duplicate segments from previous | |||
incarnations of the connection?" This problem becomes apparent if the | incarnations of the connection?" This problem becomes apparent if the | |||
connection is being opened and closed in quick succession, or if the | connection is being opened and closed in quick succession, or if the | |||
connection breaks with loss of memory and is then reestablished. | connection breaks with loss of memory and is then reestablished. | |||
To support this, the TIME-WAIT state limits the rate of connection reuse, | To support this, the TIME-WAIT state limits the rate of connection reuse, | |||
while the initial sequence number selection described below further protects | while the initial sequence number selection described below further protects | |||
against ambiguity about what incarnation of a connection an incoming packet | against ambiguity about which incarnation of a connection an incoming packet | |||
corresponds to. | corresponds to. | |||
</t> | </t> | |||
<t> | <t> | |||
To avoid confusion we must prevent segments from one incarnation of a | To avoid confusion, we must prevent segments from one incarnation of a | |||
connection from being used while the same sequence numbers may still | connection from being used while the same sequence numbers may still | |||
be present in the network from an earlier incarnation. We want to | be present in the network from an earlier incarnation. We want to | |||
assure this, even if a TCP endpoint loses all knowledge of the | assure this even if a TCP endpoint loses all knowledge of the | |||
sequence numbers it has been using. When new connections are created, | sequence numbers it has been using. When new connections are created, | |||
an initial sequence number (ISN) generator is employed that selects a | an initial sequence number (ISN) generator is employed that selects a | |||
new 32 bit ISN. There are security issues that result if an off-path | new 32-bit ISN. There are security issues that result if an off-path | |||
attacker is able to predict or guess ISN values <xref target="RFC6528"/>. | attacker is able to predict or guess ISN values <xref target="RFC6528" format="default"/>. | |||
</t> | </t> | |||
<t> | <t> | |||
TCP Initial Sequence Numbers are generated from a number sequence that | TCP initial sequence numbers are generated from a number sequence that | |||
monotonically increases until it wraps, known loosely as a "clock". | monotonically increases until it wraps, known loosely as a "clock". | |||
This clock is a 32-bit counter that typically increments at least once every | This clock is a 32-bit counter that typically increments at least once every | |||
roughly 4 microseconds, although it is neither assumed to be realtime nor | roughly 4 microseconds, although it is neither assumed to be realtime nor | |||
precise, and need not persist across reboots. The clock component is intended | precise, and need not persist across reboots. The clock component is intended | |||
to ensure that with a Maximum Segment Lifetime (MSL), generated ISNs will be | to ensure that with a Maximum Segment Lifetime (MSL), generated ISNs will be | |||
unique, since it cycles approximately every 4.55 hours, which is much longer | unique since it cycles approximately every 4.55 hours, which is much longer | |||
than the MSL. | than the MSL. Please note that for modern networks that support high data | |||
rates where the connection might start and quickly advance sequence numbers to | ||||
overlap within the MSL, it is recommended to implement the Timestamp Option as | ||||
mentioned later in <xref target="tcp_quiet_time_concept"/>. | ||||
</t> | </t> | |||
<t> | <t> | |||
A TCP implementation MUST use the above type of "clock" for clock-driven selection of initial sequence numbers (MUST-8), and | A TCP implementation <bcp14>MUST</bcp14> use the above type of "clock" for clock-driven selection of initial sequence numbers (MUST-8), and | |||
SHOULD generate its Initial Sequence Numbers with the expression: | <bcp14>SHOULD</bcp14> generate its initial sequence numbers with the expression: | |||
</t> | </t> | |||
<t> | <t> | |||
ISN = M + F(localip, localport, remoteip, remoteport, secretkey) | ISN = M + F(localip, localport, remoteip, remoteport, secretkey) | |||
</t> | </t> | |||
<t> | <t> | |||
where M is the 4 microsecond timer, and F() is a pseudorandom | where M is the 4 microsecond timer, and F() is a pseudorandom | |||
function (PRF) of the connection's identifying parameters ("localip, localport, remoteip, remoteport") and a secret key ("secretkey") (SHLD-1). F() MUST NOT be computable from the outside (MUST-9), or an attacker could still guess at sequence numbers from the ISN used for some other connection. The PRF could be implemented as a cryptographic hash of the concatenation of the TCP connection parameters and some secret data. For discussion of the selection of a specific hash algorithm and management of the secret key data, please see Section 3 of <xref target="RFC6528"/>. | function (PRF) of the connection's identifying parameters ("localip, localport, remoteip, remoteport") and a secret key ("secretkey") (SHLD-1). F() <bcp14>MUST NOT</bcp14> be computable from the outside (MUST-9), or an attacker could still guess at sequence numbers from the ISN used for some other connection. The PRF could be implemented as a cryptographic hash of the concatenation of the TCP connection parameters and some secret data. For discussion of the selection of a specific hash algorithm and management of the secret key data, please see <xref target="RFC6528" section="3" sectionFormat="of" format="default"/>. | |||
</t> | </t> | |||
<t> | ||||
<t> | ||||
For each connection there is a send sequence number and a receive | For each connection there is a send sequence number and a receive | |||
sequence number. The initial send sequence number (ISS) is chosen by | sequence number. The initial send sequence number (ISS) is chosen by | |||
the data sending TCP peer, and the initial receive sequence number (IRS) is | the data sending TCP peer, and the initial receive sequence number (IRS) is | |||
learned during the connection establishing procedure. | learned during the connection-establishing procedure. | |||
</t> | </t> | |||
<t> | <t> | |||
For a connection to be established or initialized, the two TCP peers must | For a connection to be established or initialized, the two TCP peers must | |||
synchronize on each other's initial sequence numbers. This is done in | synchronize on each other's initial sequence numbers. This is done in | |||
an exchange of connection establishing segments carrying a control bit | an exchange of connection-establishing segments carrying a control bit | |||
called "SYN" (for synchronize) and the initial sequence numbers. As a | called "SYN" (for synchronize) and the initial sequence numbers. As a | |||
shorthand, segments carrying the SYN bit are also called "SYNs". | shorthand, segments carrying the SYN bit are also called "SYNs". | |||
Hence, the solution requires a suitable mechanism for picking an | Hence, the solution requires a suitable mechanism for picking an | |||
initial sequence number and a slightly involved handshake to exchange | initial sequence number and a slightly involved handshake to exchange | |||
the ISNs. | the ISNs. | |||
</t> | </t> | |||
<t> | <t> | |||
The synchronization requires each side to send its own initial | The synchronization requires each side to send its own initial | |||
sequence number and to receive a confirmation of it in acknowledgment | sequence number and to receive a confirmation of it in acknowledgment | |||
from the remote TCP peer. Each side must also receive the remote peer's | from the remote TCP peer. Each side must also receive the remote peer's | |||
initial sequence number and send a confirming acknowledgment. | initial sequence number and send a confirming acknowledgment. | |||
</t> | </t> | |||
<t><figure><artwork> | <artwork name="" type="" align="left" alt=""><![CDATA[ | |||
1) A --> B SYN my sequence number is X | 1) A --> B SYN my sequence number is X | |||
2) A <-- B ACK your sequence number is X | 2) A <-- B ACK your sequence number is X | |||
3) A <-- B SYN my sequence number is Y | 3) A <-- B SYN my sequence number is Y | |||
4) A --> B ACK your sequence number is Y | 4) A --> B ACK your sequence number is Y | |||
</artwork></figure></t> | ]]></artwork> | |||
<t> | <t> | |||
Because steps 2 and 3 can be combined in a single message this is | Because steps 2 and 3 can be combined in a single message this is | |||
called the three-way (or three message) handshake (3WHS). | called the three-way (or three message) handshake (3WHS). | |||
</t> | </t> | |||
<t> | <t> | |||
A 3WHS is necessary because sequence numbers are not | A 3WHS is necessary because sequence numbers are not | |||
tied to a global clock in the network, and TCP implementations may have different | tied to a global clock in the network, and TCP implementations may have different | |||
mechanisms for picking the ISNs. The receiver of the first SYN has | mechanisms for picking the ISNs. The receiver of the first SYN has | |||
no way of knowing whether the segment was an old one or not, | no way of knowing whether the segment was an old one or not, | |||
unless it remembers the last sequence number used on the connection | unless it remembers the last sequence number used on the connection | |||
(which is not always possible), and so it must ask the sender to | (which is not always possible), and so it must ask the sender to | |||
verify this SYN. The three-way handshake and the advantages of a | verify this SYN. The three-way handshake and the advantages of a | |||
clock-driven scheme for ISN selection are discussed in <xref target="DS78"/>. | clock-driven scheme for ISN selection are discussed in <xref target="DS78" format="default"/>. | |||
</t> | </t> | |||
</section> | </section> | |||
<section title="Knowing When to Keep Quiet"> | <section numbered="true" toc="default"> | |||
<t> | <name>Knowing When to Keep Quiet</name> | |||
<t> | ||||
A theoretical problem exists where data could be corrupted due to confusion | A theoretical problem exists where data could be corrupted due to confusion | |||
between old segments in the network and new ones after a host reboots, if the | between old segments in the network and new ones after a host reboots if the | |||
same port numbers and sequence space are reused. The "Quiet Time" | same port numbers and sequence space are reused. The "quiet time" | |||
concept discussed below addresses this and the discussion of it is included | concept discussed below addresses this, and the discussion of it is included | |||
for situations where it might be relevant, although it is not felt to be | for situations where it might be relevant, although it is not felt to be | |||
necessary in most current implementations. The problem was more relevant | necessary in most current implementations. The problem was more relevant | |||
earlier in the history of TCP. In practical use on the Internet today, the | earlier in the history of TCP. In practical use on the Internet today, the | |||
error-prone conditions are sufficiently unlikely that it is felt safe to | error-prone conditions are sufficiently unlikely that it is safe to | |||
ignore. Reasons why it is now negligible include: (a) ISS and ephemeral port | ignore. Reasons why it is now negligible include: (a) ISS and ephemeral port | |||
randomization have reduced likelihood of reuse of port numbers and sequence numbers | randomization have reduced likelihood of reuse of port numbers and sequence numbers | |||
after reboots, (b) the effective MSL of the Internet has declined as links | after reboots, (b) the effective MSL of the Internet has declined as links | |||
have become faster, and (c) reboots often taking longer than an MSL anyways. | have become faster, and (c) reboots often taking longer than an MSL anyways. | |||
</t> | </t> | |||
<t> | <t> | |||
To be sure that a TCP implementation does not create a segment carrying a | To be sure that a TCP implementation does not create a segment carrying a | |||
sequence number that may be duplicated by an old segment remaining in the | sequence number that may be duplicated by an old segment remaining in the | |||
network, the TCP endpoint must keep quiet for an MSL before assigning any | network, the TCP endpoint must keep quiet for an MSL before assigning any | |||
sequence numbers upon starting up or recovering from a situation where memory | sequence numbers upon starting up or recovering from a situation where memory | |||
of sequence numbers in use was lost. For this specification the MSL is taken | of sequence numbers in use was lost. For this specification the MSL is taken | |||
to be 2 minutes. This is an engineering choice, and may be changed if | to be 2 minutes. This is an engineering choice, and may be changed if | |||
experience indicates it is desirable to do so. Note that if a TCP endpoint | experience indicates it is desirable to do so. Note that if a TCP endpoint | |||
is reinitialized in some sense, yet retains its memory of sequence numbers in | is reinitialized in some sense, yet retains its memory of sequence numbers in | |||
use, then it need not wait at all; it must only be sure to use sequence | use, then it need not wait at all; it must only be sure to use sequence | |||
numbers larger than those recently used. | numbers larger than those recently used. | |||
</t> | </t> | |||
</section> | </section> | |||
<section title="The TCP Quiet Time Concept"> | <section anchor="tcp_quiet_time_concept" numbered="true" toc="default"> | |||
<t> | <name>The TCP Quiet Time Concept</name> | |||
<t> | ||||
Hosts that for any reason lose | Hosts that for any reason lose | |||
knowledge of the last sequence numbers transmitted on | knowledge of the last sequence numbers transmitted on | |||
each active (i.e., not closed) connection shall delay emitting any | each active (i.e., not closed) connection shall delay emitting any | |||
TCP segments for at least the agreed MSL | TCP segments for at least the agreed MSL | |||
in the internet system that the host is a part of. In the | in the internet system that the host is a part of. In the | |||
paragraphs below, an explanation for this specification is given. | paragraphs below, an explanation for this specification is given. | |||
TCP implementors may violate the "quiet time" restriction, but only | TCP implementers may violate the "quiet time" restriction, but only | |||
at the risk of causing some old data to be accepted as new or new | at the risk of causing some old data to be accepted as new or new | |||
data rejected as old duplicated data by some receivers in the internet | data rejected as old duplicated data by some receivers in the internet | |||
system. | system. | |||
</t> | </t> | |||
<t> | <t> | |||
TCP endpoints consume sequence number space each time a segment is formed and | TCP endpoints consume sequence number space each time a segment is formed and | |||
entered into the network output queue at a source host. The | entered into the network output queue at a source host. The | |||
duplicate detection and sequencing algorithm in the TCP protocol | duplicate detection and sequencing algorithm in TCP | |||
relies on the unique binding of segment data to sequence space to | relies on the unique binding of segment data to sequence space to | |||
the extent that sequence numbers will not cycle through all 2**32 | the extent that sequence numbers will not cycle through all 2<sup>32</sup> | |||
values before the segment data bound to those sequence numbers has | values before the segment data bound to those sequence numbers has | |||
been delivered and acknowledged by the receiver and all duplicate | been delivered and acknowledged by the receiver and all duplicate | |||
copies of the segments have "drained" from the internet. Without | copies of the segments have "drained" from the internet. Without | |||
such an assumption, two distinct TCP segments could conceivably be | such an assumption, two distinct TCP segments could conceivably be | |||
assigned the same or overlapping sequence numbers, causing confusion | assigned the same or overlapping sequence numbers, causing confusion | |||
at the receiver as to which data is new and which is old. Remember | at the receiver as to which data is new and which is old. Remember | |||
that each segment is bound to as many consecutive sequence numbers | that each segment is bound to as many consecutive sequence numbers | |||
as there are octets of data and SYN or FIN flags in the segment. | as there are octets of data and SYN or FIN flags in the segment. | |||
</t> | </t> | |||
<t> | <t> | |||
Under normal conditions, TCP implementations keep track of the next sequence number | Under normal conditions, TCP implementations keep track of the next sequence number | |||
to emit and the oldest awaiting acknowledgment so as to avoid | to emit and the oldest awaiting acknowledgment so as to avoid | |||
mistakenly using a sequence number over before its first use has | mistakenly reusing a sequence number before its first use has | |||
been acknowledged. This alone does not guarantee that old duplicate | been acknowledged. This alone does not guarantee that old duplicate | |||
data is drained from the net, so the sequence space has been made | data is drained from the net, so the sequence space has been made | |||
large to reduce the probability that a wandering duplicate will | large to reduce the probability that a wandering duplicate will | |||
cause trouble upon arrival. At 2 megabits/sec. it takes 4.5 hours | cause trouble upon arrival. At 2 megabits/sec., it takes 4.5 hours | |||
to use up 2**32 octets of sequence space. Since the maximum segment | to use up 2<sup>32</sup> octets of sequence space. Since the maximum segment | |||
lifetime in the net is not likely to exceed a few tens of seconds, | lifetime in the net is not likely to exceed a few tens of seconds, | |||
this is deemed ample protection for foreseeable nets, even if data | this is deemed ample protection for foreseeable nets, even if data | |||
rates escalate to 10s of megabits/sec. At 100 megabits/sec, the | rates escalate to 10s of megabits/sec. At 100 megabits/sec., the | |||
cycle time is 5.4 minutes, which may be a little short, but still | cycle time is 5.4 minutes, which may be a little short but still | |||
within reason. Much higher data rates are possible today, with implications | within reason. Much higher data rates are possible today, with implications | |||
described in the final paragraph of this subsection. | described in the final paragraph of this subsection. | |||
</t> | </t> | |||
<t> | <t> | |||
The basic duplicate detection and sequencing algorithm in TCP can be | The basic duplicate detection and sequencing algorithm in TCP can be | |||
defeated, however, if a source TCP endpoint does not have any memory of the | defeated, however, if a source TCP endpoint does not have any memory of the | |||
sequence numbers it last used on a given connection. For example, if | sequence numbers it last used on a given connection. For example, if | |||
the TCP implementation were to start all connections with sequence number 0, then | the TCP implementation were to start all connections with sequence number 0, then | |||
upon the host rebooting, a TCP peer might re-form an earlier | upon the host rebooting, a TCP peer might re-form an earlier | |||
connection (possibly after half-open connection resolution) and emit | connection (possibly after half-open connection resolution) and emit | |||
packets with sequence numbers identical to or overlapping with | packets with sequence numbers identical to or overlapping with | |||
packets still in the network, which were emitted on an earlier | packets still in the network, which were emitted on an earlier | |||
incarnation of the same connection. In the absence of knowledge | incarnation of the same connection. In the absence of knowledge | |||
about the sequence numbers used on a particular connection, the TCP | about the sequence numbers used on a particular connection, the TCP | |||
specification recommends that the source delay for MSL seconds | specification recommends that the source delay for MSL seconds | |||
before emitting segments on the connection, to allow time for | before emitting segments on the connection, to allow time for | |||
segments from the earlier connection incarnation to drain from the | segments from the earlier connection incarnation to drain from the | |||
system. | system. | |||
</t> | </t> | |||
<t> | <t> | |||
Even hosts that can remember the time of day and used it to select | Even hosts that can remember the time of day and use it to select | |||
initial sequence number values are not immune from this problem | initial sequence number values are not immune from this problem | |||
(i.e., even if time of day is used to select an initial sequence | (i.e., even if time of day is used to select an initial sequence | |||
number for each new connection incarnation). | number for each new connection incarnation). | |||
</t> | </t> | |||
<t> | <t> | |||
Suppose, for example, that a connection is opened starting with | Suppose, for example, that a connection is opened starting with | |||
sequence number S. Suppose that this connection is not used much | sequence number S. Suppose that this connection is not used much | |||
and that eventually the initial sequence number function (ISN(t)) | and that eventually the initial sequence number function (ISN(t)) | |||
takes on a value equal to the sequence number, say S1, of the last | takes on a value equal to the sequence number, say S1, of the last | |||
segment sent by this TCP endpoint on a particular connection. Now suppose, | segment sent by this TCP endpoint on a particular connection. Now suppose, | |||
at this instant, the host reboots and establishes a new | at this instant, the host reboots and establishes a new | |||
incarnation of the connection. The initial sequence number chosen is | incarnation of the connection. The initial sequence number chosen is | |||
S1 = ISN(t) -- last used sequence number on old incarnation of | S1 = ISN(t) -- last used sequence number on old incarnation of | |||
connection! If the recovery occurs quickly enough, any old | connection! If the recovery occurs quickly enough, any old | |||
duplicates in the net bearing sequence numbers in the neighborhood | duplicates in the net bearing sequence numbers in the neighborhood | |||
of S1 may arrive and be treated as new packets by the receiver of | of S1 may arrive and be treated as new packets by the receiver of | |||
the new incarnation of the connection. | the new incarnation of the connection. | |||
</t> | </t> | |||
<t> | <t> | |||
The problem is that the recovering host may not know for how long it | The problem is that the recovering host may not know for how long it | |||
was down between rebooting nor does it know whether there are still old duplicates in | was down between rebooting nor does it know whether there are still old duplicates in | |||
the system from earlier connection incarnations. | the system from earlier connection incarnations. | |||
</t> | </t> | |||
<t> | <t> | |||
One way to deal with this problem is to deliberately delay emitting | One way to deal with this problem is to deliberately delay emitting | |||
segments for one MSL after recovery from a reboot - this is the "quiet | segments for one MSL after recovery from a reboot -- this is the "quiet | |||
time" specification. Hosts that prefer to avoid waiting and are | time" specification. Hosts that prefer to avoid waiting and are | |||
willing to risk possible confusion of old and new packets at a given | willing to risk possible confusion of old and new packets at a given | |||
destination may choose not to wait for the "quiet time". | destination may choose not to wait for the "quiet time". | |||
Implementors may provide TCP users with the ability to select on a | Implementers may provide TCP users with the ability to select on a | |||
connection by connection basis whether to wait after a reboot, or may | connection-by-connection basis whether to wait after a reboot, or may | |||
informally implement the "quiet time" for all connections. | informally implement the "quiet time" for all connections. | |||
Obviously, even where a user selects to "wait," this is not | Obviously, even where a user selects to "wait", this is not | |||
necessary after the host has been "up" for at least MSL seconds. | necessary after the host has been "up" for at least MSL seconds. | |||
</t> | </t> | |||
<t> | <t> | |||
To summarize: every segment emitted occupies one or more sequence | To summarize: every segment emitted occupies one or more sequence | |||
numbers in the sequence space, the numbers occupied by a segment are | numbers in the sequence space, and the numbers occupied by a segment are | |||
"busy" or "in use" until MSL seconds have passed, upon rebooting a | "busy" or "in use" until MSL seconds have passed. Upon rebooting, a | |||
block of space-time is occupied by the octets and SYN or FIN flags of any potentially still in-flight | block of space-time is occupied by the octets and SYN or FIN flags of any potentially still in-flight | |||
segments, and if a new connection is started too soon and uses any of the | segments. If a new connection is started too soon and uses any of the | |||
sequence numbers in the space-time footprint of those potentially still in-flight segments of | sequence numbers in the space-time footprint of those potentially still in-flight segments of | |||
the previous connection incarnation, there is a potential sequence | the previous connection incarnation, there is a potential sequence | |||
number overlap area that could cause confusion at the receiver. | number overlap area that could cause confusion at the receiver. | |||
</t> | </t> | |||
<t> | <t> | |||
High performance cases will have shorter cycle times than those in the | High-performance cases will have shorter cycle times than those in the | |||
megabits per second that the base TCP design described above considers. | megabits per second that the base TCP design described above considers. | |||
At 1 Gbps, the cycle time is 34 seconds, only 3 seconds at 10 Gbps, and | At 1 Gbps, the cycle time is 34 seconds, only 3 seconds at 10 Gbps, and | |||
around a third of a second at 100 Gbps. In these higher performance cases, | around a third of a second at 100 Gbps. In these higher-performance cases, | |||
TCP Timestamp options and Protection Against Wrapped Sequences (PAWS) <xref | TCP Timestamp Options and Protection Against Wrapped Sequences (PAWS) <xref | |||
target="RFC7323"/> provide the needed capability to detect and discard old | target="RFC7323" format="default"/> provide the needed capability to detect and discard old | |||
duplicates. | duplicates. | |||
</t> | </t> | |||
</section> | </section> | |||
</section> | </section> | |||
<section title="Establishing a connection"> | <section numbered="true" toc="default"> | |||
<t> | <name>Establishing a Connection</name> | |||
The "three-way handshake" is the procedure used to establish a | <t> | |||
The "three-way handshake" is the procedure used to establish a | ||||
connection. This procedure normally is initiated by one TCP peer and | connection. This procedure normally is initiated by one TCP peer and | |||
responded to by another TCP peer. The procedure also works if two TCP peers | responded to by another TCP peer. The procedure also works if two TCP peers | |||
simultaneously initiate the procedure. When simultaneous open | simultaneously initiate the procedure. When simultaneous open | |||
occurs, each TCP peer receives a "SYN" segment that carries no | occurs, each TCP peer receives a SYN segment that carries no | |||
acknowledgment after it has sent a "SYN". Of course, the arrival of | acknowledgment after it has sent a SYN. Of course, the arrival of | |||
an old duplicate "SYN" segment can potentially make it appear, to the | an old duplicate SYN segment can potentially make it appear, to the | |||
recipient, that a simultaneous connection initiation is in progress. | recipient, that a simultaneous connection initiation is in progress. | |||
Proper use of "reset" segments can disambiguate these cases. | Proper use of "reset" segments can disambiguate these cases. | |||
</t> | </t> | |||
<t> | <t> | |||
Several examples of connection initiation follow. Although these | Several examples of connection initiation follow. Although these | |||
examples do not show connection synchronization using data-carrying | examples do not show connection synchronization using data-carrying | |||
segments, this is perfectly legitimate, so long as the receiving TCP endpoint | segments, this is perfectly legitimate, so long as the receiving TCP endpoint | |||
doesn't deliver the data to the user until it is clear the data is | doesn't deliver the data to the user until it is clear the data is | |||
valid (e.g., the data is buffered at the receiver until the | valid (e.g., the data is buffered at the receiver until the | |||
connection reaches the ESTABLISHED state, given that the three-way handshake | connection reaches the ESTABLISHED state, given that the three-way handshake | |||
reduces the possibility of false connections). It is | reduces the possibility of false connections). It is | |||
a trade-off between memory and messages to provide | a trade-off between memory and messages to provide | |||
information for this checking. | information for this checking. | |||
</t> | </t> | |||
<t> | <t> | |||
The simplest 3WHS is shown in <xref target="handshake" />. The | The simplest 3WHS is shown in <xref target="handshake" format="default"/>. The | |||
figures should be interpreted in the following way. Each line is | figures should be interpreted in the following way. Each line is | |||
numbered for reference purposes. Right arrows (-->) indicate | numbered for reference purposes. Right arrows (-->) indicate | |||
departure of a TCP segment from TCP peer A to TCP peer B, or arrival of a | departure of a TCP segment from TCP Peer A to TCP Peer B or arrival of a | |||
segment at B from A. Left arrows (<--), indicate the reverse. | segment at B from A. Left arrows (<--) indicate the reverse. | |||
Ellipsis (...) indicates a segment that is still in the network | Ellipses (...) indicate a segment that is still in the network | |||
(delayed). | (delayed). | |||
Comments appear in parentheses. TCP connection states represent the state AFTER | Comments appear in parentheses. TCP connection states represent the state AFTER | |||
the departure or arrival of the segment (whose contents are shown in | the departure or arrival of the segment (whose contents are shown in | |||
the center of each line). Segment contents are shown in abbreviated | the center of each line). Segment contents are shown in abbreviated | |||
form, with sequence number, control flags, and ACK field. Other | form, with sequence number, control flags, and ACK field. Other | |||
fields such as window, addresses, lengths, and text have been left out | fields such as window, addresses, lengths, and text have been left out | |||
in the interest of clarity. | in the interest of clarity. | |||
</t> | </t> | |||
<figure anchor="handshake" title="Basic 3-Way Handshake for Connection Synchroni | <figure anchor="handshake"> | |||
zation"> | <name>Basic Three-Way Handshake for Connection Synchronization</name> | |||
<artwork> | <artwork name="" type="" align="left" alt=""><![CDATA[ | |||
TCP Peer A TCP Peer B | TCP Peer A TCP Peer B | |||
1. CLOSED LISTEN | 1. CLOSED LISTEN | |||
2. SYN-SENT --> <SEQ=100><CTL=SYN> --> SYN-RECEIVED | 2. SYN-SENT --> <SEQ=100><CTL=SYN> --> SYN-RECEIVED | |||
3. ESTABLISHED <-- <SEQ=300><ACK=101><CTL=SYN,ACK> <-- SYN-RECEIVED | 3. ESTABLISHED <-- <SEQ=300><ACK=101><CTL=SYN,ACK> <-- SYN-RECEIVED | |||
4. ESTABLISHED --> <SEQ=101><ACK=301><CTL=ACK> --> ESTABLISHED | 4. ESTABLISHED --> <SEQ=101><ACK=301><CTL=ACK> --> ESTABLISHED | |||
5. ESTABLISHED --> <SEQ=101><ACK=301><CTL=ACK><DATA> | 5. ESTABLISHED --> <SEQ=101><ACK=301><CTL=ACK><DATA> --> ESTABLISHED | |||
--> ESTABLISHED | ]]></artwork> | |||
</artwork> | </figure> | |||
</figure> | <t> | |||
<t> | In line 2 of <xref target="handshake" format="default"/>, TCP Peer A begins by | |||
In line 2 of <xref target="handshake" />, TCP Peer A begins by sending a SYN s | sending a SYN segment | |||
egment | ||||
indicating that it will use sequence numbers starting with sequence | indicating that it will use sequence numbers starting with sequence | |||
number 100. In line 3, TCP Peer B sends a SYN and acknowledges the SYN it | number 100. In line 3, TCP Peer B sends a SYN and acknowledges the SYN it | |||
received from TCP Peer A. Note that the acknowledgment field indicates TCP Peer | received from TCP Peer A. Note that the acknowledgment field indicates TCP Peer | |||
B is now expecting to hear sequence 101, acknowledging the SYN that | B is now expecting to hear sequence 101, acknowledging the SYN that | |||
occupied sequence 100. | occupied sequence 100. | |||
</t> | </t> | |||
<t> | <t> | |||
At line 4, TCP Peer A responds with an empty segment containing an ACK for | At line 4, TCP Peer A responds with an empty segment containing an ACK for | |||
TCP Peer B's SYN; and in line 5, TCP Peer A sends some data. Note that the | TCP Peer B's SYN; and in line 5, TCP Peer A sends some data. Note that the | |||
sequence number of the segment in line 5 is the same as in line 4 | sequence number of the segment in line 5 is the same as in line 4 | |||
because the ACK does not occupy sequence number space (if it did, we | because the ACK does not occupy sequence number space (if it did, we | |||
would wind up ACKing ACKs!). | would wind up ACKing ACKs!). | |||
</t> | </t> | |||
<t> | <t> | |||
Simultaneous initiation is only slightly more complex, as is shown in | Simultaneous initiation is only slightly more complex, as is shown in | |||
<xref target="simul_connect" />. Each TCP peer's connection state cycles from CLOSED to SYN-SENT to SYN-RECEIVED to ESTABLISHED. | <xref target="simul_connect" format="default"/>. Each TCP peer's connection state cycles from CLOSED to SYN-SENT to SYN-RECEIVED to ESTABLISHED. | |||
</t> | </t> | |||
<figure anchor="simul_connect" title="Simultaneous Connection Synchronization"> | <figure anchor="simul_connect"> | |||
<artwork> | <name>Simultaneous Connection Synchronization</name> | |||
<artwork name="" type="" align="left" alt=""><![CDATA[ | ||||
TCP Peer A TCP Peer B | TCP Peer A TCP Peer B | |||
1. CLOSED CLOSED | 1. CLOSED CLOSED | |||
2. SYN-SENT --> <SEQ=100><CTL=SYN> ... | 2. SYN-SENT --> <SEQ=100><CTL=SYN> ... | |||
3. SYN-RECEIVED <-- <SEQ=300><CTL=SYN> <-- SYN-SENT | 3. SYN-RECEIVED <-- <SEQ=300><CTL=SYN> <-- SYN-SENT | |||
4. ... <SEQ=100><CTL=SYN> --> SYN-RECEIVED | 4. ... <SEQ=100><CTL=SYN> --> SYN-RECEIVED | |||
5. SYN-RECEIVED --> <SEQ=100><ACK=301><CTL=SYN,ACK> ... | 5. SYN-RECEIVED --> <SEQ=100><ACK=301><CTL=SYN,ACK> ... | |||
6. ESTABLISHED <-- <SEQ=300><ACK=101><CTL=SYN,ACK> <-- SYN-RECEIVED | 6. ESTABLISHED <-- <SEQ=300><ACK=101><CTL=SYN,ACK> <-- SYN-RECEIVED | |||
7. ... <SEQ=100><ACK=301><CTL=SYN,ACK> --> ES | 7. ... <SEQ=100><ACK=301><CTL=SYN,ACK> --> ESTABLISHED | |||
TABLISHED | ]]></artwork> | |||
</artwork> | </figure> | |||
</figure> | <t> | |||
<t> | A TCP implementation <bcp14>MUST</bcp14> support simultaneous open attempts (MUS | |||
A TCP implementation MUST support simultaneous open attempts (MUST-10). | T-10). | |||
</t> | </t> | |||
<t> | <t> | |||
Note that a TCP implementation MUST keep track of whether a | Note that a TCP implementation <bcp14>MUST</bcp14> keep track of whether a | |||
connection has reached SYN-RECEIVED state as the result of a | connection has reached SYN-RECEIVED state as the result of a | |||
passive OPEN or an active OPEN (MUST-11). | passive OPEN or an active OPEN (MUST-11). | |||
</t> | </t> | |||
<t> | <t> | |||
The principal reason for the three-way handshake is to prevent old | The principal reason for the three-way handshake is to prevent old | |||
duplicate connection initiations from causing confusion. To deal with | duplicate connection initiations from causing confusion. To deal with | |||
this, a special control message, reset, is specified. If the | this, a special control message, reset, is specified. If the | |||
receiving TCP peer is in a non-synchronized state (i.e., SYN-SENT, | receiving TCP peer is in a non-synchronized state (i.e., SYN-SENT, | |||
SYN-RECEIVED), it returns to LISTEN on receiving an acceptable reset. | SYN-RECEIVED), it returns to LISTEN on receiving an acceptable reset. | |||
If the TCP peer is in one of the synchronized states (ESTABLISHED, | If the TCP peer is in one of the synchronized states (ESTABLISHED, | |||
FIN-WAIT-1, FIN-WAIT-2, CLOSE-WAIT, CLOSING, LAST-ACK, TIME-WAIT), it | FIN-WAIT-1, FIN-WAIT-2, CLOSE-WAIT, CLOSING, LAST-ACK, TIME-WAIT), it | |||
aborts the connection and informs its user. We discuss this latter | aborts the connection and informs its user. We discuss this latter | |||
case under "half-open" connections below. | case under "half-open" connections below. | |||
</t> | </t> | |||
<figure anchor="dup_syn" title="Recovery from Old Duplicate SYN"> | <figure anchor="dup_syn"> | |||
<artwork> | <name>Recovery from Old Duplicate SYN</name> | |||
<artwork name="" type="" align="left" alt=""><![CDATA[ | ||||
TCP Peer A TCP Peer B | TCP Peer A TCP Peer B | |||
1. CLOSED LISTEN | 1. CLOSED LISTEN | |||
2. SYN-SENT --> <SEQ=100><CTL=SYN> ... | 2. SYN-SENT --> <SEQ=100><CTL=SYN> ... | |||
3. (duplicate) ... <SEQ=90><CTL=SYN> --> SYN-RECEIVED | 3. (duplicate) ... <SEQ=90><CTL=SYN> --> SYN-RECEIVED | |||
4. SYN-SENT <-- <SEQ=300><ACK=91><CTL=SYN,ACK> <-- SYN-RECEIVED | 4. SYN-SENT <-- <SEQ=300><ACK=91><CTL=SYN,ACK> <-- SYN-RECEIVED | |||
5. SYN-SENT --> <SEQ=91><CTL=RST> --> LISTEN | 5. SYN-SENT --> <SEQ=91><CTL=RST> --> LISTEN | |||
6. ... <SEQ=100><CTL=SYN> --> SYN-RECEIVED | 6. ... <SEQ=100><CTL=SYN> --> SYN-RECEIVED | |||
7. ESTABLISHED <-- <SEQ=400><ACK=101><CTL=SYN,ACK> <-- SYN-RECEIVED | 7. ESTABLISHED <-- <SEQ=400><ACK=101><CTL=SYN,ACK> <-- SYN-RECEIVED | |||
8. ESTABLISHED --> <SEQ=101><ACK=401><CTL=ACK> --> ESTABLISHED | 8. ESTABLISHED --> <SEQ=101><ACK=401><CTL=ACK> --> ESTABLISHED | |||
</artwork> | ]]></artwork> | |||
</figure> | </figure> | |||
<t> | <t> | |||
As a simple example of recovery from old duplicates, consider | As a simple example of recovery from old duplicates, consider | |||
<xref target="dup_syn" />. At line 3, an old duplicate SYN arrives at TCP Peer B. TCP Peer B | <xref target="dup_syn" format="default"/>. At line 3, an old duplicate SYN arrives at TCP Peer B. TCP Peer B | |||
cannot tell that this is an old duplicate, so it responds normally | cannot tell that this is an old duplicate, so it responds normally | |||
(line 4). TCP Peer A detects that the ACK field is incorrect and returns a | (line 4). TCP Peer A detects that the ACK field is incorrect and returns a | |||
RST (reset) with its SEQ field selected to make the segment | RST (reset) with its SEQ field selected to make the segment | |||
believable. TCP Peer B, on receiving the RST, returns to the LISTEN state. | believable. TCP Peer B, on receiving the RST, returns to the LISTEN state. | |||
When the original SYN finally arrives at line 6, the | When the original SYN finally arrives at line 6, the | |||
synchronization proceeds normally. If the SYN at line 6 had arrived | synchronization proceeds normally. If the SYN at line 6 had arrived | |||
before the RST, a more complex exchange might have occurred with RST's | before the RST, a more complex exchange might have occurred with RSTs | |||
sent in both directions. | sent in both directions. | |||
</t> | </t> | |||
<section title="Half-Open Connections and Other Anomalies"> | <section numbered="true" toc="default"> | |||
<t> | <name>Half-Open Connections and Other Anomalies</name> | |||
An established connection is said to be "half-open" if one of the | <t> | |||
An established connection is said to be "half-open" if one of the | ||||
TCP peers has closed or aborted the connection at its end without the | TCP peers has closed or aborted the connection at its end without the | |||
knowledge of the other, or if the two ends of the connection have | knowledge of the other, or if the two ends of the connection have | |||
become desynchronized owing to a failure or reboot that resulted in loss of | become desynchronized owing to a failure or reboot that resulted in loss of | |||
memory. Such connections will automatically become reset if an | memory. Such connections will automatically become reset if an | |||
attempt is made to send data in either direction. However, half-open | attempt is made to send data in either direction. However, half-open | |||
connections are expected to be unusual. | connections are expected to be unusual. | |||
</t> | </t> | |||
<t> | <t> | |||
If at site A the connection no longer exists, then an attempt by the | If at site A the connection no longer exists, then an attempt by the | |||
user at site B to send any data on it will result in the site B TCP endpoint | user at site B to send any data on it will result in the site B TCP endpoint | |||
receiving a reset control message. Such a message indicates to the | receiving a reset control message. Such a message indicates to the | |||
site B TCP endpoint that something is wrong, and it is expected to abort the | site B TCP endpoint that something is wrong, and it is expected to abort the | |||
connection. | connection. | |||
</t> | </t> | |||
<t> | <t> | |||
Assume that two user processes A and B are communicating with one | Assume that two user processes A and B are communicating with one | |||
another when a failure or reboot occurs, causing loss of memory to A's TCP implementation. | another when a failure or reboot occurs, causing loss of memory to A's TCP implementation. | |||
Depending on the operating system supporting A's TCP implementation, it is likely | Depending on the operating system supporting A's TCP implementation, it is likely | |||
that some error recovery mechanism exists. When the TCP endpoint is up again, | that some error recovery mechanism exists. When the TCP endpoint is up again, | |||
A is likely to start again from the beginning or from a recovery | A is likely to start again from the beginning or from a recovery | |||
point. As a result, A will probably try to OPEN the connection again | point. As a result, A will probably try to OPEN the connection again | |||
or try to SEND on the connection it believes open. In the latter | or try to SEND on the connection it believes open. In the latter | |||
case, it receives the error message "connection not open" from the | case, it receives the error message "connection not open" from the | |||
local (A's) TCP implementation. In an attempt to establish the connection, A's TCP implementation | local (A's) TCP implementation. In an attempt to establish the connection, A's TCP implementation | |||
will send a segment containing SYN. This scenario leads to the | will send a segment containing SYN. This scenario leads to the | |||
example shown in <xref target="half_open" />. After TCP Peer A reboots, the u | example shown in <xref target="half_open" format="default"/>. After TCP Peer | |||
ser attempts to | A reboots, the user attempts to | |||
re-open the connection. TCP Peer B, in the meantime, thinks the connection | reopen the connection. TCP Peer B, in the meantime, thinks the connection | |||
is open. | is open. | |||
</t> | </t> | |||
<figure anchor="half_open" title="Half-Open Connection Discovery"> | <figure anchor="half_open"> | |||
<artwork> | <name>Half-Open Connection Discovery</name> | |||
<artwork name="" type="" align="left" alt=""><![CDATA[ | ||||
TCP Peer A TCP Peer B | TCP Peer A TCP Peer B | |||
1. (REBOOT) (send 300,receive 100) | 1. (REBOOT) (send 300,receive 100) | |||
2. CLOSED ESTABLISHED | 2. CLOSED ESTABLISHED | |||
3. SYN-SENT --> <SEQ=400><CTL=SYN> --> (??) | 3. SYN-SENT --> <SEQ=400><CTL=SYN> --> (??) | |||
4. (!!) <-- <SEQ=300><ACK=100><CTL=ACK> <-- ESTABLISHED | 4. (!!) <-- <SEQ=300><ACK=100><CTL=ACK> <-- ESTABLISHED | |||
5. SYN-SENT --> <SEQ=100><CTL=RST> --> (Abort!!) | 5. SYN-SENT --> <SEQ=100><CTL=RST> --> (Abort!!) | |||
6. SYN-SENT CLOSED | 6. SYN-SENT CLOSED | |||
7. SYN-SENT --> <SEQ=400><CTL=SYN> --> | 7. SYN-SENT --> <SEQ=400><CTL=SYN> --> | |||
</artwork> | ]]></artwork> | |||
</figure> | </figure> | |||
<t> | <t> | |||
When the SYN arrives at line 3, TCP Peer B, being in a synchronized state, | When the SYN arrives at line 3, TCP Peer B, being in a synchronized state, | |||
and the incoming segment outside the window, responds with an | and the incoming segment outside the window, responds with an | |||
acknowledgment indicating what sequence it next expects to hear (ACK | acknowledgment indicating what sequence it next expects to hear (ACK | |||
100). TCP Peer A sees that this segment does not acknowledge anything it | 100). TCP Peer A sees that this segment does not acknowledge anything it | |||
sent and, being unsynchronized, sends a reset (RST) because it has | sent and, being unsynchronized, sends a reset (RST) because it has | |||
detected a half-open connection. TCP Peer B aborts at line 5. TCP Peer A will | detected a half-open connection. TCP Peer B aborts at line 5. TCP Peer A will | |||
continue to try to establish the connection; the problem is now | continue to try to establish the connection; the problem is now | |||
reduced to the basic 3-way handshake of <xref target="handshake" />. | reduced to the basic three-way handshake of <xref target="handshake" format="default"/>. | |||
</t> | </t> | |||
<t> | <t> | |||
An interesting alternative case occurs when TCP Peer A reboots and TCP Peer B | An interesting alternative case occurs when TCP Peer A reboots and TCP Peer B | |||
tries to send data on what it thinks is a synchronized connection. | tries to send data on what it thinks is a synchronized connection. | |||
This is illustrated in <xref target="crash" />. In this case, the data arriving at | This is illustrated in <xref target="crash" format="default"/>. In this case, the data arriving at | |||
TCP Peer A from TCP Peer B (line 2) is unacceptable because no such connection | TCP Peer A from TCP Peer B (line 2) is unacceptable because no such connection | |||
exists, so TCP Peer A sends a RST. The RST is acceptable, so TCP Peer B | exists, so TCP Peer A sends a RST. The RST is acceptable, so TCP Peer B | |||
processes it and aborts the connection. | processes it and aborts the connection. | |||
</t> | </t> | |||
<figure anchor="crash" title="Active Side Causes Half-Open Connection Discover | <figure anchor="crash"> | |||
y"> | <name>Active Side Causes Half-Open Connection Discovery</name> | |||
<artwork> | <artwork name="" type="" align="left" alt=""><![CDATA[ | |||
TCP Peer A TCP Peer B | TCP Peer A TCP Peer B | |||
1. (REBOOT) (send 300,receive 100) | 1. (REBOOT) (send 300,receive 100) | |||
2. (??) <-- <SEQ=300><ACK=100><DATA=10><CTL=ACK> <-- ESTABLISHED | 2. (??) <-- <SEQ=300><ACK=100><DATA=10><CTL=ACK> <-- ESTABLISHED | |||
3. --> <SEQ=100><CTL=RST> --> (ABORT!!) | 3. --> <SEQ=100><CTL=RST> --> (ABORT!!) | |||
</artwork> | ]]></artwork> | |||
</figure> | </figure> | |||
<t> | <t> | |||
In <xref target="passive_reset" />, two TCP Peers A and B with passive connect | In <xref target="passive_reset" format="default"/>, two TCP Peers A and B with | |||
ions | passive connections | |||
waiting for SYN are depicted. An old duplicate arriving at TCP Peer B (line 2) stirs B | waiting for SYN are depicted. An old duplicate arriving at TCP Peer B (line 2) stirs B | |||
into action. A SYN-ACK is returned (line 3) and causes TCP Peer A to | into action. A SYN-ACK is returned (line 3) and causes TCP Peer A to | |||
generate a RST (the ACK in line 3 is not acceptable). TCP Peer B accepts | generate a RST (the ACK in line 3 is not acceptable). TCP Peer B accepts | |||
the reset and returns to its passive LISTEN state. | the reset and returns to its passive LISTEN state. | |||
</t> | </t> | |||
<figure anchor="passive_reset" title="Old Duplicate SYN Initiates a Reset on two | <figure anchor="passive_reset"> | |||
Passive Sockets"> | <name>Old Duplicate SYN Initiates a Reset on Two Passive Sockets</na | |||
<artwork> | me> | |||
<artwork name="" type="" align="left" alt=""><![CDATA[ | ||||
TCP Peer A TCP Peer B | TCP Peer A TCP Peer B | |||
1. LISTEN LISTEN | 1. LISTEN LISTEN | |||
2. ... <SEQ=Z><CTL=SYN> --> SYN-RECEIVED | 2. ... <SEQ=Z><CTL=SYN> --> SYN-RECEIVED | |||
3. (??) <-- <SEQ=X><ACK=Z+1><CTL=SYN,ACK> <-- SYN-RECEIVED | 3. (??) <-- <SEQ=X><ACK=Z+1><CTL=SYN,ACK> <-- SYN-RECEIVED | |||
4. --> <SEQ=Z+1><CTL=RST> --> (return to LISTEN!) | 4. --> <SEQ=Z+1><CTL=RST> --> (return to LISTEN!) | |||
5. LISTEN LISTEN | 5. LISTEN LISTEN | |||
</artwork> | ]]></artwork> | |||
</figure> | </figure> | |||
<t> | <t> | |||
A variety of other cases are possible, all of which are accounted for | A variety of other cases are possible, all of which are accounted for | |||
by the following rules for RST generation and processing. | by the following rules for RST generation and processing. | |||
</t> | </t> | |||
</section> | </section> | |||
<section title="Reset Generation"> | <section numbered="true" toc="default"> | |||
<t> | <name>Reset Generation</name> | |||
A TCP user or application can issue a reset on a connection at any time, thoug | <t> | |||
h reset events are also generated by the protocol itself when various error cond | A TCP user or application can issue a reset on a connection at any time, thoug | |||
itions occur, as described below. The side of a connection issuing a reset shou | h reset events are also generated by the protocol itself when various error cond | |||
ld enter the TIME-WAIT state, as this generally helps to reduce the load on busy | itions occur, as described below. The side of a connection issuing a reset shou | |||
servers for reasons described in <xref target="FTY99"/>. | ld enter the TIME-WAIT state, as this generally helps to reduce the load on busy | |||
servers for reasons described in <xref target="FTY99" format="default"/>. | ||||
</t> | </t> | |||
<t> | <t> | |||
As a general rule, reset (RST) is sent whenever a segment arrives | As a general rule, reset (RST) is sent whenever a segment arrives | |||
that apparently is not intended for the current connection. A reset | that apparently is not intended for the current connection. A reset | |||
must not be sent if it is not clear that this is the case. | must not be sent if it is not clear that this is the case. | |||
</t> | </t> | |||
<t> | <t> | |||
There are three groups of states: | There are three groups of states: | |||
</t> | </t> | |||
<t><list> | <ol type="1" spacing="normal"> | |||
<t> | <li> | |||
1. If the connection does not exist (CLOSED) then a reset is sent | <t> | |||
If the connection does not exist (CLOSED), then a reset is sent | ||||
in response to any incoming segment except another reset. A SYN | in response to any incoming segment except another reset. A SYN | |||
segment that does not match an existing connection is rejected | segment that does not match an existing connection is rejected | |||
by this means. | by this means. | |||
</t> | </t> | |||
<t> | <t> | |||
If the incoming segment has the ACK bit set, the reset takes its | If the incoming segment has the ACK bit set, the reset takes its | |||
sequence number from the ACK field of the segment, otherwise the | sequence number from the ACK field of the segment; otherwise, the | |||
reset has sequence number zero and the ACK field is set to the sum | reset has sequence number zero and the ACK field is set to the sum | |||
of the sequence number and segment length of the incoming segment. | of the sequence number and segment length of the incoming segment. | |||
The connection remains in the CLOSED state. | The connection remains in the CLOSED state. | |||
</t> | </t> | |||
<t> | </li> | |||
2. If the connection is in any non-synchronized state (LISTEN, | <li> | |||
<t> | ||||
If the connection is in any non-synchronized state (LISTEN, | ||||
SYN-SENT, SYN-RECEIVED), and the incoming segment acknowledges | SYN-SENT, SYN-RECEIVED), and the incoming segment acknowledges | |||
something not yet sent (the segment carries an unacceptable ACK), or | something not yet sent (the segment carries an unacceptable ACK), or | |||
if an incoming segment has a security level or compartment <xref target="seccomp"/> that | if an incoming segment has a security level or compartment (<xref target="seccomp" format="default"/>) that | |||
does not exactly match the level and compartment requested for the | does not exactly match the level and compartment requested for the | |||
connection, a reset is sent. | connection, a reset is sent. | |||
</t> | </t> | |||
<t> | <t> | |||
If the incoming segment has an ACK field, the reset takes its | If the incoming segment has an ACK field, the reset takes its | |||
sequence number from the ACK field of the segment, otherwise the | sequence number from the ACK field of the segment; otherwise, the | |||
reset has sequence number zero and the ACK field is set to the sum | reset has sequence number zero and the ACK field is set to the sum | |||
of the sequence number and segment length of the incoming segment. | of the sequence number and segment length of the incoming segment. | |||
The connection remains in the same state. | The connection remains in the same state. | |||
</t> | </t> | |||
<t> | </li> | |||
3. If the connection is in a synchronized state (ESTABLISHED, | <li> | |||
<t> | ||||
If the connection is in a synchronized state (ESTABLISHED, | ||||
FIN-WAIT-1, FIN-WAIT-2, CLOSE-WAIT, CLOSING, LAST-ACK, TIME-WAIT), | FIN-WAIT-1, FIN-WAIT-2, CLOSE-WAIT, CLOSING, LAST-ACK, TIME-WAIT), | |||
any unacceptable segment (out of window sequence number or | any unacceptable segment (out-of-window sequence number or | |||
unacceptable acknowledgment number) must be responded to with an empty | unacceptable acknowledgment number) must be responded to with an empty | |||
acknowledgment segment (without any user data) containing the current send-sequence number | acknowledgment segment (without any user data) containing the current send sequence number | |||
and an acknowledgment indicating the next sequence number expected | and an acknowledgment indicating the next sequence number expected | |||
to be received, and the connection remains in the same state. | to be received, and the connection remains in the same state. | |||
</t> | </t> | |||
<t> | <t> | |||
If an incoming segment has a security level or compartment | If an incoming segment has a security level or compartment | |||
that does not exactly match the level and compartment | that does not exactly match the level and compartment | |||
requested for the connection, a reset is sent and | requested for the connection, a reset is sent and | |||
the connection goes to the CLOSED state. The reset takes its sequence | the connection goes to the CLOSED state. The reset takes its sequence | |||
number from the ACK field of the incoming segment. | number from the ACK field of the incoming segment. | |||
</t> | </t> | |||
</list></t> | </li> | |||
</section> | </ol> | |||
<section title="Reset Processing"> | </section> | |||
<t> | <section numbered="true" toc="default"> | |||
<name>Reset Processing</name> | ||||
<t> | ||||
In all states except SYN-SENT, all reset (RST) segments are validated | In all states except SYN-SENT, all reset (RST) segments are validated | |||
by checking their SEQ-fields. A reset is valid if its sequence number | by checking their SEQ fields. A reset is valid if its sequence number | |||
is in the window. In the SYN-SENT state (a RST received in response | is in the window. In the SYN-SENT state (a RST received in response | |||
to an initial SYN), the RST is acceptable if the ACK field | to an initial SYN), the RST is acceptable if the ACK field | |||
acknowledges the SYN. | acknowledges the SYN. | |||
</t> | </t> | |||
<t> | <t> | |||
The receiver of a RST first validates it, then changes state. If the | The receiver of a RST first validates it, then changes state. If the | |||
receiver was in the LISTEN state, it ignores it. If the receiver was | receiver was in the LISTEN state, it ignores it. If the receiver was | |||
in SYN-RECEIVED state and had previously been in the LISTEN state, | in SYN-RECEIVED state and had previously been in the LISTEN state, | |||
then the receiver returns to the LISTEN state, otherwise the receiver | then the receiver returns to the LISTEN state; otherwise, the receiver | |||
aborts the connection and goes to the CLOSED state. If the receiver | aborts the connection and goes to the CLOSED state. If the receiver | |||
was in any other state, it aborts the connection and advises the user | was in any other state, it aborts the connection and advises the user | |||
and goes to the CLOSED state. | and goes to the CLOSED state. | |||
</t> | </t> | |||
<t> | <t> | |||
TCP implementations SHOULD allow a received RST segment to include data (SHLD- | TCP implementations <bcp14>SHOULD</bcp14> allow a received RST segment to incl | |||
2). | ude data (SHLD-2). | |||
It has been suggested that a RST segment could contain diagnostic data that | It has been suggested that a RST segment could contain diagnostic data that | |||
explains the cause of the RST. No standard has yet been established for such data. | explains the cause of the RST. No standard has yet been established for such data. | |||
</t> | </t> | |||
</section> | </section> | |||
</section> | </section> | |||
<section title="Closing a Connection"> | <section numbered="true" toc="default"> | |||
<t> | <name>Closing a Connection</name> | |||
CLOSE is an operation meaning "I have no more data to send." The | <t> | |||
CLOSE is an operation meaning "I have no more data to send." The | ||||
notion of closing a full-duplex connection is subject to ambiguous | notion of closing a full-duplex connection is subject to ambiguous | |||
interpretation, of course, since it may not be obvious how to treat | interpretation, of course, since it may not be obvious how to treat | |||
the receiving side of the connection. We have chosen to treat CLOSE | the receiving side of the connection. We have chosen to treat CLOSE | |||
in a simplex fashion. The user who CLOSEs may continue to RECEIVE | in a simplex fashion. The user who CLOSEs may continue to RECEIVE | |||
until the TCP receiver is told that the remote peer has CLOSED also. Thus, a program | until the TCP receiver is told that the remote peer has CLOSED also. Thus, a program | |||
could initiate several SENDs followed by a CLOSE, and then continue to | could initiate several SENDs followed by a CLOSE, and then continue to | |||
RECEIVE until signaled that a RECEIVE failed because the remote peer | RECEIVE until signaled that a RECEIVE failed because the remote peer | |||
has CLOSED. The TCP implementation will signal a user, even if no | has CLOSED. The TCP implementation will signal a user, even if no | |||
RECEIVEs are outstanding, that the remote peer has closed, so the user | RECEIVEs are outstanding, that the remote peer has closed, so the user | |||
can terminate their side gracefully. A TCP implementation will reliably deliver all | can terminate their side gracefully. A TCP implementation will reliably deliver all | |||
buffers SENT before the connection was CLOSED so a user who expects no | buffers SENT before the connection was CLOSED so a user who expects no | |||
data in return need only wait to hear the connection was CLOSED | data in return need only wait to hear the connection was CLOSED | |||
successfully to know that all their data was received at the destination | successfully to know that all their data was received at the destination | |||
TCP endpoint. Users must keep reading connections they close for sending until | TCP endpoint. Users must keep reading connections they close for sending until | |||
the TCP implementation indicates there is no more data. | the TCP implementation indicates there is no more data. | |||
</t> | </t> | |||
<t> | <t> | |||
There are essentially three cases: | There are essentially three cases: | |||
</t> | </t> | |||
<t><list> | <ol type="%d)" spacing="normal"> | |||
<t> | <li> | |||
1) The user initiates by telling the TCP implementation to CLOSE the connect | The user initiates by telling the TCP implementation to CLOSE the connection | |||
ion (TCP Peer A in <xref target="normal_close"/>). | (TCP Peer A in <xref target="normal_close" format="default"/>). | |||
</t> | </li> | |||
<t> | <li> | |||
2) The remote TCP endpoint initiates by sending a FIN control signal (TCP Pe | The remote TCP endpoint initiates by sending a FIN control signal (TCP Peer | |||
er B in <xref target="normal_close"/>). | B in <xref target="normal_close" format="default"/>). | |||
</t> | </li> | |||
<t> | <li> | |||
3) Both users CLOSE simultaneously (<xref target="simul_close"/>). | Both users CLOSE simultaneously (<xref target="simul_close" format="default" | |||
/>). | ||||
</li> | ||||
</ol> | ||||
<dl newline="false" spacing="normal"> | ||||
<dt>Case 1:</dt> | ||||
<dd> | ||||
<t> | ||||
Local user initiates the close | ||||
</t> | </t> | |||
</list></t> | <t> | |||
<t><list style="hanging"> | ||||
<t hangText="Case 1: Local user initiates the close"><vspace /> | ||||
<vspace /> | ||||
In this case, a FIN segment can be constructed and placed on the | In this case, a FIN segment can be constructed and placed on the | |||
outgoing segment queue. No further SENDs from the user will be | outgoing segment queue. No further SENDs from the user will be | |||
accepted by the TCP implementation, and it enters the FIN-WAIT-1 state. RECEIVEs | accepted by the TCP implementation, and it enters the FIN-WAIT-1 state. RECEIVEs | |||
are allowed in this state. All segments preceding and including FIN | are allowed in this state. All segments preceding and including FIN | |||
will be retransmitted until acknowledged. When the other TCP peer has | will be retransmitted until acknowledged. When the other TCP peer has | |||
both acknowledged the FIN and sent a FIN of its own, the first TCP peer | both acknowledged the FIN and sent a FIN of its own, the first TCP peer | |||
can ACK this FIN. Note that a TCP endpoint receiving a FIN will ACK but not | can ACK this FIN. Note that a TCP endpoint receiving a FIN will ACK but not | |||
send its own FIN until its user has CLOSED the connection also. | send its own FIN until its user has CLOSED the connection also. | |||
</t> | </t> | |||
<t hangText="Case 2: TCP endpoint receives a FIN from the network"><vspace /> | </dd> | |||
<vspace /> | <dt>Case 2:</dt> | |||
<dd> | ||||
<t> | ||||
TCP endpoint receives a FIN from the network | ||||
</t> | ||||
<t> | ||||
If an unsolicited FIN arrives from the network, the receiving TCP endpoint | If an unsolicited FIN arrives from the network, the receiving TCP endpoint | |||
can ACK it and tell the user that the connection is closing. The | can ACK it and tell the user that the connection is closing. The | |||
user will respond with a CLOSE, upon which the TCP endpoint can send a FIN to | user will respond with a CLOSE, upon which the TCP endpoint can send a FIN to | |||
the other TCP peer after sending any remaining data. The TCP endpoint then waits | the other TCP peer after sending any remaining data. The TCP endpoint then waits | |||
until its own FIN is acknowledged whereupon it deletes the | until its own FIN is acknowledged whereupon it deletes the | |||
connection. If an ACK is not forthcoming, after the user timeout | connection. If an ACK is not forthcoming, after the user timeout | |||
the connection is aborted and the user is told. | the connection is aborted and the user is told. | |||
</t> | </t> | |||
<t hangText="Case 3: Both users close simultaneously"><vspace /> | </dd> | |||
<vspace /> | <dt>Case 3:</dt> | |||
<dd> | ||||
<t> | ||||
Both users close simultaneously | ||||
</t> | ||||
<t> | ||||
A simultaneous CLOSE by users at both ends of a connection causes | A simultaneous CLOSE by users at both ends of a connection causes | |||
FIN segments to be exchanged (<xref target="simul_close"/>). When all segments preceding the FINs | FIN segments to be exchanged (<xref target="simul_close" format="default"/>). When all segments preceding the FINs | |||
have been processed and acknowledged, each TCP peer can ACK the FIN it | have been processed and acknowledged, each TCP peer can ACK the FIN it | |||
has received. Both will, upon receiving these ACKs, delete the | has received. Both will, upon receiving these ACKs, delete the | |||
connection. | connection. | |||
</t> | </t> | |||
</list></t> | </dd> | |||
<figure anchor="normal_close" title="Normal Close Sequence"> | </dl> | |||
<artwork> | <figure anchor="normal_close"> | |||
<name>Normal Close Sequence</name> | ||||
<artwork name="" type="" align="left" alt=""><![CDATA[ | ||||
TCP Peer A TCP Peer B | TCP Peer A TCP Peer B | |||
1. ESTABLISHED ESTABLISHED | 1. ESTABLISHED ESTABLISHED | |||
2. (Close) | 2. (Close) | |||
FIN-WAIT-1 --> <SEQ=100><ACK=300><CTL=FIN,ACK> --> CLOSE-WAIT | FIN-WAIT-1 --> <SEQ=100><ACK=300><CTL=FIN,ACK> --> CLOSE-WAIT | |||
3. FIN-WAIT-2 <-- <SEQ=300><ACK=101><CTL=ACK> <-- CLOSE-WAIT | 3. FIN-WAIT-2 <-- <SEQ=300><ACK=101><CTL=ACK> <-- CLOSE-WAIT | |||
4. (Close) | 4. (Close) | |||
TIME-WAIT <-- <SEQ=300><ACK=101><CTL=FIN,ACK> <-- LAST-ACK | TIME-WAIT <-- <SEQ=300><ACK=101><CTL=FIN,ACK> <-- LAST-ACK | |||
5. TIME-WAIT --> <SEQ=101><ACK=301><CTL=ACK> --> CLOSED | 5. TIME-WAIT --> <SEQ=101><ACK=301><CTL=ACK> --> CLOSED | |||
6. (2 MSL) | 6. (2 MSL) | |||
CLOSED | CLOSED | |||
</artwork> | ]]></artwork> | |||
</figure> | </figure> | |||
<figure anchor="simul_close" title="Simultaneous Close Sequence"> | <figure anchor="simul_close"> | |||
<artwork> | <name>Simultaneous Close Sequence</name> | |||
<artwork name="" type="" align="left" alt=""><![CDATA[ | ||||
TCP Peer A TCP Peer B | TCP Peer A TCP Peer B | |||
1. ESTABLISHED ESTABLISHED | 1. ESTABLISHED ESTABLISHED | |||
2. (Close) (Close) | 2. (Close) (Close) | |||
FIN-WAIT-1 --> <SEQ=100><ACK=300><CTL=FIN,ACK> ... FI | FIN-WAIT-1 --> <SEQ=100><ACK=300><CTL=FIN,ACK> ... FIN-WAIT-1 | |||
N-WAIT-1 | <-- <SEQ=300><ACK=100><CTL=FIN,ACK> <-- | |||
<-- <SEQ=300><ACK=100><CTL=FIN,ACK> <-- | ... <SEQ=100><ACK=300><CTL=FIN,ACK> --> | |||
... <SEQ=100><ACK=300><CTL=FIN,ACK> --> | ||||
3. CLOSING --> <SEQ=101><ACK=301><CTL=ACK> ... CL | 3. CLOSING --> <SEQ=101><ACK=301><CTL=ACK> ... CLOSING | |||
OSING | <-- <SEQ=301><ACK=101><CTL=ACK> <-- | |||
<-- <SEQ=301><ACK=101><CTL=ACK> <-- | ... <SEQ=101><ACK=301><CTL=ACK> --> | |||
... <SEQ=101><ACK=301><CTL=ACK> --> | ||||
4. TIME-WAIT TIME-WAIT | 4. TIME-WAIT TIME-WAIT | |||
(2 MSL) (2 MSL) | (2 MSL) (2 MSL) | |||
CLOSED CLOSED | CLOSED CLOSED | |||
</artwork> | ]]></artwork> | |||
</figure> | </figure> | |||
<t> | <t> | |||
A TCP connection may terminate in two ways: (1) the normal | A TCP connection may terminate in two ways: (1) the normal | |||
TCP close sequence using a FIN handshake (<xref target="normal_close"/>), and (2) an "abort" | TCP close sequence using a FIN handshake (<xref target="normal_close" format="default"/>), and (2) an "abort" | |||
in which one or more RST segments are sent and the | in which one or more RST segments are sent and the | |||
connection state is immediately discarded. If the local | connection state is immediately discarded. If the local | |||
TCP connection is closed by the remote side due to a FIN or | TCP connection is closed by the remote side due to a FIN or | |||
RST received from the remote side, then the local | RST received from the remote side, then the local | |||
application MUST be informed whether it closed normally or | application <bcp14>MUST</bcp14> be informed whether it closed normally or | |||
was aborted (MUST-12). | was aborted (MUST-12). | |||
</t> | </t> | |||
<t> | <t> | |||
</t> | </t> | |||
<section title="Half-Closed Connections"> | <section numbered="true" toc="default"> | |||
<t> | <name>Half-Closed Connections</name> | |||
<t> | ||||
The normal TCP close sequence delivers buffered data | The normal TCP close sequence delivers buffered data | |||
reliably in both directions. Since the two directions of a | reliably in both directions. Since the two directions of a | |||
TCP connection are closed independently, it is possible for | TCP connection are closed independently, it is possible for | |||
a connection to be "half closed," i.e., closed in only one | a connection to be "half closed", i.e., closed in only one | |||
direction, and a host is permitted to continue sending data | direction, and a host is permitted to continue sending data | |||
in the open direction on a half-closed connection. | in the open direction on a half-closed connection. | |||
</t> | </t> | |||
<t> | <t> | |||
A host MAY implement a "half-duplex" TCP close sequence, s | A host <bcp14>MAY</bcp14> implement a "half-duplex" TCP close sequen | |||
o | ce, so | |||
that an application that has called CLOSE cannot continue to | that an application that has called CLOSE cannot continue to | |||
read data from the connection (MAY-1). If such a host issues a | read data from the connection (MAY-1). If such a host issues a | |||
CLOSE call while received data is still pending in the TCP connection, or | CLOSE call while received data is still pending in the TCP connection, or | |||
if new data is received after CLOSE is called, its TCP implementation | if new data is received after CLOSE is called, its TCP implementation | |||
SHOULD send a RST to show that data was lost (SHLD-3). See <xref | <bcp14>SHOULD</bcp14> send a RST to show that data was lost (SHLD-3). | |||
target="RFC2525"/> section 2.17 for discussion. | See <xref target="RFC2525" section="2.17" sectionFormat="comma" format="defaul | |||
t"/> for discussion. | ||||
</t> | </t> | |||
<t> | <t> | |||
When a connection is closed actively, it MUST linger in the | When a connection is closed actively, it <bcp14>MUST</bcp14> linger | |||
in the | ||||
TIME-WAIT state for a time 2xMSL (Maximum Segment Lifetime) (MUST-13). | TIME-WAIT state for a time 2xMSL (Maximum Segment Lifetime) (MUST-13). | |||
However, it MAY accept a new SYN from the remote TCP endpoint to | However, it <bcp14>MAY</bcp14> accept a new SYN from the remote TCP endpoint to | |||
reopen the connection directly from TIME-WAIT state (MAY-2), if it: | reopen the connection directly from TIME-WAIT state (MAY-2), if it: | |||
<list> | </t> | |||
<t> | <ol type="(%d)" spacing="normal"> | |||
(1) assigns its initial sequence number for the new | <li> | |||
assigns its initial sequence number for the new | ||||
connection to be larger than the largest sequence | connection to be larger than the largest sequence | |||
number it used on the previous connection incarnation, | number it used on the previous connection incarnation, | |||
and | and | |||
</t> | </li> | |||
<t> | <li> | |||
(2) returns to TIME-WAIT state if the SYN turns out to be | returns to TIME-WAIT state if the SYN turns out to be | |||
an old duplicate. | an old duplicate. | |||
</t> | </li> | |||
</list> | </ol> | |||
</t> | <t> | |||
<t> | ||||
When the TCP Timestamp options are available, an improved algorithm is | When the TCP Timestamp Options are available, an improved algorithm is | |||
described in <xref target="RFC6191"/> in order to support higher connection | described in <xref target="RFC6191" format="default"/> in order to support highe | |||
r connection | ||||
establishment rates. This algorithm for reducing TIME-WAIT is a Best Current | establishment rates. This algorithm for reducing TIME-WAIT is a Best Current | |||
Practice that SHOULD be implemented, since timestamp options are commonly used, | Practice that <bcp14>SHOULD</bcp14> be implemented since Timestamp Options are commonly used, | |||
and using them to reduce TIME-WAIT provides benefits for busy Internet servers (SHLD-4). | and using them to reduce TIME-WAIT provides benefits for busy Internet servers (SHLD-4). | |||
</t> | </t> | |||
</section> | </section> | |||
</section> | </section> | |||
<section numbered="true" toc="default"> | ||||
<section title="Segmentation"> | <name>Segmentation</name> | |||
<t> The term "segmentation" refers to the activity TCP performs wh | <t> The term "segmentation" refers to the activity TCP performs when ing | |||
en ingesting a stream of bytes from a sending application and packetizing that s | esting a stream of bytes from a sending application and packetizing that stream | |||
tream of bytes into TCP segments. Individual TCP segments often do not correspo | of bytes into TCP segments. Individual TCP segments often do not correspond one | |||
nd one-for-one to individual send (or socket write) calls from the application. | -for-one to individual send (or socket write) calls from the application. Appli | |||
Applications may perform writes at the granularity of messages in the upper lay | cations may perform writes at the granularity of messages in the upper-layer pro | |||
er protocol, but TCP guarantees no boundary coherence between the TCP segments s | tocol, but TCP guarantees no correlation between the boundaries of TCP segments | |||
ent and received versus user application data read or write buffer boundaries. | sent and received and the boundaries of the read or write buffers of user applic | |||
In some specific protocols, such as Remote Direct Memory Access (RDMA) using Dir | ation data. In some specific protocols, such as Remote Direct Memory Access (RD | |||
ect Data Placement (DDP) and Marker PDU Aligned Framing (MPA) <xref target="RFC5 | MA) using Direct Data Placement (DDP) and Marker PDU Aligned Framing (MPA) <xref | |||
044"/>, there are performance optimizations possible when the relation between T | target="RFC5044" format="default"/>, there are performance optimizations possib | |||
CP segments and application data units can be controlled, and MPA includes a spe | le when the relation between TCP segments and application data units can be cont | |||
cific mechanism for detecting and verifying this relationship between TCP segmen | rolled, and MPA includes a specific mechanism for detecting and verifying this r | |||
ts and application message data structures, but this is specific to applications | elationship between TCP segments and application message data structures, but th | |||
like RDMA. In general, multiple goals influence the sizing of TCP segments cre | is is specific to applications like RDMA. In general, multiple goals influence | |||
ated by a TCP implementation.</t> | the sizing of TCP segments created by a TCP implementation.</t> | |||
<t>Goals driving the sending of larger segments include: | ||||
<t>Goals driving the sending of larger segments include: | </t> | |||
<list style="symbols"> | <ul spacing="normal"> | |||
<t>Reducing the number of packets in flight within the network.</t> | <li>Reducing the number of packets in flight within the network.</li> | |||
<t>Increasing processing efficiency and potential performance by enabling | <li>Increasing processing efficiency and potential performance by enab | |||
a smaller number of interrupts and inter-layer interactions.</t> | ling a smaller number of interrupts and inter-layer interactions.</li> | |||
<t>Limiting the overhead of TCP headers.</t> | <li>Limiting the overhead of TCP headers.</li> | |||
</list> | </ul> | |||
</t> | <t>Note that the performance benefits of sending larger segments may dec | |||
<t>Note that the performance benefits of sending larger segments may decreas | rease as the size increases, and there may be boundaries where advantages are re | |||
e as the size increases, and there may be boundaries where advantages are revers | versed. For instance, on some implementation architectures, 1025 bytes within a | |||
ed. For instance, on some implementation architectures, 1025 bytes within a seg | segment could lead to worse performance than 1024 bytes, due purely to data ali | |||
ment could lead to worse performance than 1024 bytes, due purely to data alignme | gnment on copy operations.</t> | |||
nt on copy operations.</t> | <t>Goals driving the sending of smaller segments include: | |||
<t>Goals driving the sending of smaller segments include: | </t> | |||
<list style="symbols"> | <ul spacing="normal"> | |||
<t>Avoiding sending a TCP segment that would result in an IP datagram larger | <li>Avoiding sending a TCP segment that would result in an IP datagram | |||
than the smallest MTU along an IP network path, because this results in either | larger than the smallest MTU along an IP network path because this results in | |||
packet loss or packet fragmentation. Making matters worse, some firewalls or m | either packet loss or packet fragmentation. Making matters worse, some firewall | |||
iddleboxes may drop fragmented packets or ICMP messages related to fragmentation | s or middleboxes may drop fragmented packets or ICMP messages related to fragmen | |||
.</t> | tation.</li> | |||
<t>Preventing delays to the application data stream, especially when TCP is | <li>Preventing delays to the application data stream, especially when | |||
waiting on the application to generate more data, or when the application is wai | TCP is waiting on the application to generate more data, or when the application | |||
ting on an event or input from its peer in order to generate more data.</t> | is waiting on an event or input from its peer in order to generate more data.</ | |||
<t>Enabling "fate sharing" between TCP segments and lower-layer da | li> | |||
ta units (e.g. below IP, for links with cell or frame sizes smaller than the IP | <li>Enabling "fate sharing" between TCP segments and lower-layer data | |||
MTU).</t> | units (e.g., below IP, for links with cell or frame sizes smaller than the IP MT | |||
</list> | U).</li> | |||
</t> | </ul> | |||
<t>Towards meeting these competing sets of goals, TCP includes several m | ||||
<t>Towards meeting these competing sets of goals, TCP includes several mecha | echanisms, including the Maximum Segment Size Option, Path MTU Discovery, the Na | |||
nisms, including the Maximum Segment Size option, Path MTU Discovery, the Nagle | gle algorithm, and support for IPv6 Jumbograms, as discussed in the following su | |||
algorithm, and support for IPv6 Jumbograms, as discussed in the following subsec | bsections.</t> | |||
tions.</t> | <section anchor="mss" numbered="true" toc="default"> | |||
<name>Maximum Segment Size Option</name> | ||||
<section title="Maximum Segment Size Option" anchor="mss"> | <t> | |||
<t> | TCP endpoints <bcp14>MUST</bcp14> implement both sending and receiving the M | |||
TCP endpoints MUST implement both sending and receiving the MSS option (MUST | SS Option (MUST-14). | |||
-14). | </t> | |||
</t> | <t> | |||
<t> | TCP implementations <bcp14>SHOULD</bcp14> send an MSS Option in | |||
TCP implementations SHOULD send an MSS option in | ||||
every SYN segment when its receive MSS differs from the | every SYN segment when its receive MSS differs from the | |||
default 536 for IPv4 or 1220 for IPv6 (SHLD-5), and MAY send it always (MAY- | default 536 for IPv4 or 1220 for IPv6 (SHLD-5), and <bcp14>MAY</bcp14> send | |||
3). | it always (MAY-3). | |||
</t> | </t> | |||
<t> | <t> | |||
If an MSS option is not received at connection setup, TCP implementations | If an MSS Option is not received at connection setup, TCP implementations | |||
MUST assume a default send MSS of 536 (576 - 40) for IPv4 or 1220 (1280 - 60 | <bcp14>MUST</bcp14> assume a default send MSS of 536 (576 - 40) for IPv4 or | |||
) for IPv6 (MUST-15). | 1220 (1280 - 60) for IPv6 (MUST-15). | |||
</t> | </t> | |||
<t> | <t> | |||
The maximum size of a segment that TCP endpoint really sends, the | The maximum size of a segment that a TCP endpoint really sends, the | |||
"effective send MSS," MUST be the smaller (MUST-16) of the send MS | "effective send MSS", <bcp14>MUST</bcp14> be the smaller (MUST-16) of the se | |||
S | nd MSS | |||
(that reflects the available reassembly buffer size at the | (that reflects the available reassembly buffer size at the | |||
remote host, the EMTU_R <xref target="RFC1122"/>) and the largest transmissi | remote host, the EMTU_R <xref target="RFC1122" format="default"/>) and the l | |||
on size permitted by the IP layer (EMTU_S <xref target="RFC1122"/>): | argest transmission size permitted by the IP layer (EMTU_S <xref target="RFC1122 | |||
<list style="hanging" hangIndent="4"> | " format="default"/>): | |||
<t>Eff.snd.MSS = | </t> | |||
<list style="hanging" hangIndent="4"> | <t> | |||
<t>min(SendMSS+20, MMS_S) - TCPhdrsize - IPoptionsize</t> | Eff.snd.MSS = min(SendMSS+20, MMS_S) - TCPhdrsize - IPoptionsize | |||
</list> | </t> | |||
</t> | <t> | |||
</list> | ||||
where: | where: | |||
<list style="symbols"> | </t> | |||
<t> | <ul spacing="normal"> | |||
<li> | ||||
SendMSS is the MSS value received from the remote host, | SendMSS is the MSS value received from the remote host, | |||
or the default 536 for IPv4 or 1220 for IPv6, if no MSS option is receiv | or the default 536 for IPv4 or 1220 for IPv6, if no MSS Option is receiv | |||
ed. | ed. | |||
</t> | </li> | |||
<t> | <li> | |||
MMS_S is the maximum size for a transport-layer message | MMS_S is the maximum size for a transport-layer message | |||
that TCP may send. | that TCP may send. | |||
</t> | </li> | |||
<t> | <li> | |||
TCPhdrsize is the size of the fixed TCP header and any options. This is | TCPhdrsize is the size of the fixed TCP header and any options. This is | |||
20 in the (rare) case that no options are present, but may be larger if TCP opt | 20 in the (rare) case that no options are present but may be larger if TCP Opti | |||
ions are to be sent. Note that some options might not be included on all segmen | ons are to be sent. Note that some options might not be included on all segment | |||
ts, but that for each segment sent, the sender should adjust the data length acc | s, but that for each segment sent, the sender should adjust the data length acco | |||
ordingly, within the Eff.snd.MSS. | rdingly, within the Eff.snd.MSS. | |||
</t> | </li> | |||
<t> | <li> | |||
IPoptionsize is the size of any IPv4 options or IPv6 extension headers associated with a TCP connection. Note that some options or extension headers might not be included on all packets, but that for each segment sent, the sender should adjust the data length accordingly, within the Eff.snd.MSS. | IPoptionsize is the size of any IPv4 options or IPv6 extension headers associated with a TCP connection. Note that some options or extension headers might not be included on all packets, but that for each segment sent, the sender should adjust the data length accordingly, within the Eff.snd.MSS. | |||
</t> | </li> | |||
</list> | </ul> | |||
</t> | <t> | |||
<t> | The MSS value to be sent in an MSS Option should be equal to the | |||
The MSS value to be sent in an MSS option should be equal to the | ||||
effective MTU minus the fixed IP and TCP headers. By ignoring both | effective MTU minus the fixed IP and TCP headers. By ignoring both | |||
IP and TCP options when calculating the value for the MSS option, if | IP and TCP Options when calculating the value for the MSS Option, if | |||
there are any IP or TCP options to be sent in a packet, then the | there are any IP or TCP Options to be sent in a packet, then the | |||
sender must decrease the size of the TCP data accordingly. RFC 6691 <xref ta | sender must decrease the size of the TCP data accordingly. RFC 6691 <xref ta | |||
rget="RFC6691"/> | rget="RFC6691" format="default"/> | |||
discusses this in greater detail. | discusses this in greater detail. | |||
</t> | </t> | |||
<t> | <t> | |||
The MSS value to be sent in an MSS option must be less than | The MSS value to be sent in an MSS Option must be less than | |||
or equal to: | or equal to: | |||
<list> | </t> | |||
<t>MMS_R - 20</t> | <t indent="3"> | |||
</list> | MMS_R - 20 | |||
</t> | ||||
<t> | ||||
where MMS_R is the maximum size for a transport-layer | where MMS_R is the maximum size for a transport-layer | |||
message that can be received (and reassembled at the IP layer) (MUST-67). TCP obtains | message that can be received (and reassembled at the IP layer) (MUST-67). TCP obtains | |||
MMS_R and MMS_S from the IP layer; see the generic call | MMS_R and MMS_S from the IP layer; see the generic call | |||
GET_MAXSIZES in Section 3.4 of RFC 1122. These are defined in terms of thei | GET_MAXSIZES in Section <xref target="RFC1122" section="3.4" sectionFormat=" | |||
r IP MTU equivalents, EMTU_R and EMTU_S <xref target="RFC1122"/>. | bare" format="default"/> of RFC 1122. These are defined in terms of their IP MT | |||
</t> | U equivalents, EMTU_R and EMTU_S <xref target="RFC1122" format="default"/>. | |||
<t> | </t> | |||
<t> | ||||
When TCP is used in a situation where either the IP or TCP headers | When TCP is used in a situation where either the IP or TCP headers | |||
are not fixed, the sender must reduce the amount of TCP data in | are not fixed, the sender must reduce the amount of TCP data in | |||
any given packet by the number of octets used by the IP and TCP | any given packet by the number of octets used by the IP and TCP | |||
options. This has been a point of confusion historically, as explained in R | options. This has been a point of confusion historically, as explained in R | |||
FC 6691, Section 3.1. | FC 6691, Section <xref target="RFC6691" section="3.1" sectionFormat="bare" forma | |||
</t> | t="default"/>. | |||
</section> | </t> | |||
<section title="Path MTU Discovery" anchor="pmtud"> | </section> | |||
<section anchor="pmtud" numbered="true" toc="default"> | ||||
<t>A TCP implementation may be aware of the MTU on directly connected links, | <name>Path MTU Discovery</name> | |||
but will rarely have insight about MTUs across an entire network path. For IPv4 | <t>A TCP implementation may be aware of the MTU on directly connected | |||
, RFC 1122 recommends an IP-layer default effective MTU of less than or equal to | links, but will rarely have insight about MTUs across an entire network path. F | |||
576 for destinations not directly connected, and for IPv6 this would be 1280. | or IPv4, RFC 1122 recommends an IP-layer default effective MTU of less than or e | |||
Using these fixed values limits TCP connection performance and efficiency. Inst | qual to 576 for destinations not directly connected, and for IPv6 this would be | |||
ead, implementation of Path MTU Discovery (PMTUD) and Packetization Layer Path M | 1280. Using these fixed values limits TCP connection performance and efficiency | |||
TU Discovery (PLPMTUD) is strongly recommended in order for TCP to improve segme | . Instead, implementation of Path MTU Discovery (PMTUD) and Packetization Layer | |||
ntation decisions. Both PMTUD and PLPMTUD help TCP choose segment sizes that av | Path MTU Discovery (PLPMTUD) is strongly recommended in order for TCP to improv | |||
oid both on-path (for IPv4) and source fragmentation (IPv4 and IPv6).</t> | e segmentation decisions. Both PMTUD and PLPMTUD help TCP choose segment sizes | |||
that avoid both on-path (for IPv4) and source fragmentation (IPv4 and IPv6).</t> | ||||
<t>PMTUD for IPv4 <xref target="RFC1191"/> or IPv6 <xref target="RFC8201"/> i | <t>PMTUD for IPv4 <xref target="RFC1191" format="default"/> or IPv6 <x | |||
s implemented in conjunction between TCP, IP, and ICMP protocols. It relies bot | ref target="RFC8201" format="default"/> is implemented in conjunction between TC | |||
h on avoiding source fragmentation and setting the IPv4 DF (don't fragment) flag | P, IP, and ICMP. It relies both on avoiding source fragmentation and setting th | |||
, the latter to inhibit on-path fragmentation. It relies on ICMP errors from ro | e IPv4 DF (don't fragment) flag, the latter to inhibit on-path fragmentation. I | |||
uters along the path, whenever a segment is too large to traverse a link. Sever | t relies on ICMP errors from routers along the path whenever a segment is too la | |||
al adjustments to a TCP implementation with PMTUD are described in RFC 2923 in o | rge to traverse a link. Several adjustments to a TCP implementation with PMTUD | |||
rder to deal with problems experienced in practice <xref target="RFC2923"/>. PL | are described in RFC 2923 in order to deal with problems experienced in practice | |||
PMTUD <xref target="RFC4821"/> is a Standards Track improvement to PMTUD that re | <xref target="RFC2923" format="default"/>. PLPMTUD <xref target="RFC4821" form | |||
laxes the requirement for ICMP support across a path, and improves performance i | at="default"/> is a Standards Track improvement to PMTUD that relaxes the requir | |||
n cases where ICMP is not consistently conveyed, but still tries to avoid source | ement for ICMP support across a path, and improves performance in cases where IC | |||
fragmentation. The mechanisms in all four of these RFCs are recommended to be | MP is not consistently conveyed, but still tries to avoid source fragmentation. | |||
included in TCP implementations.</t> | The mechanisms in all four of these RFCs are recommended to be included in TCP | |||
implementations.</t> | ||||
<t> | <t> | |||
The TCP MSS option specifies an upper bound for the size of packets | The TCP MSS Option specifies an upper bound for the size of packets | |||
that can be received (see <xref target="RFC6691"/>). Hence, setting the valu | that can be received (see <xref target="RFC6691" format="default"/>). Hence, | |||
e in the MSS option too | setting the value in the MSS Option too | |||
small can impact the ability for PMTUD or PLPMTUD to find a larger | small can impact the ability for PMTUD or PLPMTUD to find a larger | |||
path MTU. RFC 1191 discusses this implication of many older TCP implementations setting the TCP MSS to 536 (corresponding to the IPv4 576 byte default MTU) for non-local destinations, rather than deriving it from the MTUs of connected interfaces as recommended. | path MTU. RFC 1191 discusses this implication of many older TCP implementations setting the TCP MSS to 536 (corresponding to the IPv4 576 byte default MTU) for non-local destinations, rather than deriving it from the MTUs of connected interfaces as recommended. | |||
</t> | </t> | |||
</section> | ||||
</section> | <section numbered="true" toc="default"> | |||
<section title="Interfaces with Variable MTU Values"> | <name>Interfaces with Variable MTU Values</name> | |||
<t> | <t> | |||
The effective MTU can sometimes vary, as when used with variable | The effective MTU can sometimes vary, as when used with variable | |||
compression, e.g., RObust Header Compression (ROHC) <xref target="RFC5795"/>. It is | compression, e.g., RObust Header Compression (ROHC) <xref target="RFC5795" format="default"/>. It is | |||
tempting for a TCP implementation to advertise the largest possible MSS, to | tempting for a TCP implementation to advertise the largest possible MSS, to | |||
support the most efficient use of compressed payloads. | support the most efficient use of compressed payloads. | |||
Unfortunately, some compression schemes occasionally need to transmit | Unfortunately, some compression schemes occasionally need to transmit | |||
full headers (and thus smaller payloads) to resynchronize state at | full headers (and thus smaller payloads) to resynchronize state at | |||
their endpoint compressors/decompressors. If the largest MTU is used | their endpoint compressors/decompressors. If the largest MTU is used | |||
to calculate the value to advertise in the MSS option, TCP | to calculate the value to advertise in the MSS Option, TCP | |||
retransmission may interfere with compressor resynchronization. | retransmission may interfere with compressor resynchronization. | |||
</t> | </t> | |||
<t> | <t> | |||
As a result, when the effective MTU of an interface varies packet-to-packet, TCP implementations | As a result, when the effective MTU of an interface varies packet-to-packet, TCP implementations | |||
SHOULD use the smallest effective MTU of the interface to calculate | <bcp14>SHOULD</bcp14> use the smallest effective MTU of the interface to calc | |||
the value to advertise in the MSS option (SHLD-6). | ulate | |||
</t> | the value to advertise in the MSS Option (SHLD-6). | |||
</section> | </t> | |||
<section title="Nagle Algorithm" anchor="nagle"> | </section> | |||
<t>The "Nagle algorithm" was described in RFC 896 <xref target="RF | <section anchor="nagle" numbered="true" toc="default"> | |||
C0896"/> and was recommended in RFC 1122 <xref target="RFC1122"/> for mitigation | <name>Nagle Algorithm</name> | |||
of an early problem of too many small packets being generated. It has been imp | <t>The "Nagle algorithm" was described in RFC 896 <xref target="RFC089 | |||
lemented in most current TCP code bases, sometimes with minor variations (see <x | 6" format="default"/> and was recommended in RFC 1122 <xref target="RFC1122" for | |||
ref target="minshall"/>).</t> | mat="default"/> for mitigation of an early problem of too many small packets bei | |||
<t>If there is unacknowledged data (i.e., SND.NXT > SND.UNA), then the se | ng generated. It has been implemented in most current TCP code bases, sometimes | |||
nding TCP endpoint buffers all user data (regardless of the PSH bit), until the | with minor variations (see <xref target="minshall" format="default"/>).</t> | |||
outstanding data has been acknowledged or until the TCP endpoint can send a full | <t>If there is unacknowledged data (i.e., SND.NXT > SND.UNA), then | |||
-sized segment (Eff.snd.MSS bytes).</t> | the sending TCP endpoint buffers all user data (regardless of the PSH bit) until | |||
<t>A TCP implementation SHOULD implement the Nagle Algorithm to coalesce sho | the outstanding data has been acknowledged or until the TCP endpoint can send a | |||
rt segments (SHLD-7). However, there MUST be a way for an application to disabl | full-sized segment (Eff.snd.MSS bytes).</t> | |||
e the Nagle algorithm on an individual connection (MUST-17). In all cases, send | <t>A TCP implementation <bcp14>SHOULD</bcp14> implement the Nagle algo | |||
ing data is also subject to the limitation imposed by the Slow Start algorithm < | rithm to coalesce short segments (SHLD-7). However, there <bcp14>MUST</bcp14> b | |||
xref target="RFC5681"/>.</t> | e a way for an application to disable the Nagle algorithm on an individual conne | |||
<t> | ction (MUST-17). In all cases, sending data is also subject to the limitation i | |||
Since there can be problematic interactions between the Nagle Algorithm and | mposed by the slow start algorithm <xref target="RFC5681" format="default"/>.</t | |||
delayed acknowledgements, some implementations use minor variations of the Nagle | > | |||
algorithm, such as the one described in <xref target="minshall"/>. | <t> | |||
</t> | Since there can be problematic interactions between the Nagle algorithm and | |||
</section> | delayed acknowledgments, some implementations use minor variations of the Nagle | |||
<section title="IPv6 Jumbograms"> | algorithm, such as the one described in <xref target="minshall" format="default" | |||
<t> | />. | |||
</t> | ||||
</section> | ||||
<section numbered="true" toc="default"> | ||||
<name>IPv6 Jumbograms</name> | ||||
<t> | ||||
In order to support TCP over IPv6 Jumbograms, implementations need to | In order to support TCP over IPv6 Jumbograms, implementations need to | |||
be able to send TCP segments larger than the 64KB limit that the MSS option can convey. RFC 2675 <xref target="RFC2675"/> | be able to send TCP segments larger than the 64-KB limit that the MSS Option can convey. RFC 2675 <xref target="RFC2675" format="default"/> | |||
defines that an MSS value of 65,535 bytes is to be treated as infinity, and Path | defines that an MSS value of 65,535 bytes is to be treated as infinity, and Path | |||
MTU Discovery <xref target="RFC8201"/> is used to determine the actual MSS. | MTU Discovery <xref target="RFC8201" format="default"/> is used to determine | |||
</t> | the actual MSS. | |||
<t> | </t> | |||
The Jumbo Payload option need not be implemented or understood by IPv6 nodes | <t> | |||
that do not support attachment to links with a MTU greater than 65,575 <xref tar | The Jumbo Payload Option need not be implemented or understood by IPv6 nodes | |||
get="RFC2675"/>, and the present IPv6 Node Requirements does not include support | that do not support attachment to links with an MTU greater than 65,575 <xref ta | |||
for Jumbograms <xref target="RFC8504"/>. | rget="RFC2675" format="default"/>, and the present IPv6 Node Requirements does n | |||
</t> | ot include support for Jumbograms <xref target="RFC8504" format="default"/>. | |||
</section> | </t> | |||
</section> | </section> | |||
<section title="Data Communication" anchor="datacomm"> | </section> | |||
<t> | <section anchor="datacomm" numbered="true" toc="default"> | |||
Once the connection is established data is communicated by the | <name>Data Communication</name> | |||
<t> | ||||
Once the connection is established, data is communicated by the | ||||
exchange of segments. Because segments may be lost due to errors | exchange of segments. Because segments may be lost due to errors | |||
(checksum test failure), or network congestion, TCP uses | (checksum test failure) or network congestion, TCP uses | |||
retransmission to ensure delivery of every segment. | retransmission to ensure delivery of every segment. | |||
Duplicate segments may arrive due to network or TCP retransmission. | Duplicate segments may arrive due to network or TCP retransmission. | |||
As discussed in the section on sequence numbers, the TCP implementation performs | As discussed in the section on sequence numbers (<xref target="sequence-numbers"/>), the TCP implementation performs | |||
certain tests on the sequence and acknowledgment numbers in the | certain tests on the sequence and acknowledgment numbers in the | |||
segments to verify their acceptability. | segments to verify their acceptability. | |||
</t> | </t> | |||
<t> | <t> | |||
The sender of data keeps track of the next sequence number to use in | The sender of data keeps track of the next sequence number to use in | |||
the variable SND.NXT. The receiver of data keeps track of the next | the variable SND.NXT. The receiver of data keeps track of the next | |||
sequence number to expect in the variable RCV.NXT. The sender of data | sequence number to expect in the variable RCV.NXT. The sender of data | |||
keeps track of the oldest unacknowledged sequence number in the | keeps track of the oldest unacknowledged sequence number in the | |||
variable SND.UNA. If the data flow is momentarily idle and all data | variable SND.UNA. If the data flow is momentarily idle and all data | |||
sent has been acknowledged then the three variables will be equal. | sent has been acknowledged, then the three variables will be equal. | |||
</t> | </t> | |||
<t> | <t> | |||
When the sender creates a segment and transmits it the sender advances | When the sender creates a segment and transmits it, the sender advances | |||
SND.NXT. When the receiver accepts a segment it advances RCV.NXT and | SND.NXT. When the receiver accepts a segment, it advances RCV.NXT and | |||
sends an acknowledgment. When the data sender receives an | sends an acknowledgment. When the data sender receives an | |||
acknowledgment it advances SND.UNA. The extent to which the values of | acknowledgment, it advances SND.UNA. The extent to which the values of | |||
these variables differ is a measure of the delay in the communication. | these variables differ is a measure of the delay in the communication. | |||
The amount by which the variables are advanced is the length of the | The amount by which the variables are advanced is the length of the | |||
data and SYN or FIN flags in the segment. Note that once in the ESTABLISHED state all | data and SYN or FIN flags in the segment. Note that, once in the ESTABLISHED state, all | |||
segments must carry current acknowledgment information. | segments must carry current acknowledgment information. | |||
</t> | </t> | |||
<t> | <t> | |||
The CLOSE user call implies a push function (see <xref target="user-api"/>), a | The CLOSE user call implies a push function (see <xref target="user-api" forma | |||
s does the FIN control | t="default"/>), as does the FIN control | |||
flag in an incoming segment. | flag in an incoming segment. | |||
</t> | </t> | |||
<section title="Retransmission Timeout" anchor="RTO"> | <section anchor="RTO" numbered="true" toc="default"> | |||
<t> | <name>Retransmission Timeout</name> | |||
<t> | ||||
Because of the variability of the networks that compose an | Because of the variability of the networks that compose an | |||
internetwork system and the wide range of uses of TCP connections the | internetwork system and the wide range of uses of TCP connections, the | |||
retransmission timeout (RTO) must be dynamically determined. | retransmission timeout (RTO) must be dynamically determined. | |||
</t> | </t> | |||
<t> | <t> | |||
The RTO MUST be computed according to the | The RTO <bcp14>MUST</bcp14> be computed according to the | |||
algorithm in <xref target="RFC6298"/>, including Karn's algorithm for taki | algorithm in <xref target="RFC6298" format="default"/>, including Karn's a | |||
ng RTT samples (MUST-18). | lgorithm for taking RTT samples (MUST-18). | |||
</t> | </t> | |||
<t> | <t> | |||
RFC 793 contains an early example procedure for computing the RTO, based o | RFC 793 contains an early example procedure for computing the RTO, based o | |||
n work mentioned in IEN 177 <xref target="IEN177"/>. This was then replaced by | n work mentioned in IEN 177 <xref target="IEN177" format="default"/>. This was | |||
the algorithm described in RFC 1122, and subsequently updated in RFC 2988, and t | then replaced by the algorithm described in RFC 1122, which was subsequently upd | |||
hen again in RFC 6298. | ated in RFC 2988 and then again in RFC 6298. | |||
</t> | </t> | |||
<t> | <t> | |||
RFC 1122 allows that if a retransmitted packet is identical to the original | RFC 1122 allows that if a retransmitted packet is identical to the original | |||
packet (which implies not only that the data boundaries have not changed, but | packet (which implies not only that the data boundaries have not changed, but | |||
also that none of the headers have changed), then the same IPv4 Identification | also that none of the headers have changed), then the same IPv4 Identification | |||
field MAY be used (see Section 3.2.1.5 of RFC 1122) (MAY-4). The same IP | field <bcp14>MAY</bcp14> be used (see Section <xref target="RFC1122" section="3. | |||
identification field may be reused anyways, since it is only meaningful when a | 2.1.5" sectionFormat="bare" format="default"/> of RFC 1122) (MAY-4). The same I | |||
datagram is fragmented <xref target="RFC6864"/>. TCP implementations should not | P | |||
rely on or typically | Identification field may be reused anyways since it is only meaningful when a | |||
datagram is fragmented <xref target="RFC6864" format="default"/>. TCP implement | ||||
ations should not rely on or typically | ||||
interact with this IPv4 header field in any way. It is not a reasonable way to | interact with this IPv4 header field in any way. It is not a reasonable way to | |||
either indicate duplicate sent segments, nor to identify duplicate received | indicate duplicate sent segments nor to identify duplicate received | |||
segments. | segments. | |||
</t> | </t> | |||
</section> | </section> | |||
<section title="TCP Congestion Control"> | <section numbered="true" toc="default"> | |||
<t>RFC 2914 <xref target="RFC2914"/> explains the importance of congestion contr | <name>TCP Congestion Control</name> | |||
ol for the Internet.</t> | <t>RFC 2914 <xref target="RFC2914" format="default"/> explains the imp | |||
ortance of congestion control for the Internet.</t> | ||||
<t>RFC 1122 required implementation of Van Jacobson's congestion control algorit | <t>RFC 1122 required implementation of Van Jacobson's congestion contr | |||
hms slow start and congestion avoidance together with exponential back-off for s | ol algorithms slow start and congestion avoidance together with exponential back | |||
uccessive RTO values for the same segment. RFC 2581 provided IETF Standards Tra | off for successive RTO values for the same segment. RFC 2581 provided IETF Stan | |||
ck description of slow start and congestion avoidance, along with fast retransmi | dards Track description of slow start and congestion avoidance, along with fast | |||
t and fast recovery. RFC 5681 is the current description of these algorithms an | retransmit and fast recovery. RFC 5681 is the current description of these algo | |||
d is the current Standards Track specification providing guidelines for TCP cong | rithms and is the current Standards Track specification providing guidelines for | |||
estion control. RFC 6298 describes exponential back-off of RTO values, including | TCP congestion control. RFC 6298 describes exponential backoff of RTO values, i | |||
keeping the backed-off value until a subsequent segment with new data has been | ncluding keeping the backed-off value until a subsequent segment with new data h | |||
sent and acknowledged without retransmission.</t> | as been sent and acknowledged without retransmission.</t> | |||
<t>A TCP endpoint <bcp14>MUST</bcp14> implement the basic congestion c | ||||
<t>A TCP endpoint MUST implement the basic congestion control algorithms slow st | ontrol algorithms slow start, congestion avoidance, and exponential backoff of R | |||
art, congestion avoidance, and exponential back-off of RTO to avoid creating con | TO to avoid creating congestion collapse conditions (MUST-19). RFC 5681 and RFC | |||
gestion collapse conditions (MUST-19). RFC 5681 and RFC 6298 describe the basic | 6298 describe the basic algorithms on the IETF Standards Track that are broadly | |||
algorithms on the IETF Standards Track that are broadly applicable. Multiple o | applicable. Multiple other suitable algorithms exist and have been widely used | |||
ther suitable algorithms exist and have been widely used. Many TCP implementati | . Many TCP implementations support a set of alternative algorithms that can be | |||
ons support a set of alternative algorithms that can be configured for use on th | configured for use on the endpoint. An endpoint <bcp14>MAY</bcp14> implement su | |||
e endpoint. An endpoint MAY implement such alternative algorithms provided that | ch alternative algorithms provided that the algorithms are conformant with the T | |||
the algorithms are conformant with the TCP specifications from the IETF Standar | CP specifications from the IETF Standards Track as described in RFC 2914, RFC 50 | |||
ds Track as described in RFC 2914, RFC 5033 <xref target="RFC5033"/>, and RFC 89 | 33 <xref target="RFC5033" format="default"/>, and RFC 8961 <xref target="RFC8961 | |||
61 <xref target="RFC8961"/> (MAY-18).</t> | " format="default"/> (MAY-18).</t> | |||
<t>Explicit Congestion Notification (ECN) was defined in RFC 3168 and | ||||
<t>Explicit Congestion Notification (ECN) was defined in RFC 3168 and is an IETF | is an IETF Standards Track enhancement that has many benefits <xref target="RFC8 | |||
Standards Track enhancement that has many benefits <xref target="RFC8087"/>.</t | 087" format="default"/>.</t> | |||
> | <t>A TCP endpoint <bcp14>SHOULD</bcp14> implement ECN as described in | |||
RFC 3168 (SHLD-8).</t> | ||||
<t>A TCP endpoint SHOULD implement ECN as described in RFC 3168 (SHLD-8).</t> | </section> | |||
<section anchor="connfail" numbered="true" toc="default"> | ||||
</section> | <name>TCP Connection Failures</name> | |||
<section title="TCP Connection Failures" anchor="connfail"> | <t> | |||
<t> | ||||
Excessive retransmission of the same segment by a TCP endpoint | Excessive retransmission of the same segment by a TCP endpoint | |||
indicates some failure of the remote host or the Internet | indicates some failure of the remote host or the internetwork | |||
path. This failure may be of short or long duration. The | path. This failure may be of short or long duration. The | |||
following procedure MUST be used to handle excessive | following procedure <bcp14>MUST</bcp14> be used to handle excessive | |||
retransmissions of data segments (MUST-20): | retransmissions of data segments (MUST-20): | |||
</t> | </t> | |||
<t> | <ol type="(%c)" spacing="normal"> | |||
<list> | <li> | |||
<t> | There are two thresholds R1 and R2 measuring the amount | |||
(a) There are two thresholds R1 and R2 measuring the amount | ||||
of retransmission that has occurred for the same | of retransmission that has occurred for the same | |||
segment. R1 and R2 might be measured in time units or | segment. R1 and R2 might be measured in time units or | |||
as a count of retransmissions (with the current RTO and | as a count of retransmissions (with the current RTO and | |||
corresponding backoffs as a conversion factor, if needed). | corresponding backoffs as a conversion factor, if needed). | |||
</t> | </li> | |||
<t> | <li> | |||
(b) When the number of transmissions of the same segment | When the number of transmissions of the same segment | |||
reaches or exceeds threshold R1, pass negative advice | reaches or exceeds threshold R1, pass negative advice | |||
(see Section 3.3.1.4 of <xref target="RFC1122"/>) to the IP layer, to trigger | (see <xref target="RFC1122" section="3.3.1.4" sectionFormat="of" format="default"/>) to the IP layer, to trigger | |||
dead-gateway diagnosis. | dead-gateway diagnosis. | |||
</t> | </li> | |||
<t> | <li> | |||
(c) When the number of transmissions of the same segment | When the number of transmissions of the same segment | |||
reaches a threshold R2 greater than R1, close the | reaches a threshold R2 greater than R1, close the | |||
connection. | connection. | |||
</t> | </li> | |||
<t> | <li> | |||
(d) An application MUST (MUST-21) be able to set the value for R2 f | An application <bcp14>MUST</bcp14> (MUST-21) be able to set the | |||
or | value for R2 for | |||
a particular connection. For example, an interactive | a particular connection. For example, an interactive | |||
application might set R2 to "infinity," giving the user | application might set R2 to "infinity", giving the user | |||
control over when to disconnect. | control over when to disconnect. | |||
</t> | </li> | |||
<t> | <li> | |||
(e) TCP implementations SHOULD inform the application of the delive | TCP implementations <bcp14>SHOULD</bcp14> inform the applicatio | |||
ry | n of the delivery | |||
problem (unless such information has been disabled by | problem (unless such information has been disabled by | |||
the application; see Asynchronous Reports section), when R1 is | the application; see the "Asynchronous Reports" section (<xref target="asynchronous-reports"/>)), when R1 is | |||
reached and before R2 (SHLD-9). This will allow a remote login | reached and before R2 (SHLD-9). This will allow a remote login | |||
application program to inform the user, | application program to inform the user, | |||
for example. | for example. | |||
</t> | </li> | |||
</list></t> | </ol> | |||
<t> | <t> | |||
The value of R1 SHOULD correspond to at least 3 | The value of R1 <bcp14>SHOULD</bcp14> correspond to at least 3 | |||
retransmissions, at the current RTO (SHLD-10). The value of R2 SHOU | retransmissions, at the current RTO (SHLD-10). The value of R2 <bcp | |||
LD | 14>SHOULD</bcp14> | |||
correspond to at least 100 seconds (SHLD-11). | correspond to at least 100 seconds (SHLD-11). | |||
</t> | </t> | |||
<t> | <t> | |||
An attempt to open a TCP connection could fail with | An attempt to open a TCP connection could fail with | |||
excessive retransmissions of the SYN segment or by receipt | excessive retransmissions of the SYN segment or by receipt | |||
of a RST segment or an ICMP Port Unreachable. SYN | of a RST segment or an ICMP Port Unreachable. SYN | |||
retransmissions MUST be handled in the general way just | retransmissions <bcp14>MUST</bcp14> be handled in the general way just | |||
described for data retransmissions, including notification | described for data retransmissions, including notification | |||
of the application layer. | of the application layer. | |||
</t> | </t> | |||
<t> | <t> | |||
However, the values of R1 and R2 may be different for SYN | However, the values of R1 and R2 may be different for SYN | |||
and data segments. In particular, R2 for a SYN segment MUST | and data segments. In particular, R2 for a SYN segment <bcp14>MUST</bcp14> | |||
be set large enough to provide retransmission of the segment | be set large enough to provide retransmission of the segment | |||
for at least 3 minutes (MUST-23). The application can close the | for at least 3 minutes (MUST-23). The application can close the | |||
connection (i.e., give up on the open attempt) sooner, of | connection (i.e., give up on the open attempt) sooner, of | |||
course. | course. | |||
</t> | </t> | |||
</section> | </section> | |||
<section title="TCP Keep-Alives"> | <section numbered="true" toc="default"> | |||
<t> | <name>TCP Keep-Alives</name> | |||
<t> | ||||
A TCP connection is said to be "idle" if for some long | A TCP connection is said to be "idle" if for some long | |||
amount of time there have been no incoming segments received and | amount of time there have been no incoming segments received and | |||
there is no new or unacknowledged data to be sent. | there is no new or unacknowledged data to be sent. | |||
</t> | </t> | |||
<t> | <t> | |||
Implementors MAY include "keep-alives" in their TCP implementations | Implementers <bcp14>MAY</bcp14> include "keep-alives" in their TCP im | |||
plementations | ||||
(MAY-5), although this practice is not universally accepted. Some | (MAY-5), although this practice is not universally accepted. Some | |||
TCP implementations, however, have included a keep-alive mechanism. | TCP implementations, however, have included a keep-alive mechanism. | |||
To confirm that an idle connection is still active, these | To confirm that an idle connection is still active, these | |||
implementations send a probe segment designed to elicit a response | implementations send a probe segment designed to elicit a response | |||
from the TCP peer. Such a segment generally contains SEG.SEQ = | from the TCP peer. Such a segment generally contains SEG.SEQ = | |||
SND.NXT-1 and may or may not contain one garbage octet of data. | SND.NXT-1 and may or may not contain one garbage octet of data. | |||
If keep-alives are included, the application MUST be able to turn | If keep-alives are included, the application <bcp14>MUST</bcp14> be a | |||
them on or off for each TCP connection (MUST-24), and they MUST | ble to turn | |||
them on or off for each TCP connection (MUST-24), and they <bcp14>MUS | ||||
T</bcp14> | ||||
default to off (MUST-25). | default to off (MUST-25). | |||
</t> | </t> | |||
<t> | <t> | |||
Keep-alive packets MUST only be sent when no sent data is outstandin | Keep-alive packets <bcp14>MUST</bcp14> only be sent when no sent dat | |||
g, | a is outstanding, | |||
and no data or | and no data or | |||
acknowledgement packets have been received for the | acknowledgment packets have been received for the | |||
connection within an interval (MUST-26). This interval MUST be | connection within an interval (MUST-26). This interval <bcp14>MUST< | |||
configurable (MUST-27) and MUST default to no less than two hours (M | /bcp14> be | |||
UST-28). | configurable (MUST-27) and <bcp14>MUST</bcp14> default to no less th | |||
an two hours (MUST-28). | ||||
</t> | </t> | |||
<t> | <t> | |||
It is extremely important to remember that ACK segments that | It is extremely important to remember that ACK segments that | |||
contain no data are not reliably transmitted by TCP. | contain no data are not reliably transmitted by TCP. | |||
Consequently, if a keep-alive mechanism is implemented it | Consequently, if a keep-alive mechanism is implemented it | |||
MUST NOT interpret failure to respond to any specific probe | <bcp14>MUST NOT</bcp14> interpret failure to respond to any specific probe | |||
as a dead connection (MUST-29). | as a dead connection (MUST-29). | |||
</t> | </t> | |||
<t> | <t> | |||
An implementation SHOULD send a keep-alive segment with no | An implementation <bcp14>SHOULD</bcp14> send a keep-alive segment wi | |||
data (SHLD-12); however, it MAY be configurable to send a keep-alive | th no | |||
data (SHLD-12); however, it <bcp14>MAY</bcp14> be configurable to se | ||||
nd a keep-alive | ||||
segment containing one garbage octet (MAY-6), for compatibility with | segment containing one garbage octet (MAY-6), for compatibility with | |||
erroneous TCP implementations. | erroneous TCP implementations. | |||
</t> | </t> | |||
</section> | </section> | |||
<section title="The Communication of Urgent Information" anchor="urgent"> | <section anchor="urgent" numbered="true" toc="default"> | |||
<t> | <name>The Communication of Urgent Information</name> | |||
As a result of implementation differences and middlebox interactions, new appl | <t> | |||
ications SHOULD NOT employ the TCP urgent mechanism (SHLD-13). However, TCP imp | As a result of implementation differences and middlebox interactions, new appl | |||
lementations MUST still include support for the urgent mechanism (MUST-30). Inf | ications <bcp14>SHOULD NOT</bcp14> employ the TCP urgent mechanism (SHLD-13). H | |||
ormation on how some TCP implementations interpret the urgent pointer can be fou | owever, TCP implementations <bcp14>MUST</bcp14> still include support for the ur | |||
nd in RFC 6093 <xref target="RFC6093"/>. | gent mechanism (MUST-30). Information on how some TCP implementations interpret | |||
the urgent pointer can be found in RFC 6093 <xref target="RFC6093" format="defa | ||||
ult"/>. | ||||
</t> | </t> | |||
<t> | <t> | |||
The objective of the TCP urgent mechanism is to allow the sending user | The objective of the TCP urgent mechanism is to allow the sending user | |||
to stimulate the receiving user to accept some urgent data and to | to stimulate the receiving user to accept some urgent data and to | |||
permit the receiving TCP endpoint to indicate to the receiving user when all | permit the receiving TCP endpoint to indicate to the receiving user when all | |||
the currently known urgent data has been received by the user. | the currently known urgent data has been received by the user. | |||
</t> | </t> | |||
<t> | <t> | |||
This mechanism permits a point in the data stream to be designated as | This mechanism permits a point in the data stream to be designated as | |||
the end of urgent information. Whenever this point is in advance of | the end of urgent information. Whenever this point is in advance of | |||
the receive sequence number (RCV.NXT) at the receiving TCP endpoint, that TCP | the receive sequence number (RCV.NXT) at the receiving TCP endpoint, then the | |||
must tell the user to go into "urgent mode"; when the receive sequen | TCP implementation | |||
ce | must tell the user to go into "urgent mode"; when the receive sequence | |||
number catches up to the urgent pointer, the TCP implementation must tell the user to go | number catches up to the urgent pointer, the TCP implementation must tell the user to go | |||
into "normal mode". If the urgent pointer is updated while the user | into "normal mode". If the urgent pointer is updated while the user | |||
is in "urgent mode", the update will be invisible to the user. | is in "urgent mode", the update will be invisible to the user. | |||
</t> | </t> | |||
<t> | <t> | |||
The method employs an urgent field that is carried in all segments | The method employs an urgent field that is carried in all segments | |||
transmitted. The URG control flag indicates that the urgent field is | transmitted. The URG control flag indicates that the urgent field is | |||
meaningful and must be added to the segment sequence number to yield | meaningful and must be added to the segment sequence number to yield | |||
the urgent pointer. The absence of this flag indicates that there is | the urgent pointer. The absence of this flag indicates that there is | |||
no urgent data outstanding. | no urgent data outstanding. | |||
</t> | </t> | |||
<t> | <t> | |||
To send an urgent indication the user must also send at least one data | To send an urgent indication, the user must also send at least one data | |||
octet. If the sending user also indicates a push, timely delivery of | octet. If the sending user also indicates a push, timely delivery of | |||
the urgent information to the destination process is enhanced. Note that beca | the urgent information to the destination process is enhanced. Note that beca | |||
use changes in the urgent pointer correspond to data being written by a sending | use changes in the urgent pointer correspond to data being written by a sending | |||
application, the urgent pointer can not "recede" in the sequence space | application, the urgent pointer cannot "recede" in the sequence space, but a TCP | |||
, but a TCP receiver should be robust to invalid urgent pointer values. | receiver should be robust to invalid urgent pointer values. | |||
</t> | ||||
<t> | ||||
A TCP implementation MUST support a sequence of urgent data of any length (MUS | ||||
T-31). <xref target="RFC1122"/> | ||||
</t> | </t> | |||
<t> | <t> | |||
The urgent pointer MUST point to the sequence number of the octet following the | A TCP implementation <bcp14>MUST</bcp14> support a sequence of urgent data of | |||
urgent data (MUST-62). | any length (MUST-31) <xref target="RFC1122" format="default"/>. | |||
</t> | ||||
<t> | ||||
The urgent pointer <bcp14>MUST</bcp14> point to the sequence number of the octet | ||||
following the urgent data (MUST-62). | ||||
</t> | </t> | |||
<t> | <t> | |||
A TCP implementation MUST (MUST-32) inform the application layer asynchronousl | A TCP implementation <bcp14>MUST</bcp14> (MUST-32) inform the application laye | |||
y whenever it receives an Urgent pointer and there was previously no pending urg | r asynchronously whenever it receives an urgent pointer and there was previously | |||
ent data, or whenever the Urgent pointer advances in the data stream. The TCP i | no pending urgent data, or whenever the urgent pointer advances in the data str | |||
mplementation MUST (MUST-33) provide a way for the application to learn how much | eam. The TCP implementation <bcp14>MUST</bcp14> (MUST-33) provide a way for the | |||
urgent data remains to be read from the connection, or at least to determine wh | application to learn how much urgent data remains to be read from the connectio | |||
ether more urgent data remains to be read <xref target="RFC1122"/>. | n, or at least to determine whether more urgent data remains to be read <xref ta | |||
rget="RFC1122" format="default"/>. | ||||
</t> | </t> | |||
</section> | </section> | |||
<section title="Managing the Window"> | <section numbered="true" toc="default"> | |||
<t> | <name>Managing the Window</name> | |||
<t> | ||||
The window sent in each segment indicates the range of sequence | The window sent in each segment indicates the range of sequence | |||
numbers the sender of the window (the data receiver) is currently | numbers the sender of the window (the data receiver) is currently | |||
prepared to accept. There is an assumption that this is related to | prepared to accept. There is an assumption that this is related to | |||
the currently available data buffer space available for this | the data buffer space currently available for this | |||
connection. | connection. | |||
</t> | </t> | |||
<t> | <t> | |||
The sending TCP endpoint packages the data to be transmitted into segments | The sending TCP endpoint packages the data to be transmitted into segments | |||
that fit the current window, and may repackage segments on the | that fit the current window, and may repackage segments on the | |||
retransmission queue. Such repackaging is not required, but may be | retransmission queue. Such repackaging is not required but may be | |||
helpful. | helpful. | |||
</t> | </t> | |||
<t> | <t> | |||
In a connection with a one-way data flow, the window information will | In a connection with a one-way data flow, the window information will | |||
be carried in acknowledgment segments that all have the same sequence | be carried in acknowledgment segments that all have the same sequence | |||
number, so there will be no way to reorder them if they arrive out of | number, so there will be no way to reorder them if they arrive out of | |||
order. This is not a serious problem, but it will allow the window | order. This is not a serious problem, but it will allow the window | |||
information to be on occasion temporarily based on old reports from | information to be on occasion temporarily based on old reports from | |||
the data receiver. A refinement to avoid this problem is to act on | the data receiver. A refinement to avoid this problem is to act on | |||
the window information from segments that carry the highest | the window information from segments that carry the highest | |||
acknowledgment number (that is segments with acknowledgment number | acknowledgment number (that is, segments with an acknowledgment number | |||
equal or greater than the highest previously received). | equal to or greater than the highest previously received). | |||
</t> | </t> | |||
<t> | <t> | |||
Indicating a large window encourages transmissions. If more data | Indicating a large window encourages transmissions. If more data | |||
arrives than can be accepted, it will be discarded. This will result | arrives than can be accepted, it will be discarded. This will result | |||
in excessive retransmissions, adding unnecessarily to the load on the | in excessive retransmissions, adding unnecessarily to the load on the | |||
network and the TCP endpoints. Indicating a small window may restrict the | network and the TCP endpoints. Indicating a small window may restrict the | |||
transmission of data to the point of introducing a round trip delay | transmission of data to the point of introducing a round-trip delay | |||
between each new segment transmitted. | between each new segment transmitted. | |||
</t> | </t> | |||
<t> | <t> | |||
The mechanisms provided allow a TCP endpoint to advertise a large window and to | The mechanisms provided allow a TCP endpoint to advertise a large window and to | |||
subsequently advertise a much smaller window without having accepted | subsequently advertise a much smaller window without having accepted | |||
that much data. This, so-called "shrinking the window," is strongly | that much data. This so-called "shrinking the window" is strongly | |||
discouraged. The robustness principle <xref target="RFC1122"/> dictates that | discouraged. The robustness principle <xref target="RFC1122" format="default" | |||
TCP peers will not | /> dictates that TCP peers will not | |||
shrink the window themselves, but will be prepared for such behavior | shrink the window themselves, but will be prepared for such behavior | |||
on the part of other TCP peers. | on the part of other TCP peers. | |||
</t> | </t> | |||
<t> | <t> | |||
A TCP receiver SHOULD NOT shrink the window, i.e., move the | A TCP receiver <bcp14>SHOULD NOT</bcp14> shrink the window, i.e., move the | |||
right window edge to the left (SHLD-14). However, a sending TCP peer MUST | right window edge to the left (SHLD-14). However, a sending TCP peer <bcp14>M | |||
UST</bcp14> | ||||
be robust against window shrinking, which may cause the | be robust against window shrinking, which may cause the | |||
"usable window" (see <xref target="SWSsender"/>) to become negative (MUST-34). | "usable window" (see <xref target="SWSsender" format="default"/>) to become ne gative (MUST-34). | |||
</t> | </t> | |||
<t> | <t> | |||
If this happens, the sender SHOULD NOT send new data (SHLD-15), but | If this happens, the sender <bcp14>SHOULD NOT</bcp14> send new data (SHLD-15), | |||
SHOULD retransmit normally the old unacknowledged data | but | |||
between SND.UNA and SND.UNA+SND.WND (SHLD-16). The sender MAY also | <bcp14>SHOULD</bcp14> retransmit normally the old unacknowledged data | |||
retransmit old data beyond SND.UNA+SND.WND (MAY-7), but SHOULD NOT | between SND.UNA and SND.UNA+SND.WND (SHLD-16). The sender <bcp14>MAY</bcp14> | |||
also | ||||
retransmit old data beyond SND.UNA+SND.WND (MAY-7), but <bcp14>SHOULD NOT</bcp | ||||
14> | ||||
time out the connection if data beyond the right window edge | time out the connection if data beyond the right window edge | |||
is not acknowledged (SHLD-17). If the window shrinks to zero, the TCP implementation | is not acknowledged (SHLD-17). If the window shrinks to zero, the TCP implementation | |||
MUST probe it in the standard way (described below) (MUST-35). | <bcp14>MUST</bcp14> probe it in the standard way (described below) (MUST-35). | |||
</t> | </t> | |||
<section title="Zero Window Probing" anchor="zwp"> | <section anchor="zwp" numbered="true" toc="default"> | |||
<t> | <name>Zero-Window Probing</name> | |||
<t> | ||||
The sending TCP peer must regularly transmit at least one octet of new data | The sending TCP peer must regularly transmit at least one octet of new data | |||
(if available) or retransmit to the receiving TCP peer even if the send | (if available), or retransmit to the receiving TCP peer even if the send | |||
window is zero, in order to "probe" the window. This | window is zero, in order to "probe" the window. This | |||
retransmission is essential to guarantee that when either TCP peer has a zero | retransmission is essential to guarantee that when either TCP peer has a zero | |||
window the re-opening of the window will be reliably reported to the other. | window the reopening of the window will be reliably reported to the other. | |||
This is referred to as Zero-Window Probing (ZWP) in other documents. | This is referred to as Zero-Window Probing (ZWP) in other documents. | |||
</t> | </t> | |||
<t> | <t> | |||
Probing of zero (offered) windows MUST be supported (MUST-36). | Probing of zero (offered) windows <bcp14>MUST</bcp14> be supported (MUST-36). | |||
</t> | </t> | |||
<t> | <t> | |||
A TCP implementation MAY keep its offered receive window closed | A TCP implementation <bcp14>MAY</bcp14> keep its offered receive window closed | |||
indefinitely (MAY-8). As long as the receiving TCP peer continues to | indefinitely (MAY-8). As long as the receiving TCP peer continues to | |||
send acknowledgments in response to the probe segments, the | send acknowledgments in response to the probe segments, the | |||
sending TCP peer MUST allow the connection to stay open (MUST-37). This | sending TCP peer <bcp14>MUST</bcp14> allow the connection to stay open (MUST-3 | |||
enables TCP to function in scenarios such as the "printer | 7). This | |||
ran out of paper" situation described in Section 4.2.2.17 | enables TCP to function in scenarios such as the "printer | |||
of <xref target="RFC1122"/>. The behavior is subject to the implementation's | ran out of paper" situation described in | |||
resource | <xref target="RFC1122" section="4.2.2.17" sectionFormat="of" format="default"/ | |||
management concerns, as noted in <xref target="RFC6429"/>. | >. The behavior is subject to the implementation's resource | |||
management concerns, as noted in <xref target="RFC6429" format="default"/>. | ||||
</t> | </t> | |||
<t> | <t> | |||
When the receiving TCP peer has a zero window and a segment arrives it must | When the receiving TCP peer has a zero window and a segment arrives, it must | |||
still send an acknowledgment showing its next expected sequence number | still send an acknowledgment showing its next expected sequence number | |||
and current window (zero). | and current window (zero). | |||
</t> | </t> | |||
<t> | <t> | |||
The transmitting host SHOULD send the first zero-window probe when a zero | The transmitting host <bcp14>SHOULD</bcp14> send the first zero-window probe whe | |||
window has existed for the retransmission timeout period (SHLD-29) (<xref | n a zero | |||
target="RTO"/>), and SHOULD increase exponentially the interval between | window has existed for the retransmission timeout period (SHLD-29) (<xref target | |||
="RTO" format="default"/>), and <bcp14>SHOULD</bcp14> increase exponentially the | ||||
interval between | ||||
successive probes (SHLD-30). | successive probes (SHLD-30). | |||
</t> | </t> | |||
</section> | </section> | |||
<section numbered="true" toc="default"> | ||||
<section title="Silly Window Syndrome Avoidance"> | <name>Silly Window Syndrome Avoidance</name> | |||
<t>The "Silly Window Syndrome" (SWS) is a stable pattern of small incr | <t>The "Silly Window Syndrome" (SWS) is a stable pattern of small in | |||
emental window movements resulting in extremely poor TCP performance. Algorithm | cremental window movements resulting in extremely poor TCP performance. Algorit | |||
s to avoid SWS are described below for both the sending side and the receiving s | hms to avoid SWS are described below for both the sending side and the receiving | |||
ide. RFC 1122 contains more detailed discussion of the SWS problem. Note that | side. RFC 1122 contains more detailed discussion of the SWS problem. Note tha | |||
the Nagle algorithm and the sender SWS avoidance algorithm play complementary ro | t the Nagle algorithm and the sender SWS avoidance algorithm play complementary | |||
les in improving performance. The Nagle algorithm discourages sending tiny segm | roles in improving performance. The Nagle algorithm discourages sending tiny se | |||
ents when the data to be sent increases in small increments, while the SWS avoid | gments when the data to be sent increases in small increments, while the SWS avo | |||
ance algorithm discourages small segments resulting from the right window edge a | idance algorithm discourages small segments resulting from the right window edge | |||
dvancing in small increments.</t> | advancing in small increments.</t> | |||
<section anchor="SWSsender" numbered="true" toc="default"> | ||||
<section title="Sender's Algorithm - When to Send Data" anchor="SWSsender"> | <name>Sender's Algorithm -- When to Send Data</name> | |||
<t> | <t> | |||
A TCP implementation MUST include a SWS avoidance algorithm in the s | A TCP implementation <bcp14>MUST</bcp14> include a SWS avoidance alg | |||
ender (MUST-38). | orithm in the sender (MUST-38). | |||
</t> | </t> | |||
<t> | <t> | |||
The Nagle algorithm from <xref target="nagle"/> additionally describes how to co | The Nagle algorithm from <xref target="nagle" format="default"/> additionally de | |||
alesce short segments. | scribes how to coalesce short segments. | |||
</t> | </t> | |||
<t> | <t> | |||
The sender's SWS avoidance algorithm is more difficult | The sender's SWS avoidance algorithm is more difficult | |||
than the receiver's, because the sender does not know | than the receiver's because the sender does not know | |||
(directly) the receiver's total buffer space RCV.BUFF. | (directly) the receiver's total buffer space (RCV.BUFF). | |||
An approach that has been found to work well is for | An approach that has been found to work well is for | |||
the sender to calculate Max(SND.WND), the maximum send | the sender to calculate Max(SND.WND), which is the maximum send | |||
window it has seen so far on the connection, and to use | window it has seen so far on the connection, and to use | |||
this value as an estimate of RCV.BUFF. Unfortunately, | this value as an estimate of RCV.BUFF. Unfortunately, | |||
this can only be an estimate; the receiver may at any | this can only be an estimate; the receiver may at any | |||
time reduce the size of RCV.BUFF. To avoid a resulting | time reduce the size of RCV.BUFF. To avoid a resulting | |||
deadlock, it is necessary to have a timeout to force | deadlock, it is necessary to have a timeout to force | |||
transmission of data, overriding the SWS avoidance | transmission of data, overriding the SWS avoidance | |||
algorithm. In practice, this timeout should seldom | algorithm. In practice, this timeout should seldom | |||
occur. | occur. | |||
</t> | </t> | |||
<t> | <t> | |||
The "usable window" is:<list><t> | The "usable window" is:</t> | |||
U = SND.UNA + SND.WND - SND.NXT</t></list> | <t indent="3">U = SND.UNA + SND.WND - SND.NXT</t> | |||
<t> | ||||
i.e., the offered window less the amount of data sent | i.e., the offered window less the amount of data sent | |||
but not acknowledged. If D is the amount of data | but not acknowledged. If D is the amount of data | |||
queued in the sending TCP endpoint but not yet sent, then the | queued in the sending TCP endpoint but not yet sent, then the | |||
following set of rules is recommended. | following set of rules is recommended. | |||
</t> | </t> | |||
<t> | <t> | |||
Send data:<list style="hanging" hangIndent="5"> | Send data:</t> | |||
<ol type="(%d)" spacing="normal"> | ||||
<t hangText="(1)"> | <li> | |||
if a maximum-sized segment can be sent, i.e., if:<list><t> | <t> | |||
if a maximum-sized segment can be sent, i.e., if:</t> | ||||
<t indent="3"> | ||||
min(D,U) >= Eff.snd.MSS;</t></list> | min(D,U) >= Eff.snd.MSS;</t> | |||
</t> | </li> | |||
<t hangText="(2)"> | <li> | |||
<t> | ||||
or if the data is pushed and all queued data can | or if the data is pushed and all queued data can | |||
be sent now, i.e., if:<list><t> | be sent now, i.e., if:</t> | |||
<t indent="3"> | ||||
[SND.NXT = SND.UNA and] PUSHED and D <= U</t></list> | ||||
[SND.NXT = SND.UNA and] PUSHed and D <= U | ||||
</t> | ||||
<t> | ||||
(the bracketed condition is imposed by the Nagle | (the bracketed condition is imposed by the Nagle | |||
algorithm); | algorithm); | |||
</t> | </t> | |||
<t hangText="(3)"> | </li> | |||
<li> | ||||
<t> | ||||
or if at least a fraction Fs of the maximum window | or if at least a fraction Fs of the maximum window | |||
can be sent, i.e., if:<list><t> | can be sent, i.e., if:</t> | |||
<t indent="3"> | ||||
[SND.NXT = SND.UNA and]<list><t> | ||||
min(D,U) >= Fs * Max(SND.WND);</t></list></t></list> | [SND.NXT = SND.UNA and]</t> | |||
<t indent="6"> | ||||
</t> | min(D,U) >= Fs * Max(SND.WND);</t> | |||
<t hangText="(4)"> | </li> | |||
<li> | ||||
or if the override timeout | or if the override timeout | |||
occurs. | occurs.</li> | |||
</t></list> | </ol> | |||
</t> | <t> | |||
<t> | ||||
Here Fs is a fraction whose recommended value is 1/2. | Here Fs is a fraction whose recommended value is 1/2. | |||
The override timeout should be in the range 0.1 - 1.0 | The override timeout should be in the range 0.1 - 1.0 | |||
seconds. It may be convenient to combine this timer | seconds. It may be convenient to combine this timer | |||
with the timer used to probe zero windows | with the timer used to probe zero windows | |||
(<xref target="zwp"/>). | (<xref target="zwp" format="default"/>). | |||
</t> | </t> | |||
</section> | ||||
</section> | <section numbered="true" toc="default"> | |||
<section title="Receiver's Algorithm - When to Send a Window Update"> | <name>Receiver's Algorithm -- When to Send a Window Update</name> | |||
<t> | <t> | |||
A TCP implementation MUST include a SWS avoidance algorithm in the r | A TCP implementation <bcp14>MUST</bcp14> include a SWS avoidance alg | |||
eceiver (MUST-39). | orithm in the receiver (MUST-39). | |||
</t> | </t> | |||
<t> | <t> | |||
The receiver's SWS avoidance algorithm determines when | The receiver's SWS avoidance algorithm determines when | |||
the right window edge may be advanced; this is | the right window edge may be advanced; this is | |||
customarily known as "updating the window". This | customarily known as "updating the window". This | |||
algorithm combines with the delayed ACK algorithm | algorithm combines with the delayed ACK algorithm | |||
(<xref target="delACK"/>) to determine when an ACK segment | (<xref target="delACK" format="default"/>) to determine when an ACK segment | |||
containing the current window will really be sent to | containing the current window will really be sent to | |||
the receiver. | the receiver. | |||
</t> | </t> | |||
<t> | <t> | |||
The solution to receiver SWS is to avoid advancing the | The solution to receiver SWS is to avoid advancing the | |||
right window edge RCV.NXT+RCV.WND in small increments, | right window edge RCV.NXT+RCV.WND in small increments, | |||
even if data is received from the network in small | even if data is received from the network in small | |||
segments. | segments. | |||
</t> | </t> | |||
<t> | <t> | |||
Suppose the total receive buffer space is RCV.BUFF. At | Suppose the total receive buffer space is RCV.BUFF. At | |||
any given moment, RCV.USER octets of this total may be | any given moment, RCV.USER octets of this total may be | |||
tied up with data that has been received and | tied up with data that has been received and | |||
acknowledged but that the user process has not yet | acknowledged but that the user process has not yet | |||
consumed. When the connection is quiescent, RCV.WND = | consumed. When the connection is quiescent, RCV.WND = | |||
RCV.BUFF and RCV.USER = 0. | RCV.BUFF and RCV.USER = 0. | |||
</t> | </t> | |||
<t> | <t> | |||
Keeping the right window edge fixed as data arrives and | Keeping the right window edge fixed as data arrives and | |||
is acknowledged requires that the receiver offer less | is acknowledged requires that the receiver offer less | |||
than its full buffer space, i.e., the receiver must | than its full buffer space, i.e., the receiver must | |||
specify a RCV.WND that keeps RCV.NXT+RCV.WND constant | specify a RCV.WND that keeps RCV.NXT+RCV.WND constant | |||
as RCV.NXT increases. Thus, the total buffer space | as RCV.NXT increases. Thus, the total buffer space | |||
RCV.BUFF is generally divided into three parts: | RCV.BUFF is generally divided into three parts: | |||
</t> | </t> | |||
<t><figure><artwork> | <artwork name="" type="" align="left" alt=""><![CDATA[ | |||
|<------- RCV.BUFF ---------------->| | ||||
|<------- RCV.BUFF ---------------->| | ||||
1 2 3 | 1 2 3 | |||
----|---------|------------------|------|---- | ----|---------|------------------|------|---- | |||
RCV.NXT ^ | RCV.NXT ^ | |||
(Fixed) | (Fixed) | |||
1 - RCV.USER = data received but not yet consumed; | 1 - RCV.USER = data received but not yet consumed; | |||
2 - RCV.WND = space advertised to sender; | 2 - RCV.WND = space advertised to sender; | |||
3 - Reduction = space available but not yet | 3 - Reduction = space available but not yet | |||
advertised. | advertised. | |||
]]></artwork> | ||||
</artwork></figure></t> | <t> | |||
<t> | ||||
The suggested SWS avoidance algorithm for the receiver | The suggested SWS avoidance algorithm for the receiver | |||
is to keep RCV.NXT+RCV.WND fixed until the reduction | is to keep RCV.NXT+RCV.WND fixed until the reduction | |||
satisfies: | satisfies: | |||
</t> | </t> | |||
<t><figure><artwork> | <artwork name="" type="" align="left" alt=""><![CDATA[ | |||
RCV.BUFF - RCV.USER - RCV.WND >= | RCV.BUFF - RCV.USER - RCV.WND >= | |||
min( Fr * RCV.BUFF, Eff.snd.MSS ) | min( Fr * RCV.BUFF, Eff.snd.MSS ) | |||
</artwork></figure></t> | ]]></artwork> | |||
<t> | <t> | |||
where Fr is a fraction whose recommended value is 1/2, | where Fr is a fraction whose recommended value is 1/2, | |||
and Eff.snd.MSS is the effective send MSS for the | and Eff.snd.MSS is the effective send MSS for the | |||
connection (see <xref target="mss"/>). When the inequality | connection (see <xref target="mss" format="default"/>). When the inequality | |||
is satisfied, RCV.WND is set to RCV.BUFF-RCV.USER. | is satisfied, RCV.WND is set to RCV.BUFF-RCV.USER. | |||
</t> | </t> | |||
<t> | <t> | |||
Note that the general effect of this algorithm is to | Note that the general effect of this algorithm is to | |||
advance RCV.WND in increments of Eff.snd.MSS (for | advance RCV.WND in increments of Eff.snd.MSS (for | |||
realistic receive buffers: Eff.snd.MSS < RCV.BUFF/2). | realistic receive buffers: Eff.snd.MSS < RCV.BUFF/2). | |||
Note also that the receiver must use its own | Note also that the receiver must use its own | |||
Eff.snd.MSS, making the assumption that it is the same as the sender's. | Eff.snd.MSS, making the assumption that it is the same as the sender's. | |||
</t> | </t> | |||
</section> | ||||
</section> | </section> | |||
</section> | <section anchor="delACK" numbered="true" toc="default"> | |||
<section title="Delayed Acknowledgements - When to Send an ACK Segment" anchor=" | <name>Delayed Acknowledgments -- When to Send an ACK Segment</name> | |||
delACK"> | <t> | |||
<t> | ||||
A host that is receiving a stream of TCP data segments can | A host that is receiving a stream of TCP data segments can | |||
increase efficiency in both the Internet and the hosts by | increase efficiency in both the network and the hosts by | |||
sending fewer than one ACK (acknowledgment) segment per data | sending fewer than one ACK (acknowledgment) segment per data | |||
segment received; this is known as a "delayed ACK". | segment received; this is known as a "delayed ACK". | |||
</t> | </t> | |||
<t> | <t> | |||
A TCP endpoint SHOULD implement a delayed ACK (SHLD-18), but an ACK | A TCP endpoint <bcp14>SHOULD</bcp14> implement a delayed ACK (SHLD-18 | |||
should not be excessively delayed; in particular, the delay MUST be | ), but an ACK | |||
less than 0.5 seconds (MUST-40). An ACK SHOULD be generated for at | should not be excessively delayed; in particular, the delay <bcp14>MU | |||
ST</bcp14> be | ||||
less than 0.5 seconds (MUST-40). An ACK <bcp14>SHOULD</bcp14> be gen | ||||
erated for at | ||||
least every second full-sized segment or 2*RMSS bytes of new data | least every second full-sized segment or 2*RMSS bytes of new data | |||
(where RMSS is the MSS specified by the TCP endpoint receiving the | (where RMSS is the MSS specified by the TCP endpoint receiving the | |||
segments to be acknowledged, or the default value if not specified) | segments to be acknowledged, or the default value if not specified) | |||
(SHLD-19). Excessive delays on ACKs can disturb the round-trip | (SHLD-19). Excessive delays on ACKs can disturb the round-trip | |||
timing and packet "clocking" algorithms. More complete | timing and packet "clocking" algorithms. More complete | |||
discussion of delayed ACK behavior is in Section 4.2 of RFC 5681 | discussion of delayed ACK behavior is in Section <xref target="RFC568 | |||
<xref target="RFC5681"/>, including recommendations to immediately | 1" section="4.2" sectionFormat="bare" format="default"/> of RFC 5681 | |||
<xref target="RFC5681" format="default"/>, including recommendations | ||||
to immediately | ||||
acknowledge out-of-order segments, segments above a gap in sequence | acknowledge out-of-order segments, segments above a gap in sequence | |||
space, or segments that fill all or part of a gap, in order to | space, or segments that fill all or part of a gap, in order to | |||
accelerate loss recovery. | accelerate loss recovery. | |||
</t> | </t> | |||
<t> | <t> | |||
Note that there are several current | Note that there are several current | |||
practices that further lead to a reduced number of ACKs, including | practices that further lead to a reduced number of ACKs, including | |||
generic receive offload (GRO) <xref target="offload"/>, ACK compressi | generic receive offload (GRO) <xref target="offload" format="default" | |||
on, and ACK decimation | />, ACK compression, and ACK decimation | |||
<xref target="RFC3449"/>. | <xref target="RFC3449" format="default"/>. | |||
</t> | </t> | |||
</section> | </section> | |||
</section> | </section> | |||
</section> | ||||
</section> | <section numbered="true" toc="default"> | |||
<section title="Interfaces"> | <name>Interfaces</name> | |||
<t> | <t> | |||
There are of course two interfaces of concern: the user/TCP interface | There are of course two interfaces of concern: the user/TCP interface | |||
and the TCP/lower level interface. We have a fairly elaborate model | and the TCP/lower-level interface. We have a fairly elaborate model | |||
of the user/TCP interface, but the interface to the lower level | of the user/TCP interface, but the interface to the lower-level | |||
protocol module is left unspecified here, since it will be specified | protocol module is left unspecified here since it will be specified | |||
in detail by the specification of the lower level protocol. For the | in detail by the specification of the lower-level protocol. For the | |||
case that the lower level is IP we note some of the parameter values | case that the lower level is IP, we note some of the parameter values | |||
that TCP implementations might use. | that TCP implementations might use. | |||
</t> | </t> | |||
<section title="User/TCP Interface" anchor="user-api"> | <section anchor="user-api" numbered="true" toc="default"> | |||
<t> | <name>User/TCP Interface</name> | |||
<t> | ||||
The following functional description of user commands to the TCP implementation is, | The following functional description of user commands to the TCP implementation is, | |||
at best, fictional, since every operating system will have different | at best, fictional, since every operating system will have different | |||
facilities. Consequently, we must warn readers that different TCP | facilities. Consequently, we must warn readers that different TCP | |||
implementations may have different user interfaces. However, all | implementations may have different user interfaces. However, all | |||
TCP implementations must provide a certain minimum set of services to guarantee | TCP implementations must provide a certain minimum set of services to guarantee | |||
that all TCP implementations can support the same protocol | that all TCP implementations can support the same protocol | |||
hierarchy. This section specifies the functional interfaces | hierarchy. This section specifies the functional interfaces | |||
required of all TCP implementations. | required of all TCP implementations. | |||
</t> | </t> | |||
<t> | <t> | |||
Section 3.1 of <xref target="RFC8303"/> also identifies primitives provided by T | <xref target="RFC8303" section="3.1" sectionFormat="of" format="default"/> also | |||
CP, and could be used as an additional reference for implementers. | identifies primitives provided by TCP and could be used as an additional referen | |||
ce for implementers. | ||||
</t> | </t> | |||
<t> | <t> | |||
The following sections functionally characterize a USER/TCP | The following sections functionally characterize a user/TCP | |||
interface. The notation used is similar to most procedure or | interface. The notation used is similar to most procedure or | |||
function calls in high level languages, but this usage is not | function calls in high-level languages, but this usage is not | |||
meant to rule out trap type service calls. | meant to rule out trap-type service calls. | |||
</t> | </t> | |||
<t> | <t> | |||
The user commands described below specify the basic functions the | The user commands described below specify the basic functions the | |||
TCP implementation must perform to support interprocess communication. | TCP implementation must perform to support interprocess communication. | |||
Individual implementations must define their own exact format, and | Individual implementations must define their own exact format and | |||
may provide combinations or subsets of the basic functions in | may provide combinations or subsets of the basic functions in | |||
single calls. In particular, some implementations may wish to | single calls. In particular, some implementations may wish to | |||
automatically OPEN a connection on the first SEND or RECEIVE | automatically OPEN a connection on the first SEND or RECEIVE | |||
issued by the user for a given connection. | issued by the user for a given connection. | |||
</t> | </t> | |||
<t> | <t> | |||
In providing interprocess communication facilities, the TCP implementation must | In providing interprocess communication facilities, the TCP implementation must | |||
not only accept commands, but must also return information to the | not only accept commands, but must also return information to the | |||
processes it serves. The latter consists of: | processes it serves. The latter consists of: | |||
<list> | </t> | |||
<t> | <ol type="(%c)" spacing="normal"> | |||
(a) general information about a connection (e.g., interrupts, | <li> | |||
general information about a connection (e.g., interrupts, | ||||
remote close, binding of unspecified remote socket). | remote close, binding of unspecified remote socket). | |||
</t> | </li> | |||
<t> | <li> | |||
(b) replies to specific user commands indicating success or | replies to specific user commands indicating success or | |||
various types of failure. | various types of failure. | |||
</t> | </li> | |||
</list> | </ol> | |||
</t> | <section numbered="true" toc="default"> | |||
<name>Open</name> | ||||
<section title="Open"> | <t> | |||
<t> | ||||
<list> | ||||
<t> | ||||
Format: OPEN (local port, remote socket, active/passive | Format: OPEN (local port, remote socket, active/passive | |||
[, timeout] [, DiffServ field] [, security/compartment] | [, timeout] [, Diffserv field] [, security/compartment] | |||
[local IP address,] [, options]) | [, local IP address] [, options]) | |||
-> local connection name | -> local connection name | |||
</t> | </t> | |||
<t> | <t> | |||
If the active/passive flag is set to passive, then this is a | If the active/passive flag is set to passive, then this is a | |||
call to LISTEN for an incoming connection. A passive open may | call to LISTEN for an incoming connection. A passive OPEN may | |||
have either a fully specified remote socket to wait for a | have either a fully specified remote socket to wait for a | |||
particular connection or an unspecified remote socket to wait | particular connection or an unspecified remote socket to wait | |||
for any call. A fully specified passive call can be made active | for any call. A fully specified passive call can be made active | |||
by the subsequent execution of a SEND. | by the subsequent execution of a SEND. | |||
</t> | </t> | |||
<t> | <t> | |||
A transmission control block (TCB) is created and partially | A transmission control block (TCB) is created and partially | |||
filled in with data from the OPEN command parameters. | filled in with data from the OPEN command parameters. | |||
</t> | </t> | |||
<t> | <t> | |||
Every passive OPEN call either creates a new connection | Every passive OPEN call either creates a new connection | |||
record in LISTEN state, or it returns an error; it MUST NOT | record in LISTEN state, or it returns an error; it <bcp14>MUST NOT</bcp14> | |||
affect any previously created connection record (MUST-41). | affect any previously created connection record (MUST-41). | |||
</t> | </t> | |||
<t> | <t> | |||
A TCP implementation that supports multiple concurrent connections MUST | A TCP implementation that supports multiple concurrent connections <bcp1 | |||
provide | 4>MUST</bcp14> provide | |||
an OPEN call that will functionally allow an application to | an OPEN call that will functionally allow an application to | |||
LISTEN on a port while a connection block with the same | LISTEN on a port while a connection block with the same | |||
local port is in SYN-SENT or SYN-RECEIVED state (MUST-42). | local port is in SYN-SENT or SYN-RECEIVED state (MUST-42). | |||
</t> | </t> | |||
<t> | <t> | |||
On an active OPEN command, the TCP endpoint will begin the procedure to | On an active OPEN command, the TCP endpoint will begin the procedure to | |||
synchronize (i.e., establish) the connection at once. | synchronize (i.e., establish) the connection at once. | |||
</t> | </t> | |||
<t> | <t> | |||
The timeout, if present, permits the caller to set up a timeout | The timeout, if present, permits the caller to set up a timeout | |||
for all data submitted to TCP. If data is not successfully | for all data submitted to TCP. If data is not successfully | |||
delivered to the destination within the timeout period, the TCP endpoint | delivered to the destination within the timeout period, the TCP endpoint | |||
will abort the connection. The present global default is five | will abort the connection. The present global default is five | |||
minutes. | minutes. | |||
</t> | </t> | |||
<t> | ||||
<t> | ||||
The TCP implementation or some component of the operating system will verify | The TCP implementation or some component of the operating system will verify | |||
the user's authority to open a connection with the specified | the user's authority to open a connection with the specified | |||
DiffServ field value or security/compartment. The absence of a | Diffserv field value or security/compartment. The absence of a | |||
DiffServ field value | Diffserv field value | |||
or security/compartment specification in the OPEN call indicates | or security/compartment specification in the OPEN call indicates | |||
the default values must be used. | the default values must be used. | |||
</t> | </t> | |||
<t> | <t> | |||
TCP will accept incoming requests as matching only if the | TCP will accept incoming requests as matching only if the | |||
security/compartment information is exactly the same as that | security/compartment information is exactly the same as that | |||
requested in the OPEN call. | requested in the OPEN call. | |||
</t> | </t> | |||
<t> | <t> | |||
The DiffServ field value indicated by the user only impacts outgoing packets, | The Diffserv field value indicated by the user only impacts outgoing packets, | |||
may be altered en route through the network, and has no direct bearing or relation | may be altered en route through the network, and has no direct bearing or relation | |||
to received packets. | to received packets. | |||
</t> | </t> | |||
<t> | <t> | |||
A local connection name will be returned to the user by the TCP implementation. | A local connection name will be returned to the user by the TCP implementation. | |||
The local connection name can then be used as a short-hand term | The local connection name can then be used as a shorthand term | |||
for the connection defined by the <local socket, remote socket> | for the connection defined by the <local socket, remote socket> | |||
pair. | pair. | |||
</t> | </t> | |||
<t> | <t> | |||
The optional "local IP address" parameter MUST be supported | The optional "local IP address" parameter <bcp14>MUST</bcp14> be supporte | |||
d | ||||
to allow the specification of the local IP address (MUST-43). This enables | to allow the specification of the local IP address (MUST-43). This enables | |||
applications that need to select the local IP address used when | applications that need to select the local IP address used when | |||
multihoming is present. | multihoming is present. | |||
</t> | </t> | |||
<t> | ||||
<t> | A passive OPEN call with a specified "local IP address" | |||
A passive OPEN call with a specified "local IP address" | ||||
parameter will await an incoming connection request to | parameter will await an incoming connection request to | |||
that address. If the parameter is unspecified, a | that address. If the parameter is unspecified, a | |||
passive OPEN will await an incoming connection request | passive OPEN will await an incoming connection request | |||
to any local IP address, and then bind the local IP | to any local IP address and then bind the local IP | |||
address of the connection to the particular address | address of the connection to the particular address | |||
that is used. | that is used. | |||
</t> | </t> | |||
<t> | <t> | |||
For an active OPEN call, a specified "local IP address" parameter | For an active OPEN call, a specified "local IP address" parameter | |||
will be used for opening the connection. If the parameter is unspecified, the | will be used for opening the connection. If the parameter is unspecified, the | |||
host will choose an appropriate local IP address (see RFC 1122 section 3.3.4.2). | host will choose an appropriate local IP address (see RFC 1122, Section <xref target="RFC1122" section="3.3.4.2" sectionFormat="bare" format="default"/>). | |||
</t> | </t> | |||
<t> | ||||
<t> | ||||
If an application on a multihomed host does not specify the | If an application on a multihomed host does not specify the | |||
local IP address when actively opening a TCP connection, | local IP address when actively opening a TCP connection, | |||
then the TCP implementation MUST ask the IP layer to select a local IP | then the TCP implementation <bcp14>MUST</bcp14> ask the IP layer to select a local IP | |||
address before sending the (first) SYN (MUST-44). See the function | address before sending the (first) SYN (MUST-44). See the function | |||
GET_SRCADDR() in Section 3.4 of RFC 1122. | GET_SRCADDR() in Section <xref target="RFC1122" section="3.4" sectionFormat="bare" format="default"/> of RFC 1122. | |||
</t> | </t> | |||
<t> | <t> | |||
At all other times, a previous segment has either been sent | At all other times, a previous segment has either been sent | |||
or received on this connection, and TCP implementations MUST use the same | or received on this connection, and TCP implementations <bcp14>MUST</bcp14> use the same | |||
local address that was used in those previous | local address that was used in those previous | |||
segments (MUST-45). | segments (MUST-45). | |||
</t> | </t> | |||
<t> | <t> | |||
A TCP implementation MUST reject as an error a local OPEN | A TCP implementation <bcp14>MUST</bcp14> reject as an error a local | |||
OPEN | ||||
call for an invalid remote IP address (e.g., a broadcast or | call for an invalid remote IP address (e.g., a broadcast or | |||
multicast address) (MUST-46). | multicast address) (MUST-46). | |||
</t> | </t> | |||
</list> | ||||
</t> | ||||
</section> | ||||
<section title="Send"> | </section> | |||
<t> | <section numbered="true" toc="default"> | |||
<list> | <name>Send</name> | |||
<t> | <t> | |||
Format: SEND (local connection name, buffer address, byte | Format: SEND (local connection name, buffer address, byte | |||
count, PUSH flag (optional), URGENT flag [,timeout]) | count, URGENT flag [, PUSH flag] [, timeout]) | |||
</t> | </t> | |||
<t> | <t> | |||
This call causes the data contained in the indicated user buffer | This call causes the data contained in the indicated user buffer | |||
to be sent on the indicated connection. If the connection has | to be sent on the indicated connection. If the connection has | |||
not been opened, the SEND is considered an error. Some | not been opened, the SEND is considered an error. Some | |||
implementations may allow users to SEND first; in which case, an | implementations may allow users to SEND first; in which case, an | |||
automatic OPEN would be done. For example, this might be one way | automatic OPEN would be done. For example, this might be one way | |||
for application data to be included in SYN segments. If the calling process is not | for application data to be included in SYN segments. If the calling process is not | |||
authorized to use this connection, an error is returned. | authorized to use this connection, an error is returned. | |||
</t> | </t> | |||
<t> | <t> | |||
A TCP endpoint MAY implement PUSH flags on SEND calls (MAY-15). If PUSH flags a | A TCP endpoint <bcp14>MAY</bcp14> implement PUSH flags on SEND calls (MAY-15). | |||
re not | If PUSH flags are not | |||
implemented, then the sending TCP peer: (1) MUST NOT buffer data indefinitely (M | implemented, then the sending TCP peer: (1) <bcp14>MUST NOT</bcp14> buffer data | |||
UST-60), and | indefinitely (MUST-60), and | |||
(2) MUST set the PSH bit in the last buffered segment (i.e., when there is no | (2) <bcp14>MUST</bcp14> set the PSH bit in the last buffered segment (i.e., when | |||
there is no | ||||
more queued data to be sent) (MUST-61). The remaining description below assumes the PUSH | more queued data to be sent) (MUST-61). The remaining description below assumes the PUSH | |||
flag is supported on SEND calls. | flag is supported on SEND calls. | |||
</t> | </t> | |||
<t> | <t> | |||
If the PUSH flag is set, the application intends the data to be | If the PUSH flag is set, the application intends the data to be | |||
transmitted promptly to the receiver, and the PUSH bit will be set in the last | transmitted promptly to the receiver, and the PSH bit will be set in the last | |||
TCP segment created from the buffer. | TCP segment created from the buffer. | |||
</t> | </t> | |||
<t> | <t> | |||
The PSH bit is not a record marker and is independent of segment boundaries. | The PSH bit is not a record marker and is independent of segment boundaries. | |||
The transmitter SHOULD collapse successive bits when it packetizes data, to | The transmitter <bcp14>SHOULD</bcp14> collapse successive bits when it packetizes data, to | |||
send the largest possible segment (SHLD-27). | send the largest possible segment (SHLD-27). | |||
</t> | </t> | |||
<t> | <t> | |||
If the PUSH flag is not set, the data may be combined with data from | If the PUSH flag is not set, the data may be combined with data from | |||
subsequent SENDs for transmission efficiency. | subsequent SENDs for transmission efficiency. | |||
When an application issues a series of | When an application issues a series of | |||
SEND calls without setting the PUSH flag, the TCP implementation MAY aggregate the data | SEND calls without setting the PUSH flag, the TCP implementation <bcp14>MAY</bcp14> aggregate the data | |||
internally without sending it (MAY-16). | internally without sending it (MAY-16). | |||
Note that when the Nagle | Note that when the Nagle | |||
algorithm is in use, TCP implementations may buffer the data before sending, without regard to | algorithm is in use, TCP implementations may buffer the data before sending, without regard to | |||
the PUSH flag (see <xref target="nagle"/>). | the PUSH flag (see <xref target="nagle" format="default"/>). | |||
</t> | </t> | |||
<t> | <t> | |||
An application program is logically required to set the PUSH flag in a SEND | An application program is logically required to set the PUSH flag in a SEND | |||
call whenever it needs to force delivery of the data to avoid a communication | call whenever it needs to force delivery of the data to avoid a communication | |||
deadlock. However, a TCP implementation SHOULD send a maximum-sized segment whe | deadlock. However, a TCP implementation <bcp14>SHOULD</bcp14> send a maximum-si | |||
never | zed segment whenever | |||
possible (SHLD-28), to improve performance (see <xref target="SWSsender"/>). | possible (SHLD-28) to improve performance (see <xref target="SWSsender" format=" | |||
default"/>). | ||||
</t> | </t> | |||
<t> | ||||
<t> | New applications <bcp14>SHOULD NOT</bcp14> set the URGENT flag <xref tar | |||
New applications SHOULD NOT set the URGENT flag <xref target="RFC6093"/> | get="RFC6093" format="default"/> due to implementation differences and middlebox | |||
due to implementation differences and middlebox issues (SHLD-13). | issues (SHLD-13). | |||
</t> | </t> | |||
<t> | <t> | |||
If the URGENT flag is set, segments sent to the destination TCP peer | If the URGENT flag is set, segments sent to the destination TCP peer | |||
will have the urgent pointer set. The receiving TCP peer will signal | will have the urgent pointer set. The receiving TCP peer will signal | |||
the urgent condition to the receiving process if the urgent | the urgent condition to the receiving process if the urgent | |||
pointer indicates that data preceding the urgent pointer has not | pointer indicates that data preceding the urgent pointer has not | |||
been consumed by the receiving process. The purpose of urgent | been consumed by the receiving process. The purpose of the URGENT flag | |||
is to stimulate the receiver to process the urgent data and to | is to stimulate the receiver to process the urgent data and to | |||
indicate to the receiver when all the currently known urgent | indicate to the receiver when all the currently known urgent | |||
data has been received. The number of times the sending user's | data has been received. The number of times the sending user's | |||
TCP implementation signals urgent will not necessarily be equal to the number | TCP implementation signals urgent will not necessarily be equal to the number | |||
of times the receiving user will be notified of the presence of | of times the receiving user will be notified of the presence of | |||
urgent data. | urgent data. | |||
</t> | </t> | |||
<t> | <t> | |||
If no remote socket was specified in the OPEN, but the | If no remote socket was specified in the OPEN, but the | |||
connection is established (e.g., because a LISTENing connection | connection is established (e.g., because a LISTENing connection | |||
has become specific due to a remote segment arriving for the | has become specific due to a remote segment arriving for the | |||
local socket), then the designated buffer is sent to the implied | local socket), then the designated buffer is sent to the implied | |||
remote socket. Users who make use of OPEN with an unspecified | remote socket. Users who make use of OPEN with an unspecified | |||
remote socket can make use of SEND without ever explicitly | remote socket can make use of SEND without ever explicitly | |||
knowing the remote socket address. | knowing the remote socket address. | |||
</t> | </t> | |||
<t> | <t> | |||
However, if a SEND is attempted before the remote socket | However, if a SEND is attempted before the remote socket | |||
becomes specified, an error will be returned. Users can use the | becomes specified, an error will be returned. Users can use the | |||
STATUS call to determine the status of the connection. Some | STATUS call to determine the status of the connection. Some | |||
TCP implementations may notify the user when an unspecified | TCP implementations may notify the user when an unspecified | |||
socket is bound. | socket is bound. | |||
</t> | </t> | |||
<t> | <t> | |||
If a timeout is specified, the current user timeout for this | If a timeout is specified, the current user timeout for this | |||
connection is changed to the new one. | connection is changed to the new one. | |||
</t> | </t> | |||
<t> | <t> | |||
In the simplest implementation, SEND would not return control to | In the simplest implementation, SEND would not return control to | |||
the sending process until either the transmission was complete | the sending process until either the transmission was complete | |||
or the timeout had been exceeded. However, this simple method | or the timeout had been exceeded. However, this simple method | |||
is both subject to deadlocks (for example, both sides of the | is both subject to deadlocks (for example, both sides of the | |||
connection might try to do SENDs before doing any RECEIVEs) and | connection might try to do SENDs before doing any RECEIVEs) and | |||
offers poor performance, so it is not recommended. A more | offers poor performance, so it is not recommended. A more | |||
sophisticated implementation would return immediately to allow | sophisticated implementation would return immediately to allow | |||
the process to run concurrently with network I/O, and, | the process to run concurrently with network I/O, and, | |||
furthermore, to allow multiple SENDs to be in progress. | furthermore, to allow multiple SENDs to be in progress. | |||
Multiple SENDs are served in first come, first served order, so | Multiple SENDs are served in first come, first served order, so | |||
the TCP endpoint will queue those it cannot service immediately. | the TCP endpoint will queue those it cannot service immediately. | |||
</t> | </t> | |||
<t> | <t> | |||
We have implicitly assumed an asynchronous user interface in | We have implicitly assumed an asynchronous user interface in | |||
which a SEND later elicits some kind of SIGNAL or | which a SEND later elicits some kind of SIGNAL or | |||
pseudo-interrupt from the serving TCP endpoint. An alternative is to | pseudo-interrupt from the serving TCP endpoint. An alternative is to | |||
return a response immediately. For instance, SENDs might return | return a response immediately. For instance, SENDs might return | |||
immediate local acknowledgment, even if the segment sent had not | immediate local acknowledgment, even if the segment sent had not | |||
been acknowledged by the distant TCP endpoint. We could optimistically | been acknowledged by the distant TCP endpoint. We could optimistically | |||
assume eventual success. If we are wrong, the connection will | assume eventual success. If we are wrong, the connection will | |||
close anyway due to the timeout. In implementations of this | close anyway due to the timeout. In implementations of this | |||
kind (synchronous), there will still be some asynchronous | kind (synchronous), there will still be some asynchronous | |||
signals, but these will deal with the connection itself, and not | signals, but these will deal with the connection itself, and not | |||
with specific segments or buffers. | with specific segments or buffers. | |||
</t> | </t> | |||
<t> | <t> | |||
In order for the process to distinguish among error or success | In order for the process to distinguish among error or success | |||
indications for different SENDs, it might be appropriate for the | indications for different SENDs, it might be appropriate for the | |||
buffer address to be returned along with the coded response to | buffer address to be returned along with the coded response to | |||
the SEND request. TCP-to-user signals are discussed below, | the SEND request. TCP-to-user signals are discussed below, | |||
indicating the information that should be returned to the | indicating the information that should be returned to the | |||
calling process. | calling process. | |||
</t> | </t> | |||
</list> | ||||
</t> | ||||
</section> | ||||
<section title="Receive"> | </section> | |||
<t> | <section numbered="true" toc="default"> | |||
<list> | <name>Receive</name> | |||
<t> | ||||
<t> | ||||
Format: RECEIVE (local connection name, buffer address, byte | Format: RECEIVE (local connection name, buffer address, byte | |||
count) -> byte count, urgent flag, push flag (optional) | count) -> byte count, URGENT flag [, PUSH flag] | |||
</t> | </t> | |||
<t> | <t> | |||
This command allocates a receiving buffer associated with the | This command allocates a receiving buffer associated with the | |||
specified connection. If no OPEN precedes this command or the | specified connection. If no OPEN precedes this command or the | |||
calling process is not authorized to use this connection, an | calling process is not authorized to use this connection, an | |||
error is returned. | error is returned. | |||
</t> | </t> | |||
<t> | <t> | |||
In the simplest implementation, control would not return to the | In the simplest implementation, control would not return to the | |||
calling program until either the buffer was filled, or some | calling program until either the buffer was filled or some | |||
error occurred, but this scheme is highly subject to deadlocks. | error occurred, but this scheme is highly subject to deadlocks. | |||
A more sophisticated implementation would permit several | A more sophisticated implementation would permit several | |||
RECEIVEs to be outstanding at once. These would be filled as | RECEIVEs to be outstanding at once. These would be filled as | |||
segments arrive. This strategy permits increased throughput at | segments arrive. This strategy permits increased throughput at | |||
the cost of a more elaborate scheme (possibly asynchronous) to | the cost of a more elaborate scheme (possibly asynchronous) to | |||
notify the calling program that a PUSH has been seen or a buffer | notify the calling program that a PUSH has been seen or a buffer | |||
filled. | filled. | |||
</t> | </t> | |||
<t> | ||||
A TCP receiver MAY pass a received PSH flag to the application layer via the | <t> | |||
A TCP receiver <bcp14>MAY</bcp14> pass a received PSH bit to the application lay | ||||
er via the | ||||
PUSH flag in the interface (MAY-17), but it is not required (this was clarified in RFC | PUSH flag in the interface (MAY-17), but it is not required (this was clarified in RFC | |||
1122 section 4.2.2.2). The remainder of text describing the RECEIVE call below | 1122, Section <xref target="RFC1122" section="4.2.2.2" sectionFormat="bare" format="default"/>). The remainder of text describing the RECEIVE call below | |||
assumes that passing the PUSH indication is supported. | assumes that passing the PUSH indication is supported. | |||
</t> | </t> | |||
<t> | <t> | |||
If enough data arrive to fill the buffer before a PUSH is seen, | If enough data arrive to fill the buffer before a PUSH is seen, | |||
the PUSH flag will not be set in the response to the RECEIVE. | the PUSH flag will not be set in the response to the RECEIVE. | |||
The buffer will be filled with as much data as it can hold. If | The buffer will be filled with as much data as it can hold. If | |||
a PUSH is seen before the buffer is filled the buffer will be | a PUSH is seen before the buffer is filled, the buffer will be | |||
returned partially filled and PUSH indicated. | returned partially filled and PUSH indicated. | |||
</t> | </t> | |||
<t> | <t> | |||
If there is urgent data the user will have been informed as soon | If there is urgent data, the user will have been informed as soon | |||
as it arrived via a TCP-to-user signal. The receiving user | as it arrived via a TCP-to-user signal. The receiving user | |||
should thus be in "urgent mode". If the URGENT flag is on, | should thus be in "urgent mode". If the URGENT flag is on, | |||
additional urgent data remains. If the URGENT flag is off, this | additional urgent data remains. If the URGENT flag is off, this | |||
call to RECEIVE has returned all the urgent data, and the user | call to RECEIVE has returned all the urgent data, and the user | |||
may now leave "urgent mode". Note that data following the | may now leave "urgent mode". Note that data following the | |||
urgent pointer (non-urgent data) cannot be delivered to the user | urgent pointer (non-urgent data) cannot be delivered to the user | |||
in the same buffer with preceding urgent data unless the | in the same buffer with preceding urgent data unless the | |||
boundary is clearly marked for the user. | boundary is clearly marked for the user. | |||
</t> | </t> | |||
<t> | <t> | |||
To distinguish among several outstanding RECEIVEs and to take | To distinguish among several outstanding RECEIVEs and to take | |||
care of the case that a buffer is not completely filled, the | care of the case that a buffer is not completely filled, the | |||
return code is accompanied by both a buffer pointer and a byte | return code is accompanied by both a buffer pointer and a byte | |||
count indicating the actual length of the data received. | count indicating the actual length of the data received. | |||
</t> | </t> | |||
<t> | <t> | |||
Alternative implementations of RECEIVE might have the TCP endpoint | Alternative implementations of RECEIVE might have the TCP endpoint | |||
allocate buffer storage, or the TCP endpoint might share a ring buffer | allocate buffer storage, or the TCP endpoint might share a ring buffer | |||
with the user. | with the user. | |||
</t> | </t> | |||
</list> | ||||
</t> | ||||
</section> | ||||
<section title="Close"> | </section> | |||
<t> | <section numbered="true" toc="default"> | |||
<list> | <name>Close</name> | |||
<t> | ||||
<t> | ||||
Format: CLOSE (local connection name) | Format: CLOSE (local connection name) | |||
</t> | </t> | |||
<t> | <t> | |||
This command causes the connection specified to be closed. If | This command causes the connection specified to be closed. If | |||
the connection is not open or the calling process is not | the connection is not open or the calling process is not | |||
authorized to use this connection, an error is returned. | authorized to use this connection, an error is returned. | |||
Closing connections is intended to be a graceful operation in | Closing connections is intended to be a graceful operation in | |||
the sense that outstanding SENDs will be transmitted (and | the sense that outstanding SENDs will be transmitted (and | |||
retransmitted), as flow control permits, until all have been | retransmitted), as flow control permits, until all have been | |||
serviced. Thus, it should be acceptable to make several SEND | serviced. Thus, it should be acceptable to make several SEND | |||
calls, followed by a CLOSE, and expect all the data to be sent | calls, followed by a CLOSE, and expect all the data to be sent | |||
to the destination. It should also be clear that users should | to the destination. It should also be clear that users should | |||
continue to RECEIVE on CLOSING connections, since the remote peer | continue to RECEIVE on CLOSING connections since the remote peer | |||
may be trying to transmit the last of its data. Thus, CLOSE | may be trying to transmit the last of its data. Thus, CLOSE | |||
means "I have no more to send" but does not mean "I will not | means "I have no more to send" but does not mean "I will not | |||
receive any more." It may happen (if the user level protocol is | receive any more." It may happen (if the user-level protocol is | |||
not well-thought-out) that the closing side is unable to get rid | not well thought out) that the closing side is unable to get rid | |||
of all its data before timing out. In this event, CLOSE turns | of all its data before timing out. In this event, CLOSE turns | |||
into ABORT, and the closing TCP peer gives up. | into ABORT, and the closing TCP peer gives up. | |||
</t> | </t> | |||
<t> | <t> | |||
The user may CLOSE the connection at any time on their own | The user may CLOSE the connection at any time on their own | |||
initiative, or in response to various prompts from the TCP implementation | initiative, or in response to various prompts from the TCP implementation | |||
(e.g., remote close executed, transmission timeout exceeded, | (e.g., remote close executed, transmission timeout exceeded, | |||
destination inaccessible). | destination inaccessible). | |||
</t> | </t> | |||
<t> | <t> | |||
Because closing a connection requires communication with the | Because closing a connection requires communication with the | |||
remote TCP peer, connections may remain in the closing state for a | remote TCP peer, connections may remain in the closing state for a | |||
short time. Attempts to reopen the connection before the TCP peer | short time. Attempts to reopen the connection before the TCP peer | |||
replies to the CLOSE command will result in error responses. | replies to the CLOSE command will result in error responses. | |||
</t> | </t> | |||
<t> | <t> | |||
Close also implies push function. | Close also implies push function. | |||
</t> | </t> | |||
</list> | ||||
</t> | ||||
</section> | ||||
<section title="Status"> | </section> | |||
<t> | <section numbered="true" toc="default"> | |||
<list> | <name>Status</name> | |||
<t> | ||||
Format: STATUS (local connection name) -> status data | <t> | |||
Format: STATUS (local connection name) -> status data | ||||
</t> | </t> | |||
<t> | <t> | |||
This is an implementation dependent user command and could be | This is an implementation-dependent user command and could be | |||
excluded without adverse effect. Information returned would | excluded without adverse effect. Information returned would | |||
typically come from the TCB associated with the connection. | typically come from the TCB associated with the connection. | |||
</t> | </t> | |||
<t> | ||||
<t> | ||||
This command returns a data block containing the following | This command returns a data block containing the following | |||
information: | information: | |||
<list> | </t> | |||
<t>local socket,<vspace /> | <ul spacing="normal" empty="true"> | |||
remote socket,<vspace /> | ||||
local connection name,<vspace /> | <li>local socket,</li> | |||
receive window,<vspace /> | <li> | |||
send window,<vspace /> | remote socket,</li> | |||
connection state,<vspace /> | <li> | |||
number of buffers awaiting acknowledgment,<vspace /> | local connection name,</li> | |||
number of buffers pending receipt,<vspace /> | <li> | |||
urgent state,<vspace /> | receive window,</li> | |||
DiffServ field value,<vspace /> | <li> | |||
security/compartment,<vspace /> | send window,</li> | |||
and transmission timeout.</t> | <li> | |||
</list> | connection state,</li> | |||
</t> | <li> | |||
<t> | number of buffers awaiting acknowledgment,</li> | |||
<li> | ||||
number of buffers pending receipt,</li> | ||||
<li> | ||||
urgent state,</li> | ||||
<li> | ||||
Diffserv field value,</li> | ||||
<li> | ||||
security/compartment, and</li> | ||||
<li> | ||||
transmission timeout.</li> | ||||
</ul> | ||||
<t> | ||||
Depending on the state of the connection, or on the | Depending on the state of the connection, or on the | |||
implementation itself, some of this information may not be | implementation itself, some of this information may not be | |||
available or meaningful. If the calling process is not | available or meaningful. If the calling process is not | |||
authorized to use this connection, an error is returned. This | authorized to use this connection, an error is returned. This | |||
prevents unauthorized processes from gaining information about a | prevents unauthorized processes from gaining information about a | |||
connection. | connection. | |||
</t> | </t> | |||
</list> | ||||
</t> | ||||
</section> | ||||
<section title="Abort"> | </section> | |||
<t> | <section numbered="true" toc="default"> | |||
<list> | <name>Abort</name> | |||
<t> | ||||
<t> | ||||
Format: ABORT (local connection name) | Format: ABORT (local connection name) | |||
</t> | </t> | |||
<t> | <t> | |||
This command causes all pending SENDs and RECEIVEs to be | This command causes all pending SENDs and RECEIVEs to be | |||
aborted, the TCB to be removed, and a special RESET message to | aborted, the TCB to be removed, and a special RST message to | |||
be sent to the remote TCP peer of the connection. | be sent to the remote TCP peer of the connection. | |||
Depending on the implementation, users may receive abort | Depending on the implementation, users may receive abort | |||
indications for each outstanding SEND or RECEIVE, or may simply | indications for each outstanding SEND or RECEIVE, or may simply | |||
receive an ABORT-acknowledgment. | receive an ABORT-acknowledgment. | |||
</t> | </t> | |||
</list> | ||||
</t> | ||||
</section> | ||||
<section title="Flush"> | </section> | |||
<t> | <section numbered="true" toc="default"> | |||
<list> | <name>Flush</name> | |||
<t> | ||||
<t> | ||||
Some TCP implementations have included a FLUSH call, which | Some TCP implementations have included a FLUSH call, which | |||
will empty the TCP send queue of any data that the user | will empty the TCP send queue of any data that the user | |||
has issued SEND calls for but is still to the right of the | has issued SEND calls for but is still to the right of the | |||
current send window. That is, it flushes as much queued | current send window. That is, it flushes as much queued | |||
send data as possible without losing sequence number | send data as possible without losing sequence number | |||
synchronization. The FLUSH call MAY be implemented (MAY-14). | synchronization. The FLUSH call <bcp14>MAY</bcp14> be implemented (MAY-14). | |||
</t> | </t> | |||
</list> | ||||
</t> | ||||
</section> | ||||
<section title="Asynchronous Reports"> | </section> | |||
<t> | <section anchor="asynchronous-reports" numbered="true" toc="default"> | |||
<list> | <name>Asynchronous Reports</name> | |||
<t> | ||||
There MUST be a mechanism for reporting soft TCP error | <t> | |||
There <bcp14>MUST</bcp14> be a mechanism for reporting soft TCP error | ||||
conditions to the application (MUST-47). Generically, we assume this | conditions to the application (MUST-47). Generically, we assume this | |||
takes the form of an application-supplied ERROR_REPORT | takes the form of an application-supplied ERROR_REPORT | |||
routine that may be upcalled asynchronously from | routine that may be upcalled asynchronously from | |||
the transport layer: | the transport layer: | |||
<list><t> | </t> | |||
<ul spacing="normal" empty="true"> | ||||
<li> | ||||
ERROR_REPORT(local connection name, reason, subreason) | ERROR_REPORT(local connection name, reason, subreason) | |||
</t></list> | </li> | |||
</ul> | ||||
<t> | ||||
The precise encoding of the reason and subreason parameters | The precise encoding of the reason and subreason parameters | |||
is not specified here. However, the conditions that are | is not specified here. However, the conditions that are | |||
reported asynchronously to the application MUST include: | reported asynchronously to the application <bcp14>MUST</bcp14> inclu | |||
<list><t> | de: | |||
* ICMP error message arrived (see <xref target="icmp"/> for descr | </t> | |||
iption of handling each ICMP message type, since some message types need to be s | <ul spacing="normal"> | |||
uppressed from generating reports to the application) | <li> | |||
</t><t> | ICMP error message arrived (see <xref target="icmp" format="def | |||
* Excessive retransmissions (see <xref target="connfail"/>) | ault"/> for description of handling each ICMP message type since some message ty | |||
</t><t> | pes need to be suppressed from generating reports to the application) | |||
* Urgent pointer advance (see <xref target="urgent"/>) | </li> | |||
</t></list> | <li> | |||
Excessive retransmissions (see <xref target="connfail" format="d | ||||
efault"/>) | ||||
</li> | ||||
<li> | ||||
Urgent pointer advance (see <xref target="urgent" format="defau | ||||
lt"/>) | ||||
</li> | ||||
</ul> | ||||
<t> | ||||
However, an application program that does not want to | However, an application program that does not want to | |||
receive such ERROR_REPORT calls SHOULD be able to | receive such ERROR_REPORT calls <bcp14>SHOULD</bcp14> be able to | |||
effectively disable these calls (SHLD-20). | effectively disable these calls (SHLD-20). | |||
</t> | </t> | |||
</list> | ||||
</t> | ||||
</section> | ||||
<section title="Set Differentiated Services Field (IPv4 TOS or IPv6 Traffic Clas | </section> | |||
s)"> | <section numbered="true" toc="default"> | |||
<t> | <name>Set Differentiated Services Field (IPv4 TOS or IPv6 Traffic Cl | |||
<list> | ass)</name> | |||
<t> | ||||
The application layer MUST be able to specify the Differentiated Ser | <t> | |||
vices field | The application layer <bcp14>MUST</bcp14> be able to specify the Dif | |||
for segments that are sent on a connection (MUST-48). The Different | ferentiated Services field | |||
iated Services field includes the 6-bit Differentiated Services Code Point (DSCP | for segments that are sent on a connection (MUST-48). The Different | |||
) value. | iated Services field includes the 6-bit Differentiated Services Codepoint (DSCP) | |||
It is not required, but the application SHOULD be able to | value. | |||
change the Differentiated Services field during the connection lifet | It is not required, but the application <bcp14>SHOULD</bcp14> be abl | |||
ime (SHLD-21). TCP implementations SHOULD | e to | |||
change the Differentiated Services field during the connection lifet | ||||
ime (SHLD-21). TCP implementations <bcp14>SHOULD</bcp14> | ||||
pass the current Differentiated Services field value without change to the IP layer, | pass the current Differentiated Services field value without change to the IP layer, | |||
when it sends segments on the connection (SHLD-22). | when it sends segments on the connection (SHLD-22). | |||
</t> | </t> | |||
<t> | <t> | |||
The Differentiated Services field will be specified independently in each direction on | The Differentiated Services field will be specified independently in each direction on | |||
the connection, so that the receiver application will | the connection, so that the receiver application will | |||
specify the Differentiated Services field used for ACK segments. | specify the Differentiated Services field used for ACK segments. | |||
</t> | </t> | |||
<t> | <t> | |||
TCP implementations MAY pass the most recently received Differentiat | TCP implementations <bcp14>MAY</bcp14> pass the most recently receiv | |||
ed Services field up to the | ed Differentiated Services field up to the | |||
application (MAY-9). | application (MAY-9). | |||
</t> | </t> | |||
</list> | ||||
</t> | ||||
</section> | ||||
</section> | </section> | |||
<section title="TCP/Lower-Level Interface"> | </section> | |||
<t> | <section numbered="true" toc="default"> | |||
The TCP endpoint calls on a lower level protocol module to actually send and | <name>TCP/Lower-Level Interface</name> | |||
receive information over a network. The two current standard Internet Proto | <t> | |||
col (IP) versions layered below TCP are IPv4 <xref target="RFC0791"/> and IPv6 < | The TCP endpoint calls on a lower-level protocol module to actually send and | |||
xref target="RFC8200"/>. | receive information over a network. The two current standard Internet Proto | |||
col (IP) versions layered below TCP are IPv4 <xref target="RFC0791" format="defa | ||||
ult"/> and IPv6 <xref target="RFC8200" format="default"/>. | ||||
</t> | </t> | |||
<t> | <t> | |||
If the lower level protocol is IPv4 it provides arguments for a type | If the lower-level protocol is IPv4, it provides arguments for a type | |||
of service (used within the Differentiated Services field) and for a time to live. TCP uses the following settings | of service (used within the Differentiated Services field) and for a time to live. TCP uses the following settings | |||
for these parameters: | for these parameters: | |||
<list> | </t> | |||
<t> | <dl> | |||
DiffServ field: The IP header value for the DiffServ field is given by the | <dt> | |||
user. This includes the bits of the DiffServ Code Point (DSCP). | Diffserv field:</dt><dd>The IP header value for the Diffserv field is give | |||
</t> | n by the user. This includes the bits of the Diffserv Codepoint (DSCP). | |||
<t> | </dd> | |||
Time to Live (TTL): The TTL value used to send TCP segments MUST be config | <dt> | |||
urable (MUST-49). | Time to Live (TTL):</dt><dd><t>The TTL value used to send TCP segments <bc | |||
<list> | p14>MUST</bcp14> be configurable (MUST-49). | |||
<t> | </t> | |||
Note that RFC 793 specified one minute (60 seconds) as a constant for | <ul spacing="normal"> | |||
the TTL, because the assumed maximum segment lifetime was two minutes. This was | <li> | |||
intended to explicitly ask that a segment be destroyed if it cannot be | Note that RFC 793 specified one minute (60 seconds) as a constant for | |||
delivered by the internet system within one minute. RFC 1122 changed this speci | the TTL because the assumed maximum segment lifetime was two minutes. This was | |||
fication to require that the TTL be configurable. | intended to explicitly ask that a segment be destroyed if it could not be | |||
</t> | delivered by the internet system within one minute. RFC 1122 updated RFC 793 to | |||
<t> | require that the TTL be configurable. | |||
Note that the DiffServ field is permitted to change during a connection | </li> | |||
(Section 4.2.4.2 of RFC 1122). However, the application interface might | <li> | |||
Note that the Diffserv field is permitted to change during a connection | ||||
(Section <xref target="RFC1122" section="4.2.4.2" sectionFormat="bare" fo | ||||
rmat="default"/> of RFC 1122). However, the application interface might | ||||
not support this ability, and the application does not have knowledge | not support this ability, and the application does not have knowledge | |||
about individual TCP segments, so this can only be done on a coarse | about individual TCP segments, so this can only be done on a coarse | |||
granularity, at best. This limitation is further discussed in RFC 7657 | granularity, at best. This limitation is further discussed in RFC 7657 | |||
(sec 5.1, 5.3, and 6) <xref target="RFC7657"/>. Generally, an | (Sections <xref target="RFC7657" section="5.1" sectionFormat="bare" forma | |||
application SHOULD NOT change the DiffServ field value during the course | t="default"/>, <xref target="RFC7657" section="5.3" sectionFormat="bare" format= | |||
"default"/>, and <xref target="RFC7657" section="6" sectionFormat="bare" format= | ||||
"default"/>) <xref target="RFC7657" format="default"/>. Generally, an | ||||
application <bcp14>SHOULD NOT</bcp14> change the Diffserv field value dur | ||||
ing the course | ||||
of a connection (SHLD-23). | of a connection (SHLD-23). | |||
</t> | </li> | |||
</list> | ||||
</t> | </ul> | |||
</list></t> | </dd> | |||
<t> | </dl> | |||
Any lower level protocol will have to provide the source address, | <t> | |||
Any lower-level protocol will have to provide the source address, | ||||
destination address, and protocol fields, and some way to determine | destination address, and protocol fields, and some way to determine | |||
the &quot;TCP length&quot;, both to provide the functional equivalent service | the "TCP length", both to provide the functional equivalent service | |||
of IP and to be used in the TCP checksum. | of IP and to be used in the TCP checksum. | |||
</t> | </t> | |||
<t> | <t> | |||
When received options are passed up to TCP from the IP | When received options are passed up to TCP from the IP | |||
layer, a TCP implementation MUST ignore options that it does not understand (MUST-50). | layer, a TCP implementation <bcp14>MUST</bcp14> ignore options that it does not understand (MUST-50). | |||
</t> | </t> | |||
<t> | <t> | |||
A TCP implementation MAY support the Time Stamp (MAY-10) and Record Route (M | A TCP implementation <bcp14>MAY</bcp14> support the Timestamp (MAY-10) and R | |||
AY-11) options. | ecord Route (MAY-11) Options. | |||
</t> | </t> | |||
<section numbered="true" toc="default"> | ||||
<section title="Source Routing"> | <name>Source Routing</name> | |||
<t> | <t> | |||
If the lower level is IP (or other protocol that provides this | If the lower level is IP (or other protocol that provides this | |||
feature) and source routing is used, the interface must allow the | feature) and source routing is used, the interface must allow the | |||
route information to be communicated. This is especially important | route information to be communicated. This is especially important | |||
so that the source and destination addresses used in the TCP | so that the source and destination addresses used in the TCP | |||
checksum be the originating source and ultimate destination. It is | checksum be the originating source and ultimate destination. It is | |||
also important to preserve the return route to answer connection | also important to preserve the return route to answer connection | |||
requests. | requests. | |||
</t> | </t> | |||
<t> | <t> | |||
An application MUST be able to specify a source route when | An application <bcp14>MUST</bcp14> be able to specify a source route when | |||
it actively opens a TCP connection (MUST-51), and this MUST take | it actively opens a TCP connection (MUST-51), and this <bcp14>MUST</bcp14> t | |||
ake | ||||
precedence over a source route received in a datagram (MUST-52). | precedence over a source route received in a datagram (MUST-52). | |||
</t> | </t> | |||
<t> | <t> | |||
When a TCP connection is OPENed passively and a packet | When a TCP connection is OPENed passively and a packet | |||
arrives with a completed IP Source Route option (containing | arrives with a completed IP Source Route Option (containing | |||
a return route), TCP implementations MUST save the return route and use it | a return route), TCP implementations <bcp14>MUST</bcp14> save the return rou | |||
te and use it | ||||
for all segments sent on this connection (MUST-53). If a different | for all segments sent on this connection (MUST-53). If a different | |||
source route arrives in a later segment, the later | source route arrives in a later segment, the later | |||
definition SHOULD override the earlier one (SHLD-24). | definition <bcp14>SHOULD</bcp14> override the earlier one (SHLD-24). | |||
</t> | </t> | |||
</section> | </section> | |||
<section title="ICMP Messages" anchor="icmp"> | <section anchor="icmp" numbered="true" toc="default"> | |||
<t> | <name>ICMP Messages</name> | |||
TCP implementations MUST act on an ICMP error message passed up from | <t> | |||
the IP | TCP implementations <bcp14>MUST</bcp14> act on an ICMP error message | |||
passed up from the IP | ||||
layer, directing it to the connection that created the | layer, directing it to the connection that created the | |||
error (MUST-54). The necessary demultiplexing information can be | error (MUST-54). The necessary demultiplexing information can be | |||
found in the IP header contained within the ICMP message. | found in the IP header contained within the ICMP message. | |||
</t> | </t> | |||
<t> | <t> | |||
This applies to ICMPv6 in addition to IPv4 ICMP. | This applies to ICMPv6 in addition to IPv4 ICMP. | |||
</t> | </t> | |||
<t> | <t> | |||
<xref target="RFC5461"/> contains discussion of specific ICMP and IC | <xref target="RFC5461" format="default"/> contains discussion of spe | |||
MPv6 messages classified as either "soft" or "hard" errors t | cific ICMP and ICMPv6 messages classified as either "soft" or "hard" errors that | |||
hat may bear different responses. Treatment for classes of ICMP messages is des | may bear different responses. Treatment for classes of ICMP messages is descri | |||
cribed below: | bed below: | |||
</t> | </t> | |||
<t> | <dl newline="true" spacing="normal" indent="2"> | |||
<list style="hanging" hangIndent="2"> | <dt>Source Quench</dt> | |||
<t hangText="Source Quench"><vspace /> | <dd> | |||
TCP implementations MUST silently discard any received ICMP Sou | TCP implementations <bcp14>MUST</bcp14> silently discard any re | |||
rce Quench messages (MUST-55). See <xref target="RFC6633"/> for discussion. | ceived ICMP Source Quench messages (MUST-55). See <xref target="RFC6633" format | |||
</t> | ="default"/> for discussion. | |||
<t hangText="Soft Errors"><vspace /> | </dd> | |||
For IPv4 ICMP these include: Destination Unreachable -- codes 0 | <dt>Soft Errors</dt> | |||
, 1, 5; Time Exceeded -- codes 0, 1; and Parameter Problem.<vspace /> | <dd> | |||
For ICMPv6 these include: Destination Unreachable -- codes 0, 3 | <t> | |||
; Time Exceeded -- codes 0, 1; and Parameter Problem -- codes 0, 1, 2.<vspace / | For IPv4 ICMP, these include: Destination Unreachable -- codes | |||
> | 0, 1, 5; Time Exceeded -- codes 0, 1; and Parameter Problem.</t> | |||
<t> | ||||
For ICMPv6, these include: Destination Unreachable -- codes 0, | ||||
3; Time Exceeded -- codes 0, 1; and Parameter Problem -- codes 0, 1, 2.</t> | ||||
<t> | ||||
Since these Unreachable messages indicate soft error | Since these Unreachable messages indicate soft error | |||
conditions, TCP implementations MUST NOT abort the connection ( | conditions, a TCP implementation <bcp14>MUST NOT</bcp14> abort | |||
MUST-56), and it | the connection (MUST-56), and it | |||
SHOULD make the information available to the | <bcp14>SHOULD</bcp14> make the information available to the | |||
application (SHLD-25). | application (SHLD-25). | |||
</t> | </t> | |||
<t hangText="Hard Errors"><vspace /> | </dd> | |||
For ICMP these include Destination Unreachable -- codes 2-4.<vs | <dt>Hard Errors</dt> | |||
pace /> | <dd> | |||
<t> | ||||
For ICMP these include Destination Unreachable -- codes 2-4.</t | ||||
> | ||||
<t> | ||||
These are hard error conditions, so TCP implementations SHOULD | These are hard error conditions, so TCP implementations <bcp14> | |||
abort | SHOULD</bcp14> abort | |||
the connection (SHLD-26). <xref target="RFC5461"/> notes that | the connection (SHLD-26). <xref target="RFC5461" format="defau | |||
lt"/> notes that | ||||
some implementations do not abort connections when an | some implementations do not abort connections when an | |||
ICMP hard error is received for a connection that is | ICMP hard error is received for a connection that is | |||
in any of the synchronized states. | in any of the synchronized states. | |||
</t> | </t> | |||
</list> | </dd> | |||
</t> | </dl> | |||
<t> | <t> | |||
Note that <xref target="RFC5461"/> section 4 describes widespread implementation | Note that <xref target="RFC5461" section="4" sectionFormat="comma" format="defau | |||
behavior that treats soft errors as hard errors during connection establishment | lt"/> describes widespread implementation behavior that treats soft errors as ha | |||
. | rd errors during connection establishment. | |||
</t> | </t> | |||
</section> | </section> | |||
<section title="Source Address Validation"> | <section numbered="true" toc="default"> | |||
<t> | <name>Source Address Validation</name> | |||
<t> | ||||
RFC 1122 requires addresses to be validated in incoming SYN packets: | RFC 1122 requires addresses to be validated in incoming SYN packets: | |||
<list> | ||||
<t> | ||||
An incoming SYN with an invalid source address MUST be | ||||
ignored either by TCP or by the IP layer (MUST-63) (Section | ||||
3.2.1.3 of <xref target="RFC1122"/>). | ||||
</t> | </t> | |||
<t> | <blockquote> | |||
A TCP implementation MUST silently discard an incoming SYN | <t> | |||
segment that is addressed to a broadcast or multicast | An incoming SYN with an invalid source address <bcp14>MUST</bcp14> b | |||
address (MUST-57). | e | |||
ignored either by TCP or by the IP layer [(MUST-63)] (see Section | ||||
<xref target="RFC1122" section="3.2.1.3" sectionFormat="bare"/>). | ||||
</t> | </t> | |||
</list> | <t> | |||
A TCP implementation <bcp14>MUST</bcp14> silently discard an incomin | ||||
g SYN | ||||
segment that is addressed to a broadcast or multicast | ||||
address [(MUST-57)]. | ||||
</t> | </t> | |||
<t>This prevents connection state and replies from being erroneously generated, | </blockquote> | |||
and implementers should note that this guidance is applicable to all incoming se | ||||
gments, not just SYNs, as specifically indicated in RFC 1122.</t> | <t>This prevents connection state and replies from being erroneously | |||
</section> | generated, and implementers should note that this guidance is applicable to all | |||
</section> | incoming segments, not just SYNs, as specifically indicated in RFC 1122.</t> | |||
</section> | </section> | |||
<section title="Event Processing"> | </section> | |||
<t> | </section> | |||
<section numbered="true" toc="default"> | ||||
<name>Event Processing</name> | ||||
<t> | ||||
The processing depicted in this section is an example of one possible | The processing depicted in this section is an example of one possible | |||
implementation. Other implementations may have slightly different | implementation. Other implementations may have slightly different | |||
processing sequences, but they should differ from those in this | processing sequences, but they should differ from those in this | |||
section only in detail, not in substance. | section only in detail, not in substance. | |||
</t> | </t> | |||
<t> | <t> | |||
The activity of the TCP endpoint can be characterized as responding to events. | The activity of the TCP endpoint can be characterized as responding to events. | |||
The events that occur can be cast into three categories: user calls, | The events that occur can be cast into three categories: user calls, | |||
arriving segments, and timeouts. This section describes the | arriving segments, and timeouts. This section describes the | |||
processing the TCP endpoint does in response to each of the events. In many | processing the TCP endpoint does in response to each of the events. In many | |||
cases the processing required depends on the state of the connection. | cases, the processing required depends on the state of the connection. | |||
</t> | </t> | |||
<t> | <t> | |||
Events that occur: | Events that occur: | |||
<list> | </t> | |||
<t>User Calls | <ul spacing="normal" empty="true"> | |||
<list> | <li> | |||
<t>OPEN<vspace /> | <t>User Calls</t> | |||
SEND<vspace /> | <ul spacing="normal" empty="true"> | |||
RECEIVE<vspace /> | <li>OPEN</li> | |||
CLOSE<vspace /> | <li>SEND</li> | |||
ABORT<vspace /> | <li>RECEIVE</li> | |||
STATUS</t> | <li>CLOSE</li> | |||
</list> | <li>ABORT</li> | |||
</t> | <li>STATUS</li> | |||
<t>Arriving Segments | </ul> | |||
<list><t>SEGMENT ARRIVES</t></list> | </li> | |||
</t> | <li> | |||
<t>Timeouts | <t>Arriving Segments</t> | |||
<list> | <ul spacing="normal" empty="true"> | |||
<t>USER TIMEOUT<vspace /> | <li>SEGMENT ARRIVES</li> | |||
RETRANSMISSION TIMEOUT<vspace /> | </ul> | |||
TIME-WAIT TIMEOUT<vspace /> | </li> | |||
</t></list> | <li> | |||
</t> | <t>Timeouts</t> | |||
</list> | <ul spacing="normal" empty="true"> | |||
</t> | <li>USER TIMEOUT</li> | |||
<t> | <li>RETRANSMISSION TIMEOUT</li> | |||
<li>TIME-WAIT TIMEOUT</li> | ||||
</ul> | ||||
</li> | ||||
</ul> | ||||
<t> | ||||
The model of the TCP/user interface is that user commands receive an | The model of the TCP/user interface is that user commands receive an | |||
immediate return and possibly a delayed response via an event or | immediate return and possibly a delayed response via an event or | |||
pseudo interrupt. In the following descriptions, the term "signal" | pseudo-interrupt. In the following descriptions, the term "signal" | |||
means cause a delayed response. | means cause a delayed response. | |||
</t> | </t> | |||
<t> | <t> | |||
Error responses in this document are identified by character strings. For example, user | Error responses in this document are identified by character strings. For example, user | |||
commands referencing connections that do not exist receive "error: | commands referencing connections that do not exist receive "error: | |||
connection not open". | connection not open". | |||
</t> | </t> | |||
<t> | <t> | |||
Please note in the following that all arithmetic on sequence numbers, | Please note in the following that all arithmetic on sequence numbers, | |||
acknowledgment numbers, windows, et cetera, is modulo 2**32 (the size | acknowledgment numbers, windows, et cetera, is modulo 2<sup>32</sup> (the size | |||
of the sequence number space). Also note that "=<" means less than or | of the sequence number space). Also note that "=<" means less than or | |||
equal to (modulo 2**32). | equal to (modulo 2<sup>32</sup>). | |||
</t> | </t> | |||
<t> | <t> | |||
A natural way to think about processing incoming segments is to | A natural way to think about processing incoming segments is to | |||
imagine that they are first tested for proper sequence number (i.e., | imagine that they are first tested for proper sequence number (i.e., | |||
that their contents lie in the range of the expected "receive window" | that their contents lie in the range of the expected "receive window" | |||
in the sequence number space) and then that they are generally queued | in the sequence number space) and then that they are generally queued | |||
and processed in sequence number order. | and processed in sequence number order. | |||
</t> | </t> | |||
<t> | <t> | |||
When a segment overlaps other already received segments we reconstruct | When a segment overlaps other already received segments, we reconstruct | |||
the segment to contain just the new data, and adjust the header fields | the segment to contain just the new data and adjust the header fields | |||
to be consistent. | to be consistent. | |||
</t> | </t> | |||
<t> | <t> | |||
Note that if no state change is mentioned the TCP connection stays in the same | Note that if no state change is mentioned, the TCP connection stays in the same | |||
state. | state. | |||
</t> | </t> | |||
<section title="OPEN Call"> | <section numbered="true" toc="default"> | |||
<t> | <name>OPEN Call</name> | |||
<list> | <t>CLOSED STATE (i.e., TCB does not exist) | |||
<t>CLOSED STATE (i.e., TCB does not exist) | </t> | |||
<list> | <ul spacing="normal"> | |||
<t> | <li> | |||
Create a new transmission control block (TCB) to hold connection | Create a new transmission control block (TCB) to hold connection | |||
state information. Fill in local socket identifier, remote | state information. Fill in local socket identifier, remote | |||
socket, DiffServ field, security/compartment, and user timeout | socket, Diffserv field, security/compartment, and user timeout | |||
information. Note that some parts of the remote socket may be | information. Note that some parts of the remote socket may be | |||
unspecified in a passive OPEN and are to be filled in by the | unspecified in a passive OPEN and are to be filled in by the | |||
parameters of the incoming SYN segment. Verify the security and | parameters of the incoming SYN segment. Verify the security and | |||
DiffServ value requested are allowed for this user, if not return | Diffserv value requested are allowed for this user, if not, return | |||
"error: DiffServ value not allowed" or "error: security/c | "error: Diffserv value not allowed" or "error: security/compartment | |||
ompartment | not allowed". If passive, enter the LISTEN state and return. If | |||
not allowed." If passive enter the LISTEN state and return. If | active and the remote socket is unspecified, return "error: | |||
active and the remote socket is unspecified, return "error: | remote socket unspecified"; if active and the remote socket is | |||
remote socket unspecified"; if active and the remote socket is | ||||
specified, issue a SYN segment. An initial send sequence number | specified, issue a SYN segment. An initial send sequence number | |||
(ISS) is selected. A SYN segment of the form <SEQ=ISS><CTL=SYN> | (ISS) is selected. A SYN segment of the form <SEQ=ISS><CTL=SYN> | |||
is sent. Set SND.UNA to ISS, SND.NXT to ISS+1, enter SYN-SENT | is sent. Set SND.UNA to ISS, SND.NXT to ISS+1, enter SYN-SENT | |||
state, and return. | state, and return. | |||
</t> | </li> | |||
<t> | <li> | |||
If the caller does not have access to the local socket specified, | If the caller does not have access to the local socket specified, | |||
return "error: connection illegal for this process". If there | return "error: connection illegal for this process". If there is | |||
is | no room to create a new connection, return "error: insufficient | |||
no room to create a new connection, return "error: insufficient | resources". | |||
resources". | </li> | |||
</t> | </ul> | |||
</list> | <t>LISTEN STATE | |||
</t> | </t> | |||
<t>LISTEN STATE | <ul spacing="normal"> | |||
<list> | <li> | |||
<t> | ||||
If the OPEN call is active and the remote socket is specified, then change the | If the OPEN call is active and the remote socket is specified, then change the | |||
connection from passive to active, select an ISS. Send a SYN | connection from passive to active, select an ISS. Send a SYN | |||
segment, set SND.UNA to ISS, SND.NXT to ISS+1. Enter SYN-SENT | segment, set SND.UNA to ISS, SND.NXT to ISS+1. Enter SYN-SENT | |||
state. Data associated with SEND may be sent with SYN segment or | state. Data associated with SEND may be sent with SYN segment or | |||
queued for transmission after entering ESTABLISHED state. The | queued for transmission after entering ESTABLISHED state. The | |||
urgent bit, if requested in the command, must be sent with the data | urgent bit, if requested in the command, must be sent with the data | |||
segments sent as a result of this command. If there is no room to | segments sent as a result of this command. If there is no room to | |||
queue the request, respond with "error: insufficient resources". | queue the request, respond with "error: insufficient resources". | |||
If the remote socket was not specified, then return "error: remote | If the remote socket was not specified, then return "error: remote | |||
socket unspecified". | socket unspecified". | |||
</t> | </li> | |||
</list> | </ul> | |||
</t> | <t>SYN-SENT STATE</t> | |||
<t><vspace blankLines="999"/></t> | <t> | |||
<t>SYN-SENT STATE<vspace /> | SYN-RECEIVED STATE</t> | |||
SYN-RECEIVED STATE<vspace /> | <t> | |||
ESTABLISHED STATE<vspace /> | ESTABLISHED STATE</t> | |||
FIN-WAIT-1 STATE<vspace /> | <t> | |||
FIN-WAIT-2 STATE<vspace /> | FIN-WAIT-1 STATE</t> | |||
CLOSE-WAIT STATE<vspace /> | <t> | |||
CLOSING STATE<vspace /> | FIN-WAIT-2 STATE</t> | |||
LAST-ACK STATE<vspace /> | <t> | |||
CLOSE-WAIT STATE</t> | ||||
<t> | ||||
CLOSING STATE</t> | ||||
<t> | ||||
LAST-ACK STATE</t> | ||||
<t> | ||||
TIME-WAIT STATE | TIME-WAIT STATE | |||
<list> | </t> | |||
<t>Return "error: connection already exists".</t> | <ul spacing="normal"> | |||
</list> | <li>Return "error: connection already exists".</li> | |||
</t> | </ul> | |||
</list> | </section> | |||
</t> | <section numbered="true" toc="default"> | |||
</section> | <name>SEND Call</name> | |||
<section title="SEND Call"> | <t> | |||
<t> | ||||
<list> | ||||
<t> | ||||
CLOSED STATE (i.e., TCB does not exist) | CLOSED STATE (i.e., TCB does not exist) | |||
<list> | </t> | |||
<t> | <ul spacing="normal"> | |||
<li> | ||||
If the user does not have access to such a connection, then return | If the user does not have access to such a connection, then return | |||
"error: connection illegal for this process". | "error: connection illegal for this process". | |||
</t> | </li> | |||
<t> | <li> | |||
Otherwise, return "error: connection does not exist". | Otherwise, return "error: connection does not exist". | |||
</t> | </li> | |||
</list> | </ul> | |||
</t> | <t> | |||
<t> | ||||
LISTEN STATE | LISTEN STATE | |||
<list> | </t> | |||
<t> | <ul spacing="normal"> | |||
<li> | ||||
If the remote socket is specified, then change the connection | If the remote socket is specified, then change the connection | |||
from passive to active, select an ISS. Send a SYN segment, set | from passive to active, select an ISS. Send a SYN segment, set | |||
SND.UNA to ISS, SND.NXT to ISS+1. Enter SYN-SENT state. Data | SND.UNA to ISS, SND.NXT to ISS+1. Enter SYN-SENT state. Data | |||
associated with SEND may be sent with SYN segment or queued for | associated with SEND may be sent with SYN segment or queued for | |||
transmission after entering ESTABLISHED state. The urgent bit if | transmission after entering ESTABLISHED state. The urgent bit if | |||
requested in the command must be sent with the data segments sent | requested in the command must be sent with the data segments sent | |||
as a result of this command. If there is no room to queue the | as a result of this command. If there is no room to queue the | |||
request, respond with "error: insufficient resources". If | request, respond with "error: insufficient resources". If | |||
the remote socket was not specified, then return "error: remote | the remote socket was not specified, then return "error: remote | |||
socket unspecified". | socket unspecified". | |||
</t> | </li> | |||
</list> | </ul> | |||
</t> | <t> | |||
<t> | SYN-SENT STATE</t> | |||
SYN-SENT STATE<vspace /> | <t> | |||
SYN-RECEIVED STATE | SYN-RECEIVED STATE | |||
<list><t> | </t> | |||
<ul spacing="normal"> | ||||
<li> | ||||
Queue the data for transmission after entering ESTABLISHED state. | Queue the data for transmission after entering ESTABLISHED state. | |||
If no space to queue, respond with "error: insufficient | If no space to queue, respond with "error: insufficient | |||
resources". | resources". | |||
</t></list> | </li> | |||
</t> | </ul> | |||
<t> | <t> | |||
ESTABLISHED STATE<vspace /> | ESTABLISHED STATE</t> | |||
<t> | ||||
CLOSE-WAIT STATE | CLOSE-WAIT STATE | |||
<list><t> | </t> | |||
<ul spacing="normal"> | ||||
<li> | ||||
Segmentize the buffer and send it with a piggybacked | Segmentize the buffer and send it with a piggybacked | |||
acknowledgment (acknowledgment value = RCV.NXT). If there is | acknowledgment (acknowledgment value = RCV.NXT). If there is | |||
insufficient space to remember this buffer, simply return "error: | insufficient space to remember this buffer, simply return "error: | |||
insufficient resources". | insufficient resources". | |||
</t> | </li> | |||
<t> | <li> | |||
If the urgent flag is set, then SND.UP <- SND.NXT and set the | If the URGENT flag is set, then SND.UP <- SND.NXT and set the | |||
urgent pointer in the outgoing segments. | urgent pointer in the outgoing segments. | |||
</t></list> | </li> | |||
</t> | </ul> | |||
<t> | <t> | |||
FIN-WAIT-1 STATE<vspace /> | FIN-WAIT-1 STATE</t> | |||
FIN-WAIT-2 STATE<vspace /> | <t> | |||
CLOSING STATE<vspace /> | FIN-WAIT-2 STATE</t> | |||
LAST-ACK STATE<vspace /> | <t> | |||
CLOSING STATE</t> | ||||
<t> | ||||
LAST-ACK STATE</t> | ||||
<t> | ||||
TIME-WAIT STATE | TIME-WAIT STATE | |||
<list><t> | </t> | |||
<ul spacing="normal"> | ||||
<li> | ||||
Return "error: connection closing" and do not service request. | Return "error: connection closing" and do not service request. | |||
</t></list> | </li> | |||
</t> | </ul> | |||
</list> | </section> | |||
</t> | <section numbered="true" toc="default"> | |||
</section> | <name>RECEIVE Call</name> | |||
<section title="RECEIVE Call"> | <t> | |||
<t> | ||||
<list> | ||||
<t> | ||||
CLOSED STATE (i.e., TCB does not exist) | CLOSED STATE (i.e., TCB does not exist) | |||
<list> | </t> | |||
<t> | <ul spacing="normal"> | |||
<li> | ||||
If the user does not have access to such a connection, return | If the user does not have access to such a connection, return | |||
"error: connection illegal for this process". | "error: connection illegal for this process". | |||
</t> | </li> | |||
<t> | <li> | |||
Otherwise return "error: connection does not exist". | Otherwise, return "error: connection does not exist". | |||
</t> | </li> | |||
</list></t> | </ul> | |||
<t> | <t> | |||
LISTEN STATE<vspace /> | LISTEN STATE</t> | |||
SYN-SENT STATE<vspace /> | <t> | |||
SYN-SENT STATE</t> | ||||
<t> | ||||
SYN-RECEIVED STATE | SYN-RECEIVED STATE | |||
<list> | </t> | |||
<t> | <ul spacing="normal"> | |||
<li> | ||||
Queue for processing after entering ESTABLISHED state. If there | Queue for processing after entering ESTABLISHED state. If there | |||
is no room to queue this request, respond with "error: | is no room to queue this request, respond with "error: | |||
insufficient resources". | insufficient resources". | |||
</t> | </li> | |||
</list></t> | </ul> | |||
<t> | <t> | |||
ESTABLISHED STATE<vspace /> | ESTABLISHED STATE</t> | |||
FIN-WAIT-1 STATE<vspace /> | <t> | |||
FIN-WAIT-1 STATE</t> | ||||
<t> | ||||
FIN-WAIT-2 STATE | FIN-WAIT-2 STATE | |||
<list> | </t> | |||
<t> | <ul spacing="normal"> | |||
<li> | ||||
If insufficient incoming segments are queued to satisfy the | If insufficient incoming segments are queued to satisfy the | |||
request, queue the request. If there is no queue space to | request, queue the request. If there is no queue space to | |||
remember the RECEIVE, respond with "error: insufficient | remember the RECEIVE, respond with "error: insufficient | |||
resources". | resources". | |||
</t> | </li> | |||
<t> | <li> | |||
Reassemble queued incoming segments into receive buffer and return | Reassemble queued incoming segments into receive buffer and return | |||
to user. Mark "push seen" (PUSH) if this is the case. | to user. Mark "push seen" (PUSH) if this is the case. | |||
</t> | </li> | |||
<t> | <li> | |||
If RCV.UP is in advance of the data currently being passed to the | If RCV.UP is in advance of the data currently being passed to the | |||
user notify the user of the presence of urgent data. | user, notify the user of the presence of urgent data. | |||
</t> | </li> | |||
<t> | <li> | |||
When the TCP endpoint takes responsibility for delivering data to the user, | When the TCP endpoint takes responsibility for delivering data to the user, | |||
that fact must be communicated to the sender via an | that fact must be communicated to the sender via an | |||
acknowledgment. The formation of such an acknowledgment is | acknowledgment. The formation of such an acknowledgment is | |||
described below in the discussion of processing an incoming | described below in the discussion of processing an incoming | |||
segment. | segment. | |||
</t> | </li> | |||
</list></t> | </ul> | |||
<t> | <t> | |||
CLOSE-WAIT STATE | CLOSE-WAIT STATE | |||
<list> | </t> | |||
<t> | <ul spacing="normal"> | |||
<li> | ||||
Since the remote side has already sent FIN, RECEIVEs must be | Since the remote side has already sent FIN, RECEIVEs must be | |||
satisfied by data already on hand, but not yet delivered to the | satisfied by data already on hand, but not yet delivered to the | |||
user. If no text is awaiting delivery, the RECEIVE will get an | user. If no text is awaiting delivery, the RECEIVE will get an | |||
"error: connection closing" response. Otherwise, any remaining | "error: connection closing" response. Otherwise, any remaining | |||
data can be used to satisfy the RECEIVE. | data can be used to satisfy the RECEIVE. | |||
</t> | </li> | |||
</list></t> | </ul> | |||
<t> | <t> | |||
CLOSING STATE<vspace /> | CLOSING STATE</t> | |||
LAST-ACK STATE<vspace /> | <t> | |||
LAST-ACK STATE</t> | ||||
<t> | ||||
TIME-WAIT STATE | TIME-WAIT STATE | |||
<list> | </t> | |||
<t> | <ul spacing="normal"> | |||
<li> | ||||
Return "error: connection closing". | Return "error: connection closing". | |||
</t> | </li> | |||
</list></t> | </ul> | |||
</list> | </section> | |||
</t> | <section numbered="true" toc="default"> | |||
</section> | <name>CLOSE Call</name> | |||
<section title="CLOSE Call"> | <t> | |||
<t> | ||||
<list> | ||||
<t> | ||||
CLOSED STATE (i.e., TCB does not exist) | CLOSED STATE (i.e., TCB does not exist) | |||
<list> | </t> | |||
<t> | <ul spacing="normal"> | |||
<li> | ||||
If the user does not have access to such a connection, return | If the user does not have access to such a connection, return | |||
"error: connection illegal for this process". | "error: connection illegal for this process". | |||
</t> | </li> | |||
<t> | <li> | |||
Otherwise, return "error: connection does not exist". | Otherwise, return "error: connection does not exist". | |||
</t> | </li> | |||
</list></t> | </ul> | |||
<t> | <t> | |||
LISTEN STATE | LISTEN STATE | |||
<list> | </t> | |||
<t> | <ul spacing="normal"> | |||
<li> | ||||
Any outstanding RECEIVEs are returned with "error: closing" | Any outstanding RECEIVEs are returned with "error: closing" | |||
responses. Delete TCB, enter CLOSED state, and return. | responses. Delete TCB, enter CLOSED state, and return. | |||
</t></list></t> | </li> | |||
<t> | </ul> | |||
<t> | ||||
SYN-SENT STATE | SYN-SENT STATE | |||
<list> | </t> | |||
<t> | <ul spacing="normal"> | |||
<li> | ||||
Delete the TCB and return "error: closing" responses to any | Delete the TCB and return "error: closing" responses to any | |||
queued SENDs, or RECEIVEs. | queued SENDs, or RECEIVEs. | |||
</t></list></t> | </li> | |||
<t> | </ul> | |||
<t> | ||||
SYN-RECEIVED STATE | SYN-RECEIVED STATE | |||
<list> | </t> | |||
<t> | <ul spacing="normal"> | |||
<li> | ||||
If no SENDs have been issued and there is no pending data to send, | If no SENDs have been issued and there is no pending data to send, | |||
then form a FIN segment and send it, and enter FIN-WAIT-1 state; | then form a FIN segment and send it, and enter FIN-WAIT-1 state; | |||
otherwise queue for processing after entering ESTABLISHED state. | otherwise, queue for processing after entering ESTABLISHED state. | |||
</t></list></t> | </li> | |||
<t> | </ul> | |||
<t> | ||||
ESTABLISHED STATE | ESTABLISHED STATE | |||
<list> | </t> | |||
<t> | <ul spacing="normal"> | |||
<li> | ||||
Queue this until all preceding SENDs have been segmentized, then | Queue this until all preceding SENDs have been segmentized, then | |||
form a FIN segment and send it. In any case, enter FIN-WAIT-1 | form a FIN segment and send it. In any case, enter FIN-WAIT-1 | |||
state. | state. | |||
</t></list></t> | </li> | |||
<t> | </ul> | |||
FIN-WAIT-1 STATE<vspace /> | <t> | |||
FIN-WAIT-1 STATE</t> | ||||
<t> | ||||
FIN-WAIT-2 STATE | FIN-WAIT-2 STATE | |||
<list> | </t> | |||
<t> | <ul spacing="normal"> | |||
<li> | ||||
Strictly speaking, this is an error and should receive an "error: | Strictly speaking, this is an error and should receive an "error: | |||
connection closing" response. An "ok" response would be | connection closing" response. An "ok" response would be | |||
acceptable, too, as long as a second FIN is not emitted (the first | acceptable, too, as long as a second FIN is not emitted (the first | |||
FIN may be retransmitted though). | FIN may be retransmitted, though). | |||
</t></list></t> | </li> | |||
<t> | </ul> | |||
<t> | ||||
CLOSE-WAIT STATE | CLOSE-WAIT STATE | |||
<list> | </t> | |||
<t> | <ul spacing="normal"> | |||
<li> | ||||
Queue this request until all preceding SENDs have been | Queue this request until all preceding SENDs have been | |||
segmentized; then send a FIN segment, enter LAST-ACK state. | segmentized; then send a FIN segment, enter LAST-ACK state. | |||
</t></list></t> | </li> | |||
<t> | </ul> | |||
CLOSING STATE<vspace /> | <t> | |||
LAST-ACK STATE<vspace /> | CLOSING STATE</t> | |||
<t> | ||||
LAST-ACK STATE</t> | ||||
<t> | ||||
TIME-WAIT STATE | TIME-WAIT STATE | |||
<list> | </t> | |||
<t> | <ul spacing="normal"> | |||
<li> | ||||
Respond with "error: connection closing". | Respond with "error: connection closing". | |||
</t></list></t> | </li> | |||
</list> | </ul> | |||
</t> | </section> | |||
</section> | <section numbered="true" toc="default"> | |||
<section title="ABORT Call"> | <name>ABORT Call</name> | |||
<t> | <t> | |||
<list> | ||||
<t> | ||||
CLOSED STATE (i.e., TCB does not exist) | CLOSED STATE (i.e., TCB does not exist) | |||
<list> | </t> | |||
<t> | <ul spacing="normal"> | |||
<li> | ||||
If the user should not have access to such a connection, return | If the user should not have access to such a connection, return | |||
"error: connection illegal for this process". | "error: connection illegal for this process". | |||
</t> | </li> | |||
<t> | <li> | |||
Otherwise return "error: connection does not exist". | Otherwise, return "error: connection does not exist". | |||
</t></list></t> | </li> | |||
<t> | </ul> | |||
<t> | ||||
LISTEN STATE | LISTEN STATE | |||
<list> | </t> | |||
<t> | <ul spacing="normal"> | |||
<li> | ||||
Any outstanding RECEIVEs should be returned with "error: | Any outstanding RECEIVEs should be returned with "error: | |||
connection reset" responses. Delete TCB, enter CLOSED state, and | connection reset" responses. Delete TCB, enter CLOSED state, and | |||
return. | return. | |||
</t></list></t> | </li> | |||
<t> | </ul> | |||
<t> | ||||
SYN-SENT STATE | SYN-SENT STATE | |||
<list> | </t> | |||
<t> | <ul spacing="normal"> | |||
<li> | ||||
All queued SENDs and RECEIVEs should be given "connection reset" | All queued SENDs and RECEIVEs should be given "connection reset" | |||
notification, delete the TCB, enter CLOSED state, and return. | notification. Delete the TCB, enter CLOSED state, and return. | |||
</t></list></t> | </li> | |||
<t> | </ul> | |||
SYN-RECEIVED STATE<vspace /> | <t> | |||
ESTABLISHED STATE<vspace /> | SYN-RECEIVED STATE</t> | |||
FIN-WAIT-1 STATE<vspace /> | <t> | |||
FIN-WAIT-2 STATE<vspace /> | ESTABLISHED STATE</t> | |||
<t> | ||||
FIN-WAIT-1 STATE</t> | ||||
<t> | ||||
FIN-WAIT-2 STATE</t> | ||||
<t> | ||||
CLOSE-WAIT STATE | CLOSE-WAIT STATE | |||
<list> | </t> | |||
<t> | <ul spacing="normal"> | |||
<li> | ||||
<t> | ||||
Send a reset segment: | Send a reset segment: | |||
<list> | </t> | |||
<t> | <t> | |||
<SEQ=SND.NXT><CTL=RST> | <SEQ=SND.NXT><CTL=RST> | |||
</t> | </t> | |||
</list></t> | </li> | |||
<t> | <li> | |||
All queued SENDs and RECEIVEs should be given "connection reset" | All queued SENDs and RECEIVEs should be given "connection reset" | |||
notification; all segments queued for transmission (except for the | notification; all segments queued for transmission (except for the | |||
RST formed above) or retransmission should be flushed, delete the | RST formed above) or retransmission should be flushed. Delete the | |||
TCB, enter CLOSED state, and return. | TCB, enter CLOSED state, and return. | |||
</t></list></t> | </li> | |||
<t> | </ul> | |||
<t> | ||||
CLOSING STATE | CLOSING STATE | |||
</t> | ||||
<t> | ||||
LAST-ACK STATE | LAST-ACK STATE | |||
</t> | ||||
<t> | ||||
TIME-WAIT STATE | TIME-WAIT STATE | |||
<list> | </t> | |||
<t> | <ul spacing="normal"> | |||
<li> | ||||
Respond with "ok" and delete the TCB, enter CLOSED state, and | Respond with "ok" and delete the TCB, enter CLOSED state, and | |||
return. | return. | |||
</t></list></t> | </li> | |||
</list> | </ul> | |||
</t> | </section> | |||
</section> | <section numbered="true" toc="default"> | |||
<section title="STATUS Call"> | <name>STATUS Call</name> | |||
<t> | <t> | |||
<list> | ||||
<t> | ||||
CLOSED STATE (i.e., TCB does not exist) | CLOSED STATE (i.e., TCB does not exist) | |||
<list> | </t> | |||
<t> | <ul spacing="normal"> | |||
<li> | ||||
If the user should not have access to such a connection, return | If the user should not have access to such a connection, return | |||
"error: connection illegal for this process". | "error: connection illegal for this process". | |||
</t> | </li> | |||
<t> | <li> | |||
Otherwise return "error: connection does not exist". | Otherwise, return "error: connection does not exist". | |||
</t></list></t> | </li> | |||
<t> | </ul> | |||
<t> | ||||
LISTEN STATE | LISTEN STATE | |||
<list> | </t> | |||
<t> | <ul spacing="normal"> | |||
Return "state = LISTEN", and the TCB pointer. | <li> | |||
</t></list></t> | Return "state = LISTEN" and the TCB pointer. | |||
<t> | </li> | |||
</ul> | ||||
<t> | ||||
SYN-SENT STATE | SYN-SENT STATE | |||
<list> | </t> | |||
<t> | <ul spacing="normal"> | |||
Return "state = SYN-SENT", and the TCB pointer. | <li> | |||
</t></list></t> | Return "state = SYN-SENT" and the TCB pointer. | |||
<t> | </li> | |||
</ul> | ||||
<t> | ||||
SYN-RECEIVED STATE | SYN-RECEIVED STATE | |||
<list> | </t> | |||
<t> | <ul spacing="normal"> | |||
Return "state = SYN-RECEIVED", and the TCB pointer. | <li> | |||
</t></list></t> | Return "state = SYN-RECEIVED" and the TCB pointer. | |||
<t> | </li> | |||
</ul> | ||||
<t> | ||||
ESTABLISHED STATE | ESTABLISHED STATE | |||
<list> | </t> | |||
<t> | <ul spacing="normal"> | |||
Return "state = ESTABLISHED", and the TCB pointer. | <li> | |||
</t></list></t> | Return "state = ESTABLISHED" and the TCB pointer. | |||
<t> | </li> | |||
</ul> | ||||
<t> | ||||
FIN-WAIT-1 STATE | FIN-WAIT-1 STATE | |||
<list> | </t> | |||
<t> | <ul spacing="normal"> | |||
Return "state = FIN-WAIT-1", and the TCB pointer. | <li> | |||
</t></list></t> | Return "state = FIN-WAIT-1" and the TCB pointer. | |||
<t> | </li> | |||
</ul> | ||||
<t> | ||||
FIN-WAIT-2 STATE | FIN-WAIT-2 STATE | |||
<list> | </t> | |||
<t> | <ul spacing="normal"> | |||
Return "state = FIN-WAIT-2", and the TCB pointer. | <li> | |||
</t></list></t> | Return "state = FIN-WAIT-2" and the TCB pointer. | |||
<t> | </li> | |||
</ul> | ||||
<t> | ||||
CLOSE-WAIT STATE | CLOSE-WAIT STATE | |||
<list> | </t> | |||
<t> | <ul spacing="normal"> | |||
Return "state = CLOSE-WAIT", and the TCB pointer. | <li> | |||
</t></list></t> | Return "state = CLOSE-WAIT" and the TCB pointer. | |||
<t> | </li> | |||
</ul> | ||||
<t> | ||||
CLOSING STATE | CLOSING STATE | |||
<list> | </t> | |||
<t> | <ul spacing="normal"> | |||
Return "state = CLOSING", and the TCB pointer. | <li> | |||
</t></list></t> | Return "state = CLOSING" and the TCB pointer. | |||
<t> | </li> | |||
</ul> | ||||
<t> | ||||
LAST-ACK STATE | LAST-ACK STATE | |||
<list> | </t> | |||
<t> | <ul spacing="normal"> | |||
Return "state = LAST-ACK", and the TCB pointer. | <li> | |||
</t></list></t> | Return "state = LAST-ACK" and the TCB pointer. | |||
<t> | </li> | |||
</ul> | ||||
<t> | ||||
TIME-WAIT STATE | TIME-WAIT STATE | |||
<list> | </t> | |||
<t> | <ul spacing="normal"> | |||
Return "state = TIME-WAIT", and the TCB pointer. | <li> | |||
</t></list></t> | Return "state = TIME-WAIT" and the TCB pointer. | |||
</list> | </li> | |||
</t> | </ul> | |||
</section> | </section> | |||
<section title="SEGMENT ARRIVES"> | <section numbered="true" toc="default"> | |||
<name>SEGMENT ARRIVES</name> | ||||
<section title="CLOSED State"> | <section numbered="true" toc="default"> | |||
<t> | <name>CLOSED STATE</name> | |||
If the state is CLOSED (i.e., TCB does not exist) then | <t> | |||
<list> | If the state is CLOSED (i.e., TCB does not exist), then | |||
<t> | </t> | |||
<ul empty="true" spacing="normal"> | ||||
<li> | ||||
all data in the incoming segment is discarded. An incoming | all data in the incoming segment is discarded. An incoming | |||
segment containing a RST is discarded. An incoming segment not | segment containing a RST is discarded. An incoming segment not | |||
containing a RST causes a RST to be sent in response. The | containing a RST causes a RST to be sent in response. The | |||
acknowledgment and sequence field values are selected to make the | acknowledgment and sequence field values are selected to make the | |||
reset sequence acceptable to the TCP endpoint that sent the offending | reset sequence acceptable to the TCP endpoint that sent the offending | |||
segment. | segment. | |||
</t> | </li> | |||
<t> | <li> | |||
<t> | ||||
If the ACK bit is off, sequence number zero is used, | If the ACK bit is off, sequence number zero is used, | |||
<list> | </t> | |||
<t> | <ul spacing="normal" empty="true"> | |||
<SEQ=0><ACK=SEG.SEQ+SEG.LEN><CTL=RST,ACK> | <li> | |||
</t> | <SEQ=0><ACK=SEG.SEQ+SEG.LEN><CTL=RST,ACK> | |||
</list></t> | </li> | |||
<t> | </ul> | |||
</li> | ||||
<li> | ||||
<t> | ||||
If the ACK bit is on, | If the ACK bit is on, | |||
<list> | </t> | |||
<t> | <ul spacing="normal" empty="true"> | |||
<SEQ=SEG.ACK><CTL=RST> | <li> | |||
</t></list></t> | <SEQ=SEG.ACK><CTL=RST> | |||
<t> | </li> | |||
</ul> | ||||
</li> | ||||
<li> | ||||
Return. | Return. | |||
</t> | </li> | |||
</list></t> | </ul> | |||
</section> | </section> | |||
<section title="LISTEN State"> | <section numbered="true" toc="default"> | |||
<t> | <name>LISTEN STATE</name> | |||
If the state is LISTEN then | <t> | |||
<list> | If the state is LISTEN, then | |||
<t> | </t> | |||
first check for an RST | <ul empty="true" spacing="normal"> | |||
<list> | <li> | |||
<t> | <t> | |||
An incoming RST segment could not be valid, since | First, check for a RST: | |||
</t> | ||||
<ul spacing="normal"> | ||||
<li> | ||||
An incoming RST segment could not be valid since | ||||
it could not have been sent in response to anything sent by this | it could not have been sent in response to anything sent by this | |||
incarnation of the connection. | incarnation of the connection. | |||
An incoming RST should be ignored. Return. | An incoming RST should be ignored. Return. | |||
</t></list></t> | </li> | |||
<t> | </ul> | |||
second check for an ACK | </li> | |||
<list> | <li> | |||
<t> | <t> | |||
Second, check for an ACK: | ||||
</t> | ||||
<ul spacing="normal"> | ||||
<li> | ||||
<t> | ||||
Any acknowledgment is bad if it arrives on a connection still in | Any acknowledgment is bad if it arrives on a connection still in | |||
the LISTEN state. An acceptable reset segment should be formed | the LISTEN state. An acceptable reset segment should be formed | |||
for any arriving ACK-bearing segment. The RST should be | for any arriving ACK-bearing segment. The RST should be | |||
formatted as follows: | formatted as follows: | |||
<list> | </t> | |||
<t> | <ul spacing="normal" empty="true"> | |||
<SEQ=SEG.ACK><CTL=RST> | <li> | |||
</t></list></t> | <SEQ=SEG.ACK><CTL=RST> | |||
<t> | </li> | |||
</ul> | ||||
</li> | ||||
<li> | ||||
Return. | Return. | |||
</t></list></t> | </li> | |||
<t> | </ul> | |||
third check for a SYN | </li> | |||
<list> | <li> | |||
<t> | <t> | |||
Third, check for a SYN: | ||||
</t> | ||||
<ul spacing="normal"> | ||||
<li> | ||||
<t> | ||||
If the SYN bit is set, check the security. If the | If the SYN bit is set, check the security. If the | |||
security/compartment on the incoming segment does not exactly | security/compartment on the incoming segment does not exactly | |||
match the security/compartment in the TCB then send a reset and | match the security/compartment in the TCB, then send a reset and | |||
return. | return. | |||
<list> | </t> | |||
<t> | <ul spacing="normal" empty="true"> | |||
<SEQ=0><ACK=SEG.SEQ+SEG.LEN><CTL=RST,ACK> | <li> | |||
</t></list></t> | <SEQ=0><ACK=SEG.SEQ+SEG.LEN><CTL=RST,ACK> | |||
<t> | </li> | |||
Set RCV.NXT to SEG.SEQ+1, IRS is set to SEG.SEQ and any other | </ul> | |||
</li> | ||||
<li> | ||||
<t> | ||||
Set RCV.NXT to SEG.SEQ+1, IRS is set to SEG.SEQ, and any other | ||||
control or text should be queued for processing later. ISS | control or text should be queued for processing later. ISS | |||
should be selected and a SYN segment sent of the form: | should be selected and a SYN segment sent of the form: | |||
<list> | </t> | |||
<t> | <ul spacing="normal" empty="true"> | |||
<SEQ=ISS><ACK=RCV.NXT><CTL=SYN,ACK> | <li> | |||
</t></list></t> | <SEQ=ISS><ACK=RCV.NXT><CTL=SYN,ACK> | |||
<t> | </li> | |||
</ul> | ||||
</li> | ||||
<li> | ||||
SND.NXT is set to ISS+1 and SND.UNA to ISS. The connection | SND.NXT is set to ISS+1 and SND.UNA to ISS. The connection | |||
state should be changed to SYN-RECEIVED. Note that any other | state should be changed to SYN-RECEIVED. Note that any other | |||
incoming control or data (combined with SYN) will be processed | incoming control or data (combined with SYN) will be processed | |||
in the SYN-RECEIVED state, but processing of SYN and ACK should | in the SYN-RECEIVED state, but processing of SYN and ACK should | |||
not be repeated. If the listen was not fully specified (i.e., | not be repeated. If the listen was not fully specified (i.e., | |||
the remote socket was not fully specified), then the | the remote socket was not fully specified), then the | |||
unspecified fields should be filled in now. | unspecified fields should be filled in now. | |||
</t></list></t> | </li> | |||
<t> | </ul> | |||
fourth other data or control | </li> | |||
<list> | <li> | |||
<t> | <t> | |||
Fourth, other data or control: | ||||
</t> | ||||
<ul spacing="normal"> | ||||
<li> | ||||
This should not be reached. Drop the segment and return. Any other control or data-bearing segment (not containing SYN) | This should not be reached. Drop the segment and return. Any other control or data-bearing segment (not containing SYN) | |||
must have an ACK and thus would have been discarded by the ACK | must have an ACK and thus would have been discarded by the ACK | |||
processing in the second step, unless it was first discarded by | processing in the second step, unless it was first discarded by | |||
RST checking in the first step. | RST checking in the first step. | |||
</t></list></t> | </li> | |||
</list></t> | </ul> | |||
</section> | </li> | |||
<section title="SYN-SENT State"> | </ul> | |||
<t> | </section> | |||
If the state is SYN-SENT then | <section numbered="true" toc="default"> | |||
<list> | <name>SYN-SENT STATE</name> | |||
<t> | ||||
first check the ACK bit | ||||
<list> | ||||
<t> | ||||
If the ACK bit is set | ||||
<list> | ||||
<t> | ||||
If SEG.ACK =< ISS, or SEG.ACK > SND.NXT, send a reset (unless | ||||
the RST bit is set, if so drop the segment and return) | ||||
<list> | ||||
<t> | <t> | |||
<SEQ=SEG.ACK><CTL=RST> | If the state is SYN-SENT, then | |||
</t></list></t> | </t> | |||
<t> | <ul empty="true" spacing="normal"> | |||
<li> | ||||
<t> | ||||
First, check the ACK bit: | ||||
</t> | ||||
<ul spacing="normal"> | ||||
<li> | ||||
<t> | ||||
If the ACK bit is set, | ||||
</t> | ||||
<ul spacing="normal"> | ||||
<li> | ||||
<t> | ||||
If SEG.ACK =< ISS or SEG.ACK > SND.NXT, send a reset (unless | ||||
the RST bit is set, if so drop the segment and return) | ||||
</t> | ||||
<ul spacing="normal" empty="true"> | ||||
<li> | ||||
<SEQ=SEG.ACK><CTL=RST> | ||||
</li> | ||||
</ul> | ||||
</li> | ||||
<li> | ||||
and discard the segment. Return. | and discard the segment. Return. | |||
</t> | </li> | |||
<t> | <li> | |||
If SND.UNA < SEG.ACK =< SND.NXT then the ACK is acceptable. Some | If SND.UNA < SEG.ACK =< SND.NXT, then the ACK is acceptable. Some | |||
deployed TCP code has used the check SEG.ACK == SND.NXT (using "==" | deployed TCP code has used the check SEG.ACK == SND.NXT (using "==" rather | |||
rather than "=<", but this is not appropriate when the stack is | than "=<"), but this is not appropriate when the stack is capable of sending | |||
capable of sending data on the SYN, because the TCP peer may not accept and | data on the SYN because the TCP peer may not accept and acknowledge all of the | |||
acknowledge all of the data on the SYN. | data on the SYN. | |||
</t></list></t> | </li> | |||
</list></t> | </ul> | |||
<t> | </li> | |||
second check the RST bit | </ul> | |||
<list> | </li> | |||
<t> | <li> | |||
If the RST bit is set | <t> | |||
<list> | Second, check the RST bit: | |||
<t> | </t> | |||
A potential blind reset attack is described in RFC 5961 <xref | <ul spacing="normal"> | |||
target="RFC5961"/>. The mitigation described in that document has specific | <li> | |||
applicability explained therein, and is not a substitute for cryptographic protection (e.g. | <t> | |||
IPsec or TCP-AO). A TCP implementation that supports the RFC 5961 mitigation | If the RST bit is set, | |||
SHOULD first check that the sequence number exactly matches RCV.NXT prior to | </t> | |||
executing the action in the next paragraph. | <ul spacing="normal"> | |||
</t> | <li> | |||
<t> | A potential blind reset attack is described in RFC 5961 <xref | |||
If the ACK was acceptable then signal the user "error: | target="RFC5961" format="default"/>. The mitigation described in that document has | |||
specific applicability explained therein, and is not a substitute for cryptographic | ||||
protection (e.g., IPsec or TCP-AO). A TCP implementation that supports the | ||||
mitigation described in RFC 5961 <bcp14>SHOULD</bcp14> first check that the sequence | ||||
number exactly matches RCV.NXT prior to executing the action in the next | ||||
paragraph. | ||||
</li> | ||||
<li> | ||||
If the ACK was acceptable, then signal to the user "error: | ||||
connection reset", drop the segment, enter CLOSED state, | connection reset", drop the segment, enter CLOSED state, | |||
delete TCB, and return. Otherwise (no ACK), drop the segment | delete TCB, and return. Otherwise (no ACK), drop the segment | |||
and return. | and return. | |||
</t></list></t> | </li> | |||
</list></t> | </ul> | |||
<t> | </li> | |||
third check the security | </ul> | |||
<list> | </li> | |||
<t> | <li> | |||
<t> | ||||
Third, check the security: | ||||
</t> | ||||
<ul spacing="normal"> | ||||
<li> | ||||
<t> | ||||
If the security/compartment in the segment does not exactly | If the security/compartment in the segment does not exactly | |||
match the security/compartment in the TCB, send a reset | match the security/compartment in the TCB, send a reset: | |||
<list> | </t> | |||
<t> | <ul spacing="normal"> | |||
If there is an ACK | <li> | |||
<list> | <t> | |||
<t> | If there is an ACK, | |||
<SEQ=SEG.ACK><CTL=RST> | </t> | |||
</t></list></t> | <ul spacing="normal" empty="true"> | |||
<t> | <li> | |||
Otherwise | <SEQ=SEG.ACK><CTL=RST> | |||
<list> | </li> | |||
<t> | </ul> | |||
<SEQ=0><ACK=SEG.SEQ+SEG.LEN><CTL=RST,ACK> | </li> | |||
</t></list></t> | <li> | |||
</list></t> | <t> | |||
<t> | Otherwise, | |||
</t> | ||||
<ul spacing="normal" empty="true"> | ||||
<li> | ||||
<SEQ=0><ACK=SEG.SEQ+SEG.LEN><CTL=RST,ACK> | ||||
</li> | ||||
</ul> | ||||
</li> | ||||
</ul> | ||||
</li> | ||||
<li> | ||||
If a reset was sent, discard the segment and return. | If a reset was sent, discard the segment and return. | |||
</t></list></t> | </li> | |||
<t> | </ul> | |||
fourth check the SYN bit | </li> | |||
<list> | <li> | |||
<t> | <t> | |||
Fourth, check the SYN bit: | ||||
</t> | ||||
<ul spacing="normal"> | ||||
<li> | ||||
This step should be reached only if the ACK is ok, or there is | This step should be reached only if the ACK is ok, or there is | |||
no ACK, and the segment did not contain a RST. | no ACK, and the segment did not contain a RST. | |||
</t> | </li> | |||
<t> | <li> | |||
If the SYN bit is on and the security/compartment | If the SYN bit is on and the security/compartment | |||
is acceptable then, RCV.NXT is set to SEG.SEQ+1, IRS is set to | is acceptable, then RCV.NXT is set to SEG.SEQ+1, IRS is set to | |||
SEG.SEQ. SND.UNA should be advanced to equal SEG.ACK (if there | SEG.SEQ. SND.UNA should be advanced to equal SEG.ACK (if there | |||
is an ACK), and any segments on the retransmission queue that | is an ACK), and any segments on the retransmission queue that | |||
are thereby acknowledged should be removed. | are thereby acknowledged should be removed. | |||
</t> | </li> | |||
<t> | <li> | |||
If SND.UNA > ISS (our SYN has been ACKed), change the connection | <t> | |||
If SND.UNA > ISS (our SYN has been ACKed), change the connection | ||||
state to ESTABLISHED, form an ACK segment | state to ESTABLISHED, form an ACK segment | |||
<list> | </t> | |||
<t> | <ul spacing="normal" empty="true"> | |||
<SEQ=SND.NXT><ACK=RCV.NXT><CTL=ACK> | <li> | |||
</t></list></t> | <SEQ=SND.NXT><ACK=RCV.NXT><CTL=ACK> | |||
<t> | </li> | |||
</ul> | ||||
</li> | ||||
<li> | ||||
and send it. Data or controls that were queued for | and send it. Data or controls that were queued for | |||
transmission MAY be included. Some TCP implementations suppress | transmission <bcp14>MAY</bcp14> be included. Some TCP implementations suppress | |||
sending this segment when the received segment contains data that will | sending this segment when the received segment contains data that will | |||
anyways generate an acknowledgement in the later processing steps, | anyways generate an acknowledgment in the later processing steps, | |||
saving this extra acknowledgement of the SYN from being sent. If there | saving this extra acknowledgment of the SYN from being sent. If there | |||
are other controls or text in the segment then continue processing at | are other controls or text in the segment, then continue processing at | |||
the sixth step under <xref target="other-states"/> where the URG | the <xref target="check-urg-bit" format="none">sixth step</xref> under <x | |||
bit is checked, otherwise return. | ref target="other-states" format="default"/> where the URG | |||
</t> | bit is checked; otherwise, return. | |||
<t> | </li> | |||
Otherwise enter SYN-RECEIVED, form a SYN,ACK segment | <li> | |||
<list> | <t> | |||
<t> | Otherwise, enter SYN-RECEIVED, form a SYN,ACK segment | |||
<SEQ=ISS><ACK=RCV.NXT><CTL=SYN,ACK> | </t> | |||
</t></list></t> | <ul spacing="normal" empty="true"> | |||
<t> | <li> | |||
<SEQ=ISS><ACK=RCV.NXT><CTL=SYN,ACK> | ||||
</li> | ||||
</ul> | ||||
</li> | ||||
<li> | ||||
<t> | ||||
and send it. Set the variables: | and send it. Set the variables: | |||
<list> | </t> | |||
<t>SND.WND &lt;- SEG.WND<vspace /> | <ul spacing="normal" empty="true"> | |||
SND.WL1 &lt;- SEG.SEQ<vspace /> | <li> | |||
<t>SND.WND &lt;- SEG.WND</t> | ||||
<t> | ||||
SND.WL1 <- SEG.SEQ</t> | ||||
<t> | ||||
SND.WL2 <- SEG.ACK</t> | SND.WL2 <- SEG.ACK</t> | |||
</list> | </li> | |||
</ul> | ||||
<t> | ||||
If there are other controls or text in the | If there are other controls or text in the | |||
segment, queue them for processing after the ESTABLISHED state | segment, queue them for processing after the ESTABLISHED state | |||
has been reached, return. | has been reached, return. | |||
</t> | </t> | |||
<t> | </li> | |||
Note that it is legal to send and receive application data on SYN segment | <li> | |||
s (this is the "text in the segment" mentioned above. There has been | Note that it is legal to send and receive application data on SYN segment | |||
significant misinformation and misunderstanding of this topic historically. Som | s (this is the "text in the segment" mentioned above). There has been significa | |||
e firewalls and security devices consider this suspicious. However, the capabil | nt misinformation and misunderstanding of this topic historically. Some firewal | |||
ity was used in T/TCP <xref target="RFC1644"/> and is used in TCP Fast Open (TFO | ls and security devices consider this suspicious. However, the capability was u | |||
) <xref target="RFC7413"/>, so is important for implementations and network devi | sed in T/TCP <xref target="RFC1644" format="default"/> and is used in TCP Fast O | |||
ces to permit. | pen (TFO) <xref target="RFC7413" format="default"/>, so is important for impleme | |||
</t></list></t> | ntations and network devices to permit. | |||
<t> | </li> | |||
fifth, if neither of the SYN or RST bits is set then drop the | </ul> | |||
</li> | ||||
<li> | ||||
Fifth, if neither of the SYN or RST bits is set, then drop the | ||||
segment and return. | segment and return. | |||
</t></list></t> | </li> | |||
</section> | </ul> | |||
<section title="Other States" anchor="other-states"> | </section> | |||
<t> | <section anchor="other-states" numbered="true" toc="default"> | |||
<name>Other States</name> | ||||
<t> | ||||
Otherwise, | Otherwise, | |||
<list> | </t> | |||
<t> | <ul spacing="normal" empty="true"> | |||
first check sequence number | <li> | |||
<list> | <t> | |||
<t> | First, check sequence number: | |||
SYN-RECEIVED STATE<vspace /> | </t> | |||
ESTABLISHED STATE<vspace /> | <ul spacing="normal"> | |||
FIN-WAIT-1 STATE<vspace /> | <li> | |||
FIN-WAIT-2 STATE<vspace /> | SYN-RECEIVED STATE</li> | |||
CLOSE-WAIT STATE<vspace /> | <li> | |||
CLOSING STATE<vspace /> | ESTABLISHED STATE</li> | |||
LAST-ACK STATE<vspace /> | <li> | |||
TIME-WAIT STATE | FIN-WAIT-1 STATE</li> | |||
<list> | <li> | |||
<t> | FIN-WAIT-2 STATE</li> | |||
<li> | ||||
CLOSE-WAIT STATE</li> | ||||
<li> | ||||
CLOSING STATE</li> | ||||
<li> | ||||
LAST-ACK STATE</li> | ||||
<li> | ||||
<t> | ||||
TIME-WAIT STATE</t> | ||||
<ul spacing="normal"> | ||||
<li> | ||||
Segments are processed in sequence. Initial tests on arrival | Segments are processed in sequence. Initial tests on arrival | |||
are used to discard old duplicates, but further processing is | are used to discard old duplicates, but further processing is | |||
done in SEG.SEQ order. If a segment's contents straddle the | done in SEG.SEQ order. If a segment's contents straddle the | |||
boundary between old and new, only the new parts are | boundary between old and new, only the new parts are | |||
processed. | processed. | |||
</t> | </li> | |||
<t> | <li> | |||
In general, the processing of received segments MUST be | In general, the processing of received segments <bcp14>MUST</bcp14> be | |||
implemented to aggregate ACK segments whenever possible (MUST-58). | implemented to aggregate ACK segments whenever possible (MUST-58). | |||
For example, if the TCP endpoint is processing a series of queued | For example, if the TCP endpoint is processing a series of queued | |||
segments, it MUST process them all before sending any ACK | segments, it <bcp14>MUST</bcp14> process them all before sending any ACK | |||
segments (MUST-59). | segments (MUST-59). | |||
</t> | </li> | |||
<t> | <li> | |||
<t> | ||||
There are four cases for the acceptability test for an incoming | There are four cases for the acceptability test for an incoming | |||
segment: | segment: | |||
</t> | </t> | |||
<t><figure><artwork> | <table> | |||
Segment Receive Test | <name>Segment Acceptability Tests</name> | |||
Length Window | <thead> | |||
------- ------- ------------------------------------------- | <tr> | |||
<th>Segment Length</th> | ||||
0 0 SEG.SEQ = RCV.NXT | <th>Receive Window</th> | |||
<th>Test</th> | ||||
0 >0 RCV.NXT =< SEG.SEQ < RCV.NXT+RCV.WND | </tr> | |||
</thead> | ||||
>0 0 not acceptable | <tbody> | |||
<tr> | ||||
>0 >0 RCV.NXT =< SEG.SEQ < RCV.NXT+RCV.WND | <td>0</td> | |||
or RCV.NXT =< SEG.SEQ+SEG.LEN-1 < RCV.NXT+RCV.WND | <td>0</td> | |||
</artwork></figure></t> | <td>SEG.SEQ = RCV.NXT</td> | |||
<t> | </tr> | |||
In implementing sequence number validation as described here, please note | <tr> | |||
<xref target="seqval"/>. | <td>0</td> | |||
</t> | <td>>0</td> | |||
<t> | <td>RCV.NXT =&lt; SEG.SEQ &lt; RCV.NXT+RCV.WND</td> | |||
</tr> | ||||
<tr> | ||||
<td>>0</td> | ||||
<td>0</td> | ||||
<td>not acceptable</td> | ||||
</tr> | ||||
<tr> | ||||
<td>>0</td> | ||||
<td>>0</td> | ||||
<td> | ||||
<t>RCV.NXT =&lt; SEG.SEQ &lt; RCV.NXT+RCV.WND</t> | ||||
<t>or</t> | ||||
<t>RCV.NXT =&lt; SEG.SEQ+SEG.LEN-1 &lt; RCV.NXT+RCV.WND</t> | ||||
</td> | ||||
</tr> | ||||
</tbody> | ||||
</table> | ||||
</li> | ||||
<li> | ||||
In implementing sequence number validation as described here, please note | ||||
<xref target="seqval" format="default"/>. | ||||
</li> | ||||
<li> | ||||
If the RCV.WND is zero, no segments will be acceptable, but | If the RCV.WND is zero, no segments will be acceptable, but | |||
special allowance should be made to accept valid ACKs, URGs and | special allowance should be made to accept valid ACKs, URGs, and | |||
RSTs. | RSTs. | |||
</t> | </li> | |||
<t> | <li> | |||
<t> | ||||
If an incoming segment is not acceptable, an acknowledgment | If an incoming segment is not acceptable, an acknowledgment | |||
should be sent in reply (unless the RST bit is set, if so drop | should be sent in reply (unless the RST bit is set, if so drop | |||
the segment and return): | the segment and return): | |||
<list> | </t> | |||
<t> | <t> | |||
&lt;SEQ=SND.NXT&gt;&lt;ACK=RCV.NXT&gt;&lt;CTL=ACK&gt; | &lt;SEQ=SND.NXT&gt;&lt;ACK=RCV.NXT&gt;&lt;CTL=ACK&gt; | |||
</t></list></t> | </t> | |||
<t> | </li> | |||
<li> | ||||
After sending the acknowledgment, drop the unacceptable segment | After sending the acknowledgment, drop the unacceptable segment | |||
and return. | and return. | |||
</t> | </li> | |||
<t> | <li> | |||
Note that for the TIME-WAIT state, there is an improved algorithm | Note that for the TIME-WAIT state, there is an improved algorithm | |||
described in <xref target="RFC6191"/> for handling incoming SYN | described in <xref target="RFC6191" format="default"/> for handling incom | |||
segments, that utilizes timestamps rather than relying on | ing SYN | |||
segments that utilizes timestamps rather than relying on | ||||
the sequence number check described here. When the improved | the sequence number check described here. When the improved | |||
algorithm is implemented, the logic above is not applicable for | algorithm is implemented, the logic above is not applicable for | |||
incoming SYN segments with timestamp options, received on a | incoming SYN segments with Timestamp Options, received on a | |||
connection in the TIME-WAIT state. | connection in the TIME-WAIT state. | |||
</t> | </li> | |||
<t> | <li> | |||
In the following it is assumed that the segment is the idealized | In the following it is assumed that the segment is the idealized | |||
segment that begins at RCV.NXT and does not exceed the window. | segment that begins at RCV.NXT and does not exceed the window. | |||
One could tailor actual segments to fit this assumption by | One could tailor actual segments to fit this assumption by | |||
trimming off any portions that lie outside the window (including | trimming off any portions that lie outside the window (including | |||
SYN and FIN), and only processing further if the segment then | SYN and FIN) and only processing further if the segment then | |||
begins at RCV.NXT. Segments with higher beginning sequence | begins at RCV.NXT. Segments with higher beginning sequence | |||
numbers SHOULD be held for later processing (SHLD-31). | numbers <bcp14>SHOULD</bcp14> be held for later processing (SHLD-31). | |||
</t> | </li> | |||
</ul> | ||||
</list></t> | </li> | |||
<t> | </ul> | |||
second check the RST bit, | </li> | |||
<list> | <li> | |||
<t> | <t> | |||
RFC 5961 <xref target="RFC5961"/> section 3 describes a potential blind reset at | Second, check the RST bit: | |||
tack and optional mitigation approach. This does not provide a cryptographic pr | </t> | |||
otection (e.g. as in IPsec or TCP-AO), but can be applicable in situations descr | <ul spacing="normal"> | |||
ibed in RFC 5961. For stacks implementing the RFC 5961 protection, the three ch | <li> | |||
ecks below apply, otherwise processing for these states is indicated further bel | <t> | |||
ow. | RFC 5961 <xref target="RFC5961" format="default"/>, Section <xref target="RFC596 | |||
<list> | 1" section="3" sectionFormat="bare" format="default"/> describes a potential bl | |||
<t>1) If the RST bit is set and the sequence number is outside the current recei | ind reset attack and optional mitigation approach. This does not provide a cryp | |||
ve window, silently drop the segment.</t> | tographic protection (e.g., as in IPsec or TCP-AO) but can be applicable in situ | |||
<t>2) If the RST bit is set and the sequence number exactly matches the next exp | ations described in RFC 5961. For stacks implementing the protection described | |||
ected sequence number (RCV.NXT), then TCP endpoints MUST reset the connection in | in RFC 5961, the three checks below apply; otherwise, processing for these state | |||
the manner prescribed below according to the connection state.</t> | s is indicated further below. | |||
<t>3) If the RST bit is set and the sequence number does not exactly match the n | ||||
ext expected sequence value, yet is within the current receive window, TCP endpo | ||||
ints MUST send an acknowledgement (challenge ACK):<vspace blankLines="1"/> | ||||
<SEQ=SND.NXT><ACK=RCV.NXT><CTL=ACK><vspace blankLines="1"/> | ||||
After sending the challenge ACK, TCP endpoints MUST drop the unacceptable segmen | ||||
t and stop processing the incoming packet further. Note that RFC 5961 and Errat | ||||
a ID 4772 contain additional considerations for ACK throttling in an implementat | ||||
ion.</t> | ||||
</list> | ||||
</t> | </t> | |||
<t> | <ol type="%d)"> | |||
<li>If the RST bit is set and the sequence number is o | ||||
utside the current receive window, silently drop the segment.</li> | ||||
<li>If the RST bit is set and the sequence number exac | ||||
tly matches the next expected sequence number (RCV.NXT), then TCP endpoints <bcp | ||||
14>MUST</bcp14> reset the connection in the manner prescribed below according to | ||||
the connection state.</li> | ||||
<li> | ||||
<t>If the RST bit is set and the sequence number doe | ||||
s not exactly match the next expected sequence value, yet is within the current | ||||
receive window, TCP endpoints <bcp14>MUST</bcp14> send an acknowledgment (challe | ||||
nge ACK):</t> | ||||
<t> | ||||
<SEQ=SND.NXT><ACK=RCV.NXT><CTL=ACK></t> | ||||
<t> | ||||
After sending the challenge ACK, TCP endpoints <bcp14>MUST</bcp14> drop the unac | ||||
ceptable segment and stop processing the incoming packet further. Note that RFC | ||||
5961 and Errata ID 4772 <xref target="Err4772" format="default"/> contain addit | ||||
ional considerations for ACK throttling in an implementation.</t> | ||||
</li> | ||||
</ol> | ||||
</li> | ||||
<li> | ||||
<t> | ||||
SYN-RECEIVED STATE | SYN-RECEIVED STATE | |||
<list> | </t> | |||
<t> | <ul spacing="normal"> | |||
If the RST bit is set | <li> | |||
<t> | ||||
If the RST bit is set, | ||||
<list> | </t> | |||
<t> | <ul spacing="normal"> | |||
<li> | ||||
If this connection was initiated with a passive OPEN (i.e., | If this connection was initiated with a passive OPEN (i.e., | |||
came from the LISTEN state), then return this connection to | came from the LISTEN state), then return this connection to | |||
LISTEN state and return. The user need not be informed. If | LISTEN state and return. The user need not be informed. If | |||
this connection was initiated with an active OPEN (i.e., came | this connection was initiated with an active OPEN (i.e., came | |||
from SYN-SENT state) then the connection was refused, signal | from SYN-SENT state), then the connection was refused; signal | |||
the user "connection refused". In either case, | the user "connection refused". In either case, | |||
the retransmission queue should be flushed. And in the | the retransmission queue should be flushed. And in the | |||
active OPEN case, enter the CLOSED state and delete the TCB, | active OPEN case, enter the CLOSED state and delete the TCB, | |||
and return. | and return. | |||
</t></list></t> | </li> | |||
</list></t> | </ul> | |||
<t> | </li> | |||
ESTABLISHED<vspace /> | </ul> | |||
FIN-WAIT-1<vspace /> | </li> | |||
FIN-WAIT-2<vspace /> | <li> | |||
CLOSE-WAIT | ESTABLISHED STATE</li> | |||
<list> | <li> | |||
<t> | FIN-WAIT-1 STATE</li> | |||
If the RST bit is set then, any outstanding RECEIVEs and SEND | <li> | |||
FIN-WAIT-2 STATE</li> | ||||
<li> | ||||
<t> | ||||
CLOSE-WAIT STATE</t> | ||||
<ul spacing="normal"> | ||||
<li> | ||||
If the RST bit is set, then any outstanding RECEIVEs and SEND | ||||
should receive "reset" responses. All segment queues should be | should receive "reset" responses. All segment queues should be | |||
flushed. Users should also receive an unsolicited general | flushed. Users should also receive an unsolicited general | |||
"connection reset" signal. Enter the CLOSED state, delete the | "connection reset" signal. Enter the CLOSED state, delete the | |||
TCB, and return. | TCB, and return. | |||
</t> | </li> | |||
</list></t> | </ul> | |||
<t> | </li> | |||
CLOSING STATE<vspace /> | <li> | |||
LAST-ACK STATE<vspace /> | CLOSING STATE</li> | |||
TIME-WAIT<vspace /> | <li> | |||
<list> | LAST-ACK STATE</li> | |||
<t> | <li> | |||
If the RST bit is set then, enter the CLOSED state, delete the | <t> | |||
TIME-WAIT STATE</t> | ||||
<ul spacing="normal"> | ||||
<li> | ||||
If the RST bit is set, then enter the CLOSED state, delete the | ||||
TCB, and return. | TCB, and return. | |||
</t></list></t> | </li> | |||
</list></t> | </ul> | |||
<t> | </li> | |||
third check security | </ul> | |||
<list> | </li> | |||
<t> | <li> | |||
SYN-RECEIVED | <t> | |||
<list> | Third, check security: | |||
<t> | </t> | |||
<ul spacing="normal"> | ||||
<li> | ||||
<t> | ||||
SYN-RECEIVED STATE | ||||
</t> | ||||
<ul spacing="normal"> | ||||
<li> | ||||
If the security/compartment in the segment does not | If the security/compartment in the segment does not | |||
exactly match the security/compartment in the TCB | exactly match the security/compartment in the TCB, | |||
then send a reset, and return. | then send a reset and return. | |||
</t></list></t> | </li> | |||
<t> | </ul> | |||
ESTABLISHED<vspace /> | </li> | |||
FIN-WAIT-1<vspace /> | <li> | |||
FIN-WAIT-2<vspace /> | ESTABLISHED STATE</li> | |||
CLOSE-WAIT<vspace /> | <li> | |||
CLOSING<vspace /> | FIN-WAIT-1 STATE</li> | |||
LAST-ACK<vspace /> | <li> | |||
TIME-WAIT | FIN-WAIT-2 STATE</li> | |||
<list> | <li> | |||
<t> | CLOSE-WAIT STATE</li> | |||
<li> | ||||
CLOSING STATE</li> | ||||
<li> | ||||
LAST-ACK STATE</li> | ||||
<li> | ||||
<t> | ||||
TIME-WAIT STATE | ||||
</t> | ||||
<ul spacing="normal"> | ||||
<li> | ||||
If the security/compartment in the segment does not | If the security/compartment in the segment does not | |||
exactly match the security/compartment in the TCB | exactly match the security/compartment in the TCB, | |||
then send a reset, any outstanding RECEIVEs and SEND should | then send a reset; any outstanding RECEIVEs and SEND should | |||
receive "reset" responses. All segment queues should be | receive "reset" responses. All segment queues should be | |||
flushed. Users should also receive an unsolicited general | flushed. Users should also receive an unsolicited general | |||
"connection reset" signal. Enter the CLOSED state, delete the | "connection reset" signal. Enter the CLOSED state, delete the | |||
TCB, and return. | TCB, and return. | |||
</t></list></t> | </li> | |||
<t> | </ul> | |||
</li> | ||||
<li> | ||||
Note this check is placed following the sequence check to prevent | Note this check is placed following the sequence check to prevent | |||
a segment from an old connection between these port numbers with a | a segment from an old connection between these port numbers with a | |||
different security from causing an abort of the | different security from causing an abort of the | |||
current connection. | current connection. | |||
</t> | </li> | |||
</list></t> | </ul> | |||
<t> | </li> | |||
fourth, check the SYN bit, | <li> | |||
<list> | <t> | |||
<t> | Fourth, check the SYN bit: | |||
SYN-RECEIVED<vspace /> | </t> | |||
<list><t>If the connection was initiated with a passive OPEN, then retur | <ul spacing="normal"> | |||
n this connection to the LISTEN state and return. Otherwise, handle per the dir | <li> | |||
ections for synchronized states below.</t></list> | <t> | |||
ESTABLISHED STATE<vspace /> | SYN-RECEIVED STATE</t> | |||
FIN-WAIT STATE-1<vspace /> | <ul spacing="normal"> | |||
FIN-WAIT STATE-2<vspace /> | <li>If the connection was initiated with a passive OPE | |||
CLOSE-WAIT STATE<vspace /> | N, then return this connection to the LISTEN state and return. Otherwise, handl | |||
CLOSING STATE<vspace /> | e per the directions for synchronized states below.</li> | |||
LAST-ACK STATE<vspace /> | </ul> | |||
</li> | ||||
<li> | ||||
ESTABLISHED STATE</li> | ||||
<li> | ||||
FIN-WAIT-1 STATE</li> | ||||
<li> | ||||
FIN-WAIT-2 STATE</li> | ||||
<li> | ||||
CLOSE-WAIT STATE</li> | ||||
<li> | ||||
CLOSING STATE</li> | ||||
<li> | ||||
LAST-ACK STATE</li> | ||||
<li> | ||||
<t> | ||||
TIME-WAIT STATE | TIME-WAIT STATE | |||
<list> | </t> | |||
<t> | <ul spacing="normal"> | |||
If the SYN bit is set in these synchronized states, it may be either a legitimat | <li> | |||
e new connection attempt (e.g. in the case of TIME-WAIT), an error where the con | <t> | |||
nection should be reset, or the result of an attack attempt, as described in RFC | If the SYN bit is set in these synchronized states, it may be either a legitimat | |||
5961 <xref target="RFC5961"/>. For the TIME-WAIT state, new connections can be | e new connection attempt (e.g., in the case of TIME-WAIT), an error where the co | |||
accepted if the timestamp option is used and meets expectations (per <xref targ | nnection should be reset, or the result of an attack attempt, as described in RF | |||
et="RFC6191"/>). For all other cases, RFC 5961 provides a mitigation with appli | C 5961 <xref target="RFC5961" format="default"/>. For the TIME-WAIT state, new | |||
cability to some situations, though there are also alternatives that offer crypt | connections can be accepted if the Timestamp Option is used and meets expectatio | |||
ographic protection (see <xref target="Security"/>). RFC 5961 recommends that i | ns (per <xref target="RFC6191" format="default"/>). For all other cases, RFC 59 | |||
n these synchronized states, if the SYN bit is set, irrespective of the sequence | 61 provides a mitigation with applicability to some situations, though there are | |||
number, TCP endpoints MUST send a "challenge ACK" to the remote peer: | also alternatives that offer cryptographic protection (see <xref target="Securi | |||
</t> | ty" format="default"/>). RFC 5961 recommends that in these synchronized states, | |||
<t> | if the SYN bit is set, irrespective of the sequence number, TCP endpoints <bcp1 | |||
4>MUST</bcp14> send a "challenge ACK" to the remote peer:</t> | ||||
<t> | ||||
&lt;SEQ=SND.NXT&gt;&lt;ACK=RCV.NXT&gt;&lt;CTL=ACK&gt; | &lt;SEQ=SND.NXT&gt;&lt;ACK=RCV.NXT&gt;&lt;CTL=ACK&gt; | |||
</t> | </t> | |||
<t> | </li> | |||
After sending the acknowledgement, TCP implementations MUST drop the unacceptabl | <li> | |||
e segment and stop processing further. Note that RFC 5961 and Errata ID 4772 co | After sending the acknowledgment, TCP implementations <bcp14>MUST</bcp14> drop t | |||
ntain additional ACK throttling notes for an implementation. | he unacceptable segment and stop processing further. Note that RFC 5961 and Err | |||
</t> | ata ID 4772 <xref target="Err4772" format="default"/> contain additional ACK thr | |||
<t> | ottling notes for an implementation. | |||
For implementations that do not follow RFC 5961, the original RFC 793 be | </li> | |||
havior follows in this paragraph. If the SYN is in the window it is an error, s | <li> | |||
end a reset, any | For implementations that do not follow RFC 5961, the original behavior d | |||
outstanding RECEIVEs and SEND should receive "reset" responses | escribed in RFC 793 follows in this paragraph. If the SYN is in the window it i | |||
, | s an error: send a reset, any | |||
outstanding RECEIVEs and SEND should receive "reset" responses, | ||||
all segment queues should be flushed, the user should also | all segment queues should be flushed, the user should also | |||
receive an unsolicited general "connection reset" signal, enter | receive an unsolicited general "connection reset" signal, enter | |||
the CLOSED state, delete the TCB, and return. | the CLOSED state, delete the TCB, and return. | |||
</t> | </li> | |||
<t> | <li> | |||
If the SYN is not in the window this step would not be reached | If the SYN is not in the window, this step would not be reached | |||
and an ACK would have been sent in the first step (sequence | and an ACK would have been sent in the first step (sequence | |||
number check). | number check). | |||
</t> | </li> | |||
</list></t> | </ul> | |||
</list></t> | </li> | |||
<t> | </ul> | |||
fifth check the ACK field, | </li> | |||
<list> | <li> | |||
<t> | <t> | |||
if the ACK bit is off drop the segment and return | Fifth, check the ACK field: | |||
</t> | </t> | |||
<t> | <ul spacing="normal"> | |||
if the ACK bit is on | <li> | |||
<list> | if the ACK bit is off, drop the segment and return | |||
<t> | </li> | |||
RFC 5961 <xref target="RFC5961"/> section 5 describes a potential blind data inj | <li> | |||
ection attack, and mitigation that implementations MAY choose to include (MAY-12 | <t> | |||
). TCP stacks that implement RFC 5961 MUST add an input check that the ACK valu | if the ACK bit is on, | |||
e is acceptable only if it is in the range of ((SND.UNA - MAX.SND.WND) =< SEG | </t> | |||
.ACK =< SND.NXT). All incoming segments whose ACK value doesn't satisfy the | <ul spacing="normal"> | |||
above condition MUST be discarded and an ACK sent back. The new state variable | <li> | |||
MAX.SND.WND is defined as the largest window that the local sender has ever rece | RFC 5961 <xref target="RFC5961" section="5" sectionFormat="comma" format="defaul | |||
ived from its peer (subject to window scaling) or may be hard-coded to a maximum | t"/> describes a potential blind data injection attack, and mitigation that impl | |||
permissible window value. When the ACK value is acceptable, the processing per | ementations <bcp14>MAY</bcp14> choose to include (MAY-12). TCP stacks that impl | |||
-state below applies: | ement RFC 5961 <bcp14>MUST</bcp14> add an input check that the ACK value is acce | |||
</t> | ptable only if it is in the range of ((SND.UNA - MAX.SND.WND) =< SEG.ACK =< | |||
<t> | ; SND.NXT). All incoming segments whose ACK value doesn't satisfy the above con | |||
dition <bcp14>MUST</bcp14> be discarded and an ACK sent back. The new state var | ||||
iable MAX.SND.WND is defined as the largest window that the local sender has eve | ||||
r received from its peer (subject to window scaling) or may be hard-coded to a m | ||||
aximum permissible window value. When the ACK value is acceptable, the per-stat | ||||
e processing below applies: | ||||
</li> | ||||
<li> | ||||
<t> | ||||
SYN-RECEIVED STATE | SYN-RECEIVED STATE | |||
<list> | </t> | |||
<t> | <ul spacing="normal"> | |||
If SND.UNA &lt; SEG.ACK =&lt; SND.NXT then enter ESTABLISHED state | <li> | |||
and continue processing with variables below set to: | <t> | |||
<list> | If SND.UNA &lt; SEG.ACK =&lt; SND.NXT, then enter ESTABLISHED state | |||
<t>SND.WND &lt;- SEG.WND<vspace /> | and continue processing with the variables below set to: | |||
SND.WL1 &lt;- SEG.SEQ<vspace /> | </t> | |||
SND.WL2 &lt;- SEG.ACK</t> | <ul spacing="normal" empty="true"> | |||
</list> | <li> | |||
</t> | SND.WND &lt;- SEG.WND</li> | |||
<t> | <li> | |||
SND.WL1 <- SEG.SEQ</li> | ||||
<li> | ||||
SND.WL2 <- SEG.ACK</li> | ||||
</ul> | ||||
</li> | ||||
<li> | ||||
<t> | ||||
If the segment acknowledgment is not acceptable, form a | If the segment acknowledgment is not acceptable, form a | |||
reset segment, | reset segment | |||
<list> | </t> | |||
<t> | <ul spacing="normal" empty="true"> | |||
<SEQ=SEG.ACK><CTL=RST> | <li> | |||
</t></list> | <SEQ=SEG.ACK><CTL=RST> | |||
</t> | </li> | |||
<t> | </ul> | |||
</li> | ||||
<li> | ||||
and send it. | and send it. | |||
</t> | </li> | |||
</list></t> | </ul> | |||
<t> | </li> | |||
<li> | ||||
<t> | ||||
ESTABLISHED STATE | ESTABLISHED STATE | |||
<list> | </t> | |||
<t> | <ul spacing="normal"> | |||
If SND.UNA &lt; SEG.ACK =&lt; SND.NXT then, set SND.UNA &lt;- SEG.ACK. | <li> | |||
If SND.UNA &lt; SEG.ACK =&lt; SND.NXT, then set SND.UNA &lt;- SEG.ACK. | ||||
Any segments on the retransmission queue that are thereby | Any segments on the retransmission queue that are thereby | |||
entirely acknowledged are removed. Users should receive | entirely acknowledged are removed. Users should receive | |||
positive acknowledgments for buffers that have been SENT and | positive acknowledgments for buffers that have been SENT and | |||
fully acknowledged (i.e., SEND buffer should be returned with | fully acknowledged (i.e., SEND buffer should be returned with | |||
"ok" response). If the ACK is a duplicate | "ok" response). If the ACK is a duplicate | |||
(SEG.ACK =&lt; SND.UNA), it can be ignored. If the ACK acks | (SEG.ACK =&lt; SND.UNA), it can be ignored. If the ACK acks | |||
something not yet sent (SEG.ACK > SND.NXT) then send an ACK, | something not yet sent (SEG.ACK > SND.NXT), then send an ACK, | |||
drop the segment, and return. | drop the segment, and return. | |||
</t> | </li> | |||
<t> | <li> | |||
If SND.UNA =&lt; SEG.ACK =&lt; SND.NXT, the send window should be | If SND.UNA =&lt; SEG.ACK =&lt; SND.NXT, the send window should be | |||
updated. If (SND.WL1 &lt; SEG.SEQ or (SND.WL1 = SEG.SEQ and | updated. If (SND.WL1 &lt; SEG.SEQ or (SND.WL1 = SEG.SEQ and | |||
SND.WL2 =&lt; SEG.ACK)), set SND.WND &lt;- SEG.WND, set | SND.WL2 =&lt; SEG.ACK)), set SND.WND &lt;- SEG.WND, set | |||
SND.WL1 &lt;- SEG.SEQ, and set SND.WL2 &lt;- SEG.ACK. | SND.WL1 &lt;- SEG.SEQ, and set SND.WL2 &lt;- SEG.ACK. | |||
</t> | </li> | |||
<t> | <li> | |||
Note that SND.WND is an offset from SND.UNA, that SND.WL1 | Note that SND.WND is an offset from SND.UNA, that SND.WL1 | |||
records the sequence number of the last segment used to update | records the sequence number of the last segment used to update | |||
SND.WND, and that SND.WL2 records the acknowledgment number of | SND.WND, and that SND.WL2 records the acknowledgment number of | |||
the last segment used to update SND.WND. The check here | the last segment used to update SND.WND. The check here | |||
prevents using old segments to update the window. | prevents using old segments to update the window. | |||
</t> | </li> | |||
</list></t> | </ul> | |||
<t> | </li> | |||
<li> | ||||
<t> | ||||
FIN-WAIT-1 STATE | FIN-WAIT-1 STATE | |||
<list> | </t> | |||
<t> | <ul spacing="normal"> | |||
<li> | ||||
In addition to the processing for the ESTABLISHED state, if | In addition to the processing for the ESTABLISHED state, if | |||
the FIN segment is now acknowledged then enter FIN-WAIT-2 and continue | the FIN segment is now acknowledged, then enter FIN-WAIT-2 and continue | |||
processing in that state. | processing in that state. | |||
</t></list></t> | </li> | |||
<t> | </ul> | |||
</li> | ||||
<li> | ||||
<t> | ||||
FIN-WAIT-2 STATE | FIN-WAIT-2 STATE | |||
<list> | </t> | |||
<t> | <ul spacing="normal"> | |||
<li> | ||||
In addition to the processing for the ESTABLISHED state, if | In addition to the processing for the ESTABLISHED state, if | |||
the retransmission queue is empty, the user's CLOSE can be | the retransmission queue is empty, the user's CLOSE can be | |||
acknowledged ("ok") but do not delete the TCB. | acknowledged ("ok") but do not delete the TCB. | |||
</t></list></t> | </li> | |||
<t> | </ul> | |||
</li> | ||||
<li> | ||||
<t> | ||||
CLOSE-WAIT STATE | CLOSE-WAIT STATE | |||
<list> | </t> | |||
<t> | <ul spacing="normal"> | |||
<li> | ||||
Do the same processing as for the ESTABLISHED state. | Do the same processing as for the ESTABLISHED state. | |||
</t></list></t> | </li> | |||
<t> | </ul> | |||
</li> | ||||
<li> | ||||
<t> | ||||
CLOSING STATE | CLOSING STATE | |||
<list> | </t> | |||
<t> | <ul spacing="normal"> | |||
<li> | ||||
In addition to the processing for the ESTABLISHED state, if | In addition to the processing for the ESTABLISHED state, if | |||
the ACK acknowledges our FIN then enter the TIME-WAIT state, | the ACK acknowledges our FIN, then enter the TIME-WAIT state; | |||
otherwise ignore the segment. | otherwise, ignore the segment. | |||
</t></list></t> | </li> | |||
<t> | </ul> | |||
</li> | ||||
<li> | ||||
<t> | ||||
LAST-ACK STATE | LAST-ACK STATE | |||
<list> | </t> | |||
<t> | <ul spacing="normal"> | |||
<li> | ||||
The only thing that can arrive in this state is an | The only thing that can arrive in this state is an | |||
acknowledgment of our FIN. If our FIN is now acknowledged, | acknowledgment of our FIN. If our FIN is now acknowledged, | |||
delete the TCB, enter the CLOSED state, and return. | delete the TCB, enter the CLOSED state, and return. | |||
</t></list></t> | </li> | |||
<t> | </ul> | |||
</li> | ||||
<li> | ||||
<t> | ||||
TIME-WAIT STATE | TIME-WAIT STATE | |||
<list> | </t> | |||
<t> | <ul spacing="normal"> | |||
<li> | ||||
The only thing that can arrive in this state is a | The only thing that can arrive in this state is a | |||
retransmission of the remote FIN. Acknowledge it, and restart | retransmission of the remote FIN. Acknowledge it, and restart | |||
the 2 MSL timeout. | the 2 MSL timeout. | |||
</t></list></t> | </li> | |||
</list></t> | </ul> | |||
</list></t> | </li> | |||
<t> | </ul> | |||
sixth, check the URG bit, | </li> | |||
<list> | </ul> | |||
<t> | </li> | |||
ESTABLISHED STATE<vspace /> | <li> | |||
FIN-WAIT-1 STATE<vspace /> | <t anchor="check-urg-bit"> | |||
Sixth, check the URG bit: | ||||
</t> | ||||
<ul spacing="normal"> | ||||
<li> | ||||
ESTABLISHED STATE</li> | ||||
<li> | ||||
FIN-WAIT-1 STATE</li> | ||||
<li> | ||||
<t> | ||||
FIN-WAIT-2 STATE | FIN-WAIT-2 STATE | |||
<list> | </t> | |||
<t> | <ul spacing="normal"> | |||
<li> | ||||
If the URG bit is set, RCV.UP <- max(RCV.UP,SEG.UP), and signal | If the URG bit is set, RCV.UP <- max(RCV.UP,SEG.UP), and signal | |||
the user that the remote side has urgent data if the urgent | the user that the remote side has urgent data if the urgent | |||
pointer (RCV.UP) is in advance of the data consumed. If the | pointer (RCV.UP) is in advance of the data consumed. If the | |||
user has already been signaled (or is still in the "urgent | user has already been signaled (or is still in the "urgent | |||
mode") for this continuous sequence of urgent data, do not | mode") for this continuous sequence of urgent data, do not | |||
signal the user again. | signal the user again. | |||
</t></list></t> | </li> | |||
<t> | </ul> | |||
CLOSE-WAIT STATE<vspace /> | </li> | |||
CLOSING STATE<vspace /> | <li> | |||
LAST-ACK STATE<vspace /> | CLOSE-WAIT STATE</li> | |||
TIME-WAIT | <li> | |||
<list> | CLOSING STATE</li> | |||
<t> | <li> | |||
This should not occur, since a FIN has been received from the | LAST-ACK STATE</li> | |||
<li> | ||||
<t> | ||||
TIME-WAIT STATE | ||||
</t> | ||||
<ul spacing="normal"> | ||||
<li> | ||||
This should not occur since a FIN has been received from the | ||||
remote side. Ignore the URG. | remote side. Ignore the URG. | |||
</t></list></t> | </li> | |||
</list></t> | </ul> | |||
<t> | </li> | |||
seventh, process the segment text, | </ul> | |||
<list> | </li> | |||
<t> | <li> | |||
ESTABLISHED STATE<vspace /> | <t> | |||
FIN-WAIT-1 STATE<vspace /> | Seventh, process the segment text: | |||
</t> | ||||
<ul spacing="normal"> | ||||
<li> | ||||
ESTABLISHED STATE</li> | ||||
<li> | ||||
FIN-WAIT-1 STATE</li> | ||||
<li> | ||||
<t> | ||||
FIN-WAIT-2 STATE | FIN-WAIT-2 STATE | |||
<list> | </t> | |||
<t> | <ul spacing="normal"> | |||
<li> | ||||
Once in the ESTABLISHED state, it is possible to deliver segment | Once in the ESTABLISHED state, it is possible to deliver segment | |||
data to user RECEIVE buffers. Data from segments can be moved | data to user RECEIVE buffers. Data from segments can be moved | |||
into buffers until either the buffer is full or the segment is | into buffers until either the buffer is full or the segment is | |||
empty. If the segment empties and carries a PUSH flag, then | empty. If the segment empties and carries a PUSH flag, then | |||
the user is informed, when the buffer is returned, that a PUSH | the user is informed, when the buffer is returned, that a PUSH | |||
has been received. | has been received. | |||
</t> | </li> | |||
<t> | <li> | |||
When the TCP endpoint takes responsibility for delivering the data to the | When the TCP endpoint takes responsibility for delivering the data to the | |||
user it must also acknowledge the receipt of the data. | user, it must also acknowledge the receipt of the data. | |||
</t> | </li> | |||
<t> | <li> | |||
Once the TCP endpoint takes responsibility for the data it advances | Once the TCP endpoint takes responsibility for the data, it advances | |||
RCV.NXT over the data accepted, and adjusts RCV.WND as | RCV.NXT over the data accepted, and adjusts RCV.WND as | |||
appropriate to the current buffer availability. The total of | appropriate to the current buffer availability. The total of | |||
RCV.NXT and RCV.WND should not be reduced. | RCV.NXT and RCV.WND should not be reduced. | |||
</t> | </li> | |||
<t> | <li> | |||
A TCP implementation MAY send an ACK segment acknowledging RCV.NXT when a | A TCP implementation <bcp14>MAY</bcp14> send an ACK segment acknowledging RCV.NXT when a | |||
valid segment arrives that is in the window but not at the | valid segment arrives that is in the window but not at the | |||
left window edge (MAY-13). | left window edge (MAY-13). | |||
</t> | </li> | |||
<t> | <li> | |||
Please note the window management suggestions in <xref target="datacomm"/>. | Please note the window management suggestions in <xref target="datacomm" format="default"/>. | |||
</t> | </li> | |||
<t> | <li> | |||
<t> | ||||
Send an acknowledgment of the form: | Send an acknowledgment of the form: | |||
<list><t> | </t> | |||
<SEQ=SND.NXT><ACK=RCV.NXT><CTL=ACK> | <t> | |||
</t></list></t> | <SEQ=SND.NXT><ACK=RCV.NXT><CTL=ACK> | |||
<t> | </t> | |||
</li> | ||||
<li> | ||||
This acknowledgment should be piggybacked on a segment being | This acknowledgment should be piggybacked on a segment being | |||
transmitted if possible without incurring undue delay. | transmitted if possible without incurring undue delay. | |||
</t> | </li> | |||
</list></t> | </ul> | |||
<t> | </li> | |||
CLOSE-WAIT STATE<vspace /> | <li> | |||
CLOSING STATE<vspace /> | CLOSE-WAIT STATE</li> | |||
LAST-ACK STATE<vspace /> | <li> | |||
CLOSING STATE</li> | ||||
<li> | ||||
LAST-ACK STATE</li> | ||||
<li> | ||||
<t> | ||||
TIME-WAIT STATE | TIME-WAIT STATE | |||
<list> | </t> | |||
<t> | <ul spacing="normal"> | |||
This should not occur, since a FIN has been received from the | <li> | |||
This should not occur since a FIN has been received from the | ||||
remote side. Ignore the segment text. | remote side. Ignore the segment text. | |||
</t> | </li> | |||
</list> | </ul> | |||
</t> | </li> | |||
</list></t> | </ul> | |||
<t> | </li> | |||
eighth, check the FIN bit, | <li> | |||
<list> | <t> | |||
<t> | Eighth, check the FIN bit: | |||
Do not process the FIN if the state is CLOSED, LISTEN or SYN-SENT | </t> | |||
<ul spacing="normal"> | ||||
<li> | ||||
Do not process the FIN if the state is CLOSED, LISTEN, or SYN-SENT | ||||
since the SEG.SEQ cannot be validated; drop the segment and | since the SEG.SEQ cannot be validated; drop the segment and | |||
return. | return. | |||
</t> | </li> | |||
<t> | <li> | |||
<t> | ||||
If the FIN bit is set, signal the user "connection closing" and | If the FIN bit is set, signal the user "connection closing" and | |||
return any pending RECEIVEs with the same message, advance RCV.NXT | return any pending RECEIVEs with the same message, advance RCV.NXT | |||
over the FIN, and send an acknowledgment for the FIN. Note that | over the FIN, and send an acknowledgment for the FIN. Note that | |||
FIN implies PUSH for any segment text not yet delivered to the | FIN implies PUSH for any segment text not yet delivered to the | |||
user. | user. | |||
<list> | </t> | |||
<t> | <ul spacing="normal"> | |||
SYN-RECEIVED STATE<vspace /> | <li> | |||
SYN-RECEIVED STATE</li> | ||||
<li> | ||||
<t> | ||||
ESTABLISHED STATE | ESTABLISHED STATE | |||
<list> | </t> | |||
<t> | <ul spacing="normal"> | |||
<li> | ||||
Enter the CLOSE-WAIT state. | Enter the CLOSE-WAIT state. | |||
</t></list></t> | </li> | |||
<t> | </ul> | |||
</li> | ||||
<li> | ||||
<t> | ||||
FIN-WAIT-1 STATE | FIN-WAIT-1 STATE | |||
<list> | </t> | |||
<t> | <ul spacing="normal"> | |||
<li> | ||||
If our FIN has been ACKed (perhaps in this segment), then | If our FIN has been ACKed (perhaps in this segment), then | |||
enter TIME-WAIT, start the time-wait timer, turn off the other | enter TIME-WAIT, start the time-wait timer, turn off the other | |||
timers; otherwise enter the CLOSING state. | timers; otherwise, enter the CLOSING state. | |||
</t></list></t> | </li> | |||
<t> | </ul> | |||
</li> | ||||
<li> | ||||
<t> | ||||
FIN-WAIT-2 STATE | FIN-WAIT-2 STATE | |||
<list> | </t> | |||
<t> | <ul spacing="normal"> | |||
<li> | ||||
Enter the TIME-WAIT state. Start the time-wait timer, turn | Enter the TIME-WAIT state. Start the time-wait timer, turn | |||
off the other timers. | off the other timers. | |||
</t></list></t> | </li> | |||
<t> | </ul> | |||
</li> | ||||
<li> | ||||
<t> | ||||
CLOSE-WAIT STATE | CLOSE-WAIT STATE | |||
<list> | </t> | |||
<t> | <ul spacing="normal"> | |||
<li> | ||||
Remain in the CLOSE-WAIT state. | Remain in the CLOSE-WAIT state. | |||
</t></list></t> | </li> | |||
<t> | </ul> | |||
</li> | ||||
<li> | ||||
<t> | ||||
CLOSING STATE | CLOSING STATE | |||
<list> | </t> | |||
<t> | <ul spacing="normal"> | |||
<li> | ||||
Remain in the CLOSING state. | Remain in the CLOSING state. | |||
</t></list></t> | </li> | |||
<t> | </ul> | |||
</li> | ||||
<li> | ||||
<t> | ||||
LAST-ACK STATE | LAST-ACK STATE | |||
<list> | </t> | |||
<t> | <ul spacing="normal"> | |||
<li> | ||||
Remain in the LAST-ACK state. | Remain in the LAST-ACK state. | |||
</t></list></t> | </li> | |||
<t> | </ul> | |||
</li> | ||||
<li> | ||||
<t> | ||||
TIME-WAIT STATE | TIME-WAIT STATE | |||
<list> | </t> | |||
<t> | <ul spacing="normal"> | |||
<li> | ||||
Remain in the TIME-WAIT state. Restart the 2 MSL time-wait | Remain in the TIME-WAIT state. Restart the 2 MSL time-wait | |||
timeout. | timeout. | |||
</t></list></t> | </li> | |||
</list></t> | </ul> | |||
</list></t> | </li> | |||
<t> | </ul> | |||
</li> | ||||
</ul> | ||||
</li> | ||||
<li> | ||||
and return. | and return. | |||
</t> | </li> | |||
</list> | </ul> | |||
</t> | </section> | |||
</list> | </section> | |||
</t> | <section numbered="true" toc="default"> | |||
</section> | <name>Timeouts</name> | |||
<t> | ||||
</section> | ||||
<section title="Timeouts"> | ||||
<t> | ||||
<list> | ||||
<t> | ||||
USER TIMEOUT | USER TIMEOUT | |||
<list> | </t> | |||
<t> | <ul spacing="normal"> | |||
<li> | ||||
For any state, if the user timeout expires, flush all queues, signal | For any state, if the user timeout expires, flush all queues, signal | |||
the user "error: connection aborted due to user timeout" in general | the user "error: connection aborted due to user timeout" in general | |||
and for any outstanding calls, delete the TCB, enter the CLOSED | and for any outstanding calls, delete the TCB, enter the CLOSED | |||
state and return. | state, and return. | |||
</t> | </li> | |||
</list> | </ul> | |||
</t> | <t> | |||
<t> | ||||
RETRANSMISSION TIMEOUT | RETRANSMISSION TIMEOUT | |||
<list> | </t> | |||
<t> | <ul spacing="normal"> | |||
<li> | ||||
For any state, if the retransmission timeout expires on a segment in | For any state, if the retransmission timeout expires on a segment in | |||
the retransmission queue, send the segment at the front of the | the retransmission queue, send the segment at the front of the | |||
retransmission queue again, reinitialize the retransmission timer, | retransmission queue again, reinitialize the retransmission timer, | |||
and return. | and return. | |||
</t> | </li> | |||
</list> | </ul> | |||
</t> | <t> | |||
<t> | ||||
TIME-WAIT TIMEOUT | TIME-WAIT TIMEOUT | |||
<list> | </t> | |||
<t> | <ul spacing="normal"> | |||
If the time-wait timeout expires on a connection delete the TCB, | <li> | |||
enter the CLOSED state and return. | If the time-wait timeout expires on a connection, delete the TCB, | |||
</t> | enter the CLOSED state, and return. | |||
</list> | </li> | |||
</t> | </ul> | |||
</list> | </section> | |||
</t> | </section> | |||
</section> | </section> | |||
</section> | <section anchor="glossary" numbered="true" toc="default"> | |||
</section> | <name>Glossary</name> | |||
<dl newline="true" spacing="normal" indent="8"> | ||||
<section title="Glossary" anchor="glossary"> | <dt>ACK</dt> | |||
<t> | <dd> | |||
<list style="hanging" hangIndent="8"> | ||||
<t hangText="ACK"><vspace /> | ||||
A control bit (acknowledge) occupying no sequence space, which | A control bit (acknowledge) occupying no sequence space, which | |||
indicates that the acknowledgment field of this segment | indicates that the acknowledgment field of this segment | |||
specifies the next sequence number the sender of this segment | specifies the next sequence number the sender of this segment | |||
is expecting to receive, hence acknowledging receipt of all | is expecting to receive, hence acknowledging receipt of all | |||
previous sequence numbers.</t> | previous sequence numbers.</dd> | |||
<dt>connection</dt> | ||||
<t hangText="connection"><vspace /> | <dd> | |||
A logical communication path identified by a pair of sockets.</t> | A logical communication path identified by a pair of sockets.</dd> | |||
<dt>datagram</dt> | ||||
<t hangText="datagram"><vspace /> | <dd> | |||
A message sent in a packet switched computer communications | A message sent in a packet-switched computer communications | |||
network.</t> | network.</dd> | |||
<dt>Destination Address</dt> | ||||
<t hangText="Destination Address"><vspace /> | <dd> | |||
The network layer address of the endpoint intended to receive a segment.</t> | The network-layer address of the endpoint intended to receive a segment.</dd> | |||
<dt>FIN</dt> | ||||
<t hangText="FIN"><vspace /> | <dd> | |||
A control bit (finis) occupying one sequence number, which | A control bit (finis) occupying one sequence number, which | |||
indicates that the sender will send no more data or control | indicates that the sender will send no more data or control | |||
occupying sequence space.</t> | occupying sequence space.</dd> | |||
<dt>flush</dt> | ||||
<t hangText="flush"><vspace /> | <dd> | |||
To remove all of the contents (data or segments) from a store (buffer | To remove all of the contents (data or segments) from a store (buffer | |||
or queue).</t> | or queue).</dd> | |||
<dt>fragment</dt> | ||||
<t hangText="fragment"><vspace /> | <dd> | |||
A portion of a logical unit of data, in particular an internet | A portion of a logical unit of data. In particular, an internet | |||
fragment is a portion of an internet datagram.</t> | fragment is a portion of an internet datagram.</dd> | |||
<dt>header</dt> | ||||
<t hangText="header"><vspace /> | <dd> | |||
Control information at the beginning of a message, segment, | Control information at the beginning of a message, segment, | |||
fragment, packet or block of data.</t> | fragment, packet, or block of data.</dd> | |||
<dt>host</dt> | ||||
<t hangText="host"><vspace /> | <dd> | |||
A computer. In particular a source or destination of messages | A computer. In particular, a source or destination of messages | |||
from the point of view of the communication network.</t> | from the point of view of the communication network.</dd> | |||
<dt>Identification</dt> | ||||
<t hangText="Identification"><vspace /> | <dd> | |||
An Internet Protocol field. This identifying value assigned | An Internet Protocol field. This identifying value assigned | |||
by the sender aids in assembling the fragments of a datagram.</t> | by the sender aids in assembling the fragments of a datagram.</dd> | |||
<dt>internet address</dt> | ||||
<t hangText="internet address"><vspace /> | <dd> | |||
A network layer address.</t> | A network-layer address.</dd> | |||
<dt>internet datagram</dt> | ||||
<t hangText="internet datagram"><vspace /> | <dd> | |||
A unit of data exchanged between internet hosts, together with the internet header | A unit of data exchanged between internet hosts, together with the internet header | |||
that allows the datagram to be routed from source to destination.</t> | that allows the datagram to be routed from source to destination.</dd> | |||
<dt>internet fragment</dt> | ||||
<t hangText="internet fragment"><vspace /> | <dd> | |||
A portion of the data of an internet datagram with an internet | A portion of the data of an internet datagram with an internet | |||
header.</t> | header.</dd> | |||
<dt>IP</dt> | ||||
<t hangText="IP"><vspace /> | <dd> | |||
Internet Protocol. See <xref target="RFC0791"/> and <xref target="RFC8200"/>.</t> | Internet Protocol. See <xref target="RFC0791" format="default"/> and <xref target="RFC8200" format="default"/>.</dd> | |||
<dt>IRS</dt> | ||||
<t hangText="IRS"><vspace /> | <dd> | |||
The Initial Receive Sequence number. The first sequence | The Initial Receive Sequence number. The first sequence | |||
number used by the sender on a connection.</t> | number used by the sender on a connection.</dd> | |||
<dt>ISN</dt> | ||||
<t hangText="ISN"><vspace /> | <dd> | |||
The Initial Sequence Number. The first sequence number used | The Initial Sequence Number. The first sequence number used | |||
on a connection, (either ISS or IRS). Selected in a way that is unique | on a connection (either ISS or IRS). Selected in a way that is unique | |||
within a given period of time and is unpredictable to attackers.</t> | within a given period of time and is unpredictable to attackers.</dd> | |||
<dt>ISS</dt> | ||||
<t hangText="ISS"><vspace /> | <dd> | |||
The Initial Send Sequence number. The first sequence number | The Initial Send Sequence number. The first sequence number | |||
used by the sender on a connection.</t> | used by the sender on a connection.</dd> | |||
<dt>left sequence</dt> | ||||
<t hangText="left sequence"><vspace /> | <dd> | |||
This is the next sequence number to be acknowledged by the | This is the next sequence number to be acknowledged by the | |||
data receiving TCP endpoint (or the lowest currently unacknowledged | data-receiving TCP endpoint (or the lowest currently unacknowledged | |||
sequence number) and is sometimes referred to as the left edge | sequence number) and is sometimes referred to as the left edge | |||
of the send window.</t> | of the send window.</dd> | |||
<dt>module</dt> | ||||
<t hangText="module"><vspace /> | <dd> | |||
An implementation, usually in software, of a protocol or other | An implementation, usually in software, of a protocol or other | |||
procedure.</t> | procedure.</dd> | |||
<dt>MSL</dt> | ||||
<t hangText="MSL"><vspace /> | <dd> | |||
Maximum Segment Lifetime, the time a TCP segment can exist in | Maximum Segment Lifetime, the time a TCP segment can exist in | |||
the internetwork system. Arbitrarily defined to be 2 minutes.</t> | the internetwork system. Arbitrarily defined to be 2 minutes.</dd> | |||
<dt>octet</dt> | ||||
<t hangText="octet"><vspace /> | <dd> | |||
An eight bit byte.</t> | An eight-bit byte.</dd> | |||
<dt>Options</dt> | ||||
<t hangText="Options"><vspace /> | <dd> | |||
An Option field may contain several options, and each option | An Option field may contain several options, and each option | |||
may be several octets in length.</t> | may be several octets in length.</dd> | |||
<dt>packet</dt> | ||||
<t hangText="packet"><vspace /> | <dd> | |||
A package of data with a header that may or may not be | A package of data with a header that may or may not be | |||
logically complete. More often a physical packaging than a | logically complete. More often a physical packaging than a | |||
logical packaging of data.</t> | logical packaging of data.</dd> | |||
<dt>port</dt> | ||||
<t hangText="port"><vspace /> | <dd> | |||
The portion of a connection identifier used for demultiplexing connections | The portion of a connection identifier used for demultiplexing connections | |||
at an endpoint.</t> | at an endpoint.</dd> | |||
<dt>process</dt> | ||||
<t hangText="process"><vspace /> | <dd> | |||
A program in execution. A source or destination of data from | A program in execution. A source or destination of data from | |||
the point of view of the TCP endpoint or other host-to-host protocol.</t> | the point of view of the TCP endpoint or other host-to-host protocol.</dd> | |||
<dt>PUSH</dt> | ||||
<t hangText="PUSH"><vspace /> | <dd> | |||
A control bit occupying no sequence space, indicating that | A control bit occupying no sequence space, indicating that | |||
this segment contains data that must be pushed through to the | this segment contains data that must be pushed through to the | |||
receiving user.</t> | receiving user.</dd> | |||
<dt>RCV.NXT</dt> | ||||
<t hangText="RCV.NXT"><vspace /> | <dd> | |||
receive next sequence number</t> | receive next sequence number</dd> | |||
<dt>RCV.UP</dt> | ||||
<t hangText="RCV.UP"><vspace /> | <dd> | |||
receive urgent pointer</t> | receive urgent pointer</dd> | |||
<dt>RCV.WND</dt> | ||||
<t hangText="RCV.WND"><vspace /> | <dd> | |||
receive window</t> | receive window</dd> | |||
<dt>receive next sequence number</dt> | ||||
<t hangText="receive next sequence number"><vspace /> | <dd> | |||
This is the next sequence number the local TCP endpoint is expecting to | This is the next sequence number the local TCP endpoint is expecting to | |||
receive.</t> | receive.</dd> | |||
<dt>receive window</dt> | ||||
<t hangText="receive window"><vspace /> | <dd> | |||
This represents the sequence numbers the local (receiving) TCP endpoint | This represents the sequence numbers the local (receiving) TCP endpoint | |||
is willing to receive. Thus, the local TCP endpoint considers that | is willing to receive. Thus, the local TCP endpoint considers that | |||
segments overlapping the range RCV.NXT to | segments overlapping the range RCV.NXT to | |||
RCV.NXT + RCV.WND - 1 carry acceptable data or control. | RCV.NXT + RCV.WND - 1 carry acceptable data or control. | |||
Segments containing sequence numbers entirely outside this | Segments containing sequence numbers entirely outside this | |||
range are considered duplicates or injection attacks and discarded.</t> | range are considered duplicates or injection attacks and discarded.</dd> | |||
<dt>RST</dt> | ||||
<t hangText="RST"><vspace /> | <dd> | |||
A control bit (reset), occupying no sequence space, indicating | A control bit (reset), occupying no sequence space, indicating | |||
that the receiver should delete the connection without further | that the receiver should delete the connection without further | |||
interaction. The receiver can determine, based on the | interaction. The receiver can determine, based on the | |||
sequence number and acknowledgment fields of the incoming | sequence number and acknowledgment fields of the incoming | |||
segment, whether it should honor the reset command or ignore | segment, whether it should honor the reset command or ignore | |||
it. In no case does receipt of a segment containing RST give | it. In no case does receipt of a segment containing RST give | |||
rise to a RST in response.</t> | rise to a RST in response.</dd> | |||
<dt>SEG.ACK</dt> | ||||
<t hangText="SEG.ACK"><vspace /> | <dd> | |||
segment acknowledgment</t> | segment acknowledgment</dd> | |||
<dt>SEG.LEN</dt> | ||||
<t hangText="SEG.LEN"><vspace /> | <dd> | |||
segment length</t> | segment length</dd> | |||
<dt>SEG.SEQ</dt> | ||||
<t hangText="SEG.SEQ"><vspace /> | <dd> | |||
segment sequence</t> | segment sequence</dd> | |||
<dt>SEG.UP</dt> | ||||
<t hangText="SEG.UP"><vspace /> | <dd> | |||
segment urgent pointer field</t> | segment urgent pointer field</dd> | |||
<dt>SEG.WND</dt> | ||||
<t hangText="SEG.WND"><vspace /> | <dd> | |||
segment window field</t> | segment window field</dd> | |||
<dt>segment</dt> | ||||
<t hangText="segment"><vspace /> | <dd> | |||
A logical unit of data, in particular a TCP segment is the | A logical unit of data. In particular, a TCP segment is the | |||
unit of data transferred between a pair of TCP modules.</t> | unit of data transferred between a pair of TCP modules.</dd> | |||
<dt>segment acknowledgment</dt> | ||||
<t hangText="segment acknowledgment"><vspace /> | <dd> | |||
The sequence number in the acknowledgment field of the | The sequence number in the acknowledgment field of the | |||
arriving segment.</t> | arriving segment.</dd> | |||
<dt>segment length</dt> | ||||
<t hangText="segment length"><vspace /> | <dd> | |||
The amount of sequence number space occupied by a segment, | The amount of sequence number space occupied by a segment, | |||
including any controls that occupy sequence space.</t> | including any controls that occupy sequence space.</dd> | |||
<dt>segment sequence</dt> | ||||
<t hangText="segment sequence"><vspace /> | <dd> | |||
The number in the sequence field of the arriving segment.</t> | The number in the sequence field of the arriving segment.</dd> | |||
<dt>send sequence</dt> | ||||
<t hangText="send sequence"><vspace /> | <dd> | |||
This is the next sequence number the local (sending) TCP endpoint will | This is the next sequence number the local (sending) TCP endpoint will | |||
use on the connection. It is initially selected from an | use on the connection. It is initially selected from an | |||
initial sequence number curve (ISN) and is incremented for | initial sequence number curve (ISN) and is incremented for | |||
each octet of data or sequenced control transmitted.</t> | each octet of data or sequenced control transmitted.</dd> | |||
<dt>send window</dt> | ||||
<t hangText="send window"><vspace /> | <dd> | |||
This represents the sequence numbers that the remote | This represents the sequence numbers that the remote | |||
      (receiving) TCP endpoint is willing to receive.  It is the value of the | (receiving) TCP endpoint is willing to receive.  It is the value of the | |||
window field specified in segments from the remote (data | window field specified in segments from the remote (data-receiving) | |||
receiving) TCP endpoint. The range of new sequence numbers that may | TCP endpoint. The range of new sequence numbers that may | |||
be emitted by a TCP implementation lies between SND.NXT and | be emitted by a TCP implementation lies between SND.NXT and | |||
SND.UNA + SND.WND - 1. (Retransmissions of sequence numbers | SND.UNA + SND.WND - 1. (Retransmissions of sequence numbers | |||
between SND.UNA and SND.NXT are expected, of course.)</t> | between SND.UNA and SND.NXT are expected, of course.)</dd> | |||
<dt>SND.NXT</dt> | ||||
<t hangText="SND.NXT"><vspace /> | <dd> | |||
send sequence</t> | send sequence</dd> | |||
<dt>SND.UNA</dt> | ||||
<t hangText="SND.UNA"><vspace /> | <dd> | |||
left sequence</t> | left sequence</dd> | |||
<dt>SND.UP</dt> | ||||
<t hangText="SND.UP"><vspace /> | <dd> | |||
send urgent pointer</t> | send urgent pointer</dd> | |||
<dt>SND.WL1</dt> | ||||
<t hangText="SND.WL1"><vspace /> | <dd> | |||
segment sequence number at last window update</t> | segment sequence number at last window update</dd> | |||
<dt>SND.WL2</dt> | ||||
<t hangText="SND.WL2"><vspace /> | <dd> | |||
segment acknowledgment number at last window update</t> | segment acknowledgment number at last window update</dd> | |||
<dt>SND.WND</dt> | ||||
<t hangText="SND.WND"><vspace /> | <dd> | |||
send window</t> | send window</dd> | |||
<dt>socket (or socket number, or socket address, or socket identifier)</ | ||||
<t hangText="socket (or socket number, or socket address, or socket identifier)" | dt> | |||
><vspace /> | <dd> | |||
An address that specifically includes a port identifier, that | An address that specifically includes a port identifier, that | |||
is, the concatenation of an Internet Address with a TCP port.</t> | is, the concatenation of an Internet Address with a TCP port.</dd> | |||
<dt>Source Address</dt> | ||||
<t hangText="Source Address"><vspace /> | <dd> | |||
The network layer address of the sending endpoint.</t> | The network-layer address of the sending endpoint.</dd> | |||
<dt>SYN</dt> | ||||
<t hangText="SYN"><vspace /> | <dd> | |||
A control bit in the incoming segment, occupying one sequence | A control bit in the incoming segment, occupying one sequence | |||
number, used at the initiation of a connection, to indicate | number, used at the initiation of a connection to indicate | |||
where the sequence numbering will start.</t> | where the sequence numbering will start.</dd> | |||
<dt>TCB</dt> | ||||
<t hangText="TCB"><vspace /> | <dd> | |||
Transmission control block, the data structure that records | Transmission control block, the data structure that records | |||
the state of a connection.</t> | the state of a connection.</dd> | |||
<dt>TCP</dt> | ||||
<t hangText="TCP"><vspace /> | <dd> | |||
Transmission Control Protocol: A host-to-host protocol for | Transmission Control Protocol: a host-to-host protocol for | |||
reliable communication in internetwork environments.</t> | reliable communication in internetwork environments.</dd> | |||
<dt>TOS</dt> | ||||
<t hangText="TOS"><vspace /> | <dd> | |||
Type of Service, an obsoleted IPv4 field. The same header bits curren | Type of Service, an obsoleted IPv4 field. The same header bits curren | |||
tly are used for the Differentiated Services field <xref target="RFC2474"/> cont | tly are used for the Differentiated Services field <xref target="RFC2474" format | |||
aining the Differentiated Services Code Point (DSCP) value and the 2-bit ECN cod | ="default"/> containing the Differentiated Services Codepoint (DSCP) value and t | |||
epoint <xref target="RFC3168"/>.</t> | he 2-bit ECN codepoint <xref target="RFC3168" format="default"/>.</dd> | |||
<dt>Type of Service</dt> | ||||
<t hangText="Type of Service"><vspace /> | <dd> | |||
See "TOS".</t> | See "TOS".</dd> | |||
<dt>URG</dt> | ||||
<t hangText="URG"><vspace /> | <dd> | |||
A control bit (urgent), occupying no sequence space, used to | A control bit (urgent), occupying no sequence space, used to | |||
indicate that the receiving user should be notified to do | indicate that the receiving user should be notified to do | |||
urgent processing as long as there is data to be consumed with | urgent processing as long as there is data to be consumed with | |||
sequence numbers less than the value indicated by the urgent | sequence numbers less than the value indicated by the urgent | |||
pointer.</t> | pointer.</dd> | |||
<dt>urgent pointer</dt> | ||||
<t hangText="urgent pointer"><vspace /> | <dd> | |||
A control field meaningful only when the URG bit is on. This | A control field meaningful only when the URG bit is on. This | |||
field communicates the value of the urgent pointer that | field communicates the value of the urgent pointer that | |||
indicates the data octet associated with the sending user's | indicates the data octet associated with the sending user's | |||
urgent call.</t> | urgent call.</dd> | |||
</list> | </dl> | |||
</t> | ||||
</section> | ||||
<section anchor="changes" title="Changes from RFC 793"> | ||||
<?rfc subcompact="yes" ?> | ||||
<t> | ||||
This document obsoletes RFC 793 as well as RFC 6093 and 6528, which | ||||
updated 793. In all cases, only the normative protocol specification and requir | ||||
ements have been incorporated into this document, and some informational text wi | ||||
th background and rationale may not have been carried in. The informational con | ||||
tent of those documents is still valuable in learning about and understanding TC | ||||
P, and they are valid Informational references, even though their normative cont | ||||
ent has been incorporated into this document. | ||||
</t> | ||||
<t> | ||||
The main body of this document was adapted from RFC 793's Section 3, | ||||
titled "FUNCTIONAL SPECIFICATION", with an attempt to keep formatting and layou | ||||
t as close as possible. | ||||
</t> | ||||
<t> | ||||
The collection of applicable RFC Errata that have been reported and | ||||
either accepted or held for an update to RFC 793 were incorporated (Errata IDs: | ||||
573, 574, 700, 701, 1283, 1561, 1562, 1564, 1571, 1572, 2297, 2298, 2748, 2749, | ||||
2934, 3213, 3300, 3301, 6222). Some errata were not applicable due to other cha | ||||
nges (Errata IDs: 572, 575, 1565, 1569, 2296, 3305, 3602). | ||||
</t> | ||||
<t> | ||||
Changes to the specification of the Urgent Pointer described in RFCs | ||||
1011, 1122, and 6093 were incorporated. See RFC 6093 for detailed discussion o | ||||
f why these changes were necessary. | ||||
</t> | ||||
<t> | ||||
The discussion of the RTO from RFC 793 was updated to refer to RFC 6298. The RF | ||||
C 1122 text on the RTO originally replaced the 793 text, however, RFC 2988 shoul | ||||
d have updated 1122, and has subsequently been obsoleted by 6298. | ||||
</t> | ||||
<t> | ||||
RFC 1011 <xref target="RFC1011"/> contains a number of comments about RFC 793, i | ||||
ncluding some needed changes to the TCP specification. These are expanded in RF | ||||
C 1122, which contains a collection of other changes and clarifications to RFC 7 | ||||
93. The normative items impacting the protocol have been incorporated here, tho | ||||
ugh some historically useful implementation advice and informative discussion fr | ||||
om RFC 1122 is not included here. The present document updates RFC 1011, since | ||||
this is now the TCP specification rather than RFC 793, and the comments noted in | ||||
1011 have been incorporated. | ||||
</t> | ||||
<t> | ||||
RFC 1122 contains more than just TCP requirements, so this document can't obsole | ||||
te RFC 1122 entirely. It is only marked as "updating" 1122, however, | ||||
it should be understood to effectively obsolete all of the RFC 1122 material on | ||||
TCP. | ||||
</t> | ||||
<t> | ||||
The more secure Initial Sequence Number generation algorithm from RF | ||||
C 6528 was incorporated. See RFC 6528 for discussion of the attacks that this m | ||||
itigates, as well as advice on selecting PRF algorithms and managing secret key | ||||
data. | ||||
</t> | ||||
<t> | ||||
A note based on RFC 6429 was added to explicitly clarify that system resource ma | ||||
nagement concerns allow connection resources to be reclaimed. RFC 6429 is obsol | ||||
eted in the sense that this clarification has been reflected in this update to t | ||||
he base TCP specification now. | ||||
</t> | ||||
<t> | ||||
The description of congestion control implementation was added, based on | ||||
the set of documents that are IETF BCP or Standards Track on the topic, and the | ||||
current state of common implementations. | ||||
</t> | ||||
<t> | ||||
RFC EDITOR'S NOTE: the content below is for detailed change tracking | ||||
and planning, and not to be included with the final revision of the document. | ||||
</t> | ||||
<t> | ||||
This document started as draft-eddy-rfc793bis-00, that was merely a | ||||
proposal and rough plan for updating RFC 793. | ||||
</t> | ||||
<t> | ||||
The -01 revision of this draft-eddy-rfc793bis incorporates the conte | ||||
nt of RFC 793 Section 3 titled "FUNCTIONAL SPECIFICATION". Other content from R | ||||
FC 793 has not been incorporated. The -01 revision of this document makes some | ||||
minor formatting changes to the RFC 793 content in order to convert the content | ||||
into XML2RFC format and account for left-out parts of RFC 793. For instance, fi | ||||
gure numbering differs and some indentation is not exactly the same. | ||||
</t> | ||||
<t> | ||||
The -02 revision of draft-eddy-rfc793bis incorporates errata that ha | ||||
ve been verified: | ||||
<list> | ||||
<t>Errata ID 573: Reported by Bob Braden (note: This errata repo | ||||
rt basically is just a reminder that RFC 1122 updates 793. Some of the associat | ||||
ed changes are left pending to a separate revision that incorporates 1122. Bob' | ||||
s mention of PUSH in 793 section 2.8 was not applicable here because that sectio | ||||
n was not part of the "functional specification". Also, the 1122 text on the re | ||||
transmission timeout also has been updated by subsequent RFCs, so the change her | ||||
e deviates from Bob's suggestion to apply the 1122 text.)</t> | ||||
<t>Errata ID 574: Reported by Yin Shuming</t> | ||||
<t>Errata ID 700: Reported by Yin Shuming</t> | ||||
<t>Errata ID 701: Reported by Yin Shuming</t> | ||||
<t>Errata ID 1283: Reported by Pei-chun Cheng</t> | ||||
<t>Errata ID 1561: Reported by Constantin Hagemeier</t> | ||||
<t>Errata ID 1562: Reported by Constantin Hagemeier</t> | ||||
<t>Errata ID 1564: Reported by Constantin Hagemeier</t> | ||||
<t>Errata ID 1565: Reported by Constantin Hagemeier</t> | ||||
<t>Errata ID 1571: Reported by Constantin Hagemeier</t> | ||||
<t>Errata ID 1572: Reported by Constantin Hagemeier</t> | ||||
<t>Errata ID 2296: Reported by Vishwas Manral</t> | ||||
<t>Errata ID 2297: Reported by Vishwas Manral</t> | ||||
<t>Errata ID 2298: Reported by Vishwas Manral</t> | ||||
<t>Errata ID 2748: Reported by Mykyta Yevstifeyev</t> | ||||
<t>Errata ID 2749: Reported by Mykyta Yevstifeyev</t> | ||||
<t>Errata ID 2934: Reported by Constantin Hagemeier</t> | ||||
<t>Errata ID 3213: Reported by EugnJun Yi</t> | ||||
<t>Errata ID 3300: Reported by Botong Huang</t> | ||||
<t>Errata ID 3301: Reported by Botong Huang</t> | ||||
<t>Errata ID 3305: Reported by Botong Huang</t> | ||||
<t>Note: Some verified errata were not used in this update, as t | ||||
hey relate to sections of RFC 793 elided from this document. These include Erra | ||||
ta ID 572, 575, and 1569.</t> | ||||
<t>Note: Errata ID 3602 was not applied in this revision as it i | ||||
s duplicative of the 1122 corrections.</t> | ||||
</list> | ||||
Not related to RFC 793 content, this revision also makes small tweak | ||||
s to the introductory text, fixes indentation of the pseudo header diagram, and | ||||
notes that the Security Considerations should also include privacy, when this se | ||||
ction is written. | ||||
</t> | ||||
<t> | ||||
The -03 revision of draft-eddy-rfc793bis revises all discussion of t | ||||
he urgent pointer in order to comply with RFC 6093, 1122, and 1011. Since 1122 | ||||
held requirements on the urgent pointer, the full list of requirements was broug | ||||
ht into an appendix of this document, so that it can be updated as-needed. | ||||
</t> | ||||
<t> | ||||
The -04 revision of draft-eddy-rfc793bis includes the ISN generation | ||||
changes from RFC 6528. | ||||
</t> | ||||
<t> | ||||
The -05 revision of draft-eddy-rfc793bis incorporates MSS requiremen | ||||
ts and definitions from RFC 879 <xref target="RFC0879"/>, 1122, and 6691, as wel | ||||
l as option-handling | ||||
requirements from RFC 1122. | ||||
</t> | ||||
<t> | ||||
The -00 revision of draft-ietf-tcpm-rfc793bis incorporates several a | ||||
dditional clarifications and updates to the section on segmentation, many of whi | ||||
ch are based on feedback from Joe Touch improving from the initial text on this | ||||
in the previous revision. | ||||
</t> | ||||
<t> | ||||
The -01 revision incorporates the change to Reserved bits due to ECN | ||||
, as well as many other changes that come from RFC 1122. | ||||
</t> | ||||
<t> | ||||
The -02 revision has small formatting modifications in order to addr | ||||
ess xml2rfc warnings about long lines. It was a quick update to avoid document | ||||
expiration. TCPM working group discussion in 2015 also indicated that we should | ||||
not try to add sections on implementation advice or similar non-normative infor | ||||
mation. | ||||
</t> | ||||
<t> | ||||
The -03 revision incorporates more content from RFC 1122: Passive OP | ||||
EN Calls, Time-To-Live, Multihoming, IP Options, ICMP messages, Data Communicati | ||||
ons, When to Send Data, When to Send a Window Update, Managing the Window, Probi | ||||
ng Zero Windows, When to Send an ACK Segment. The section on data communication | ||||
s was re-organized into clearer subsections (previously headings were embedded i | ||||
n the 793 text), and windows management advice from 793 was removed (as reviewed | ||||
by TCPM working group) in favor of the 1122 additions on SWS, ZWP, and related | ||||
topics. | ||||
</t> | ||||
<t> | ||||
The -04 revision includes reference to RFC 6429 on the ZWP condition, | ||||
 RFC 1122 material on TCP Connection Failures, TCP Keep-Alives, Acknowledging Que | ||||
ued Segments, and Remote Address Validation. RTO computation is referenced from | ||||
RFC 6298 rather than RFC 1122. | ||||
</t> | ||||
<t> | ||||
The -05 revision includes the requirement to implement TCP congestion | ||||
control with recommendation to implement ECN, the RFC 6633 update to 1122, whic | ||||
h changed the requirement on responding to source quench ICMP messages, and disc | ||||
ussion of ICMP (and ICMPv6) soft and hard errors per RFC 5461 (ICMPv6 handling f | ||||
or TCP doesn't seem to be mentioned elsewhere in standards track). | ||||
</t> | ||||
<t> | ||||
The -06 revision includes an appendix on "Other Implementation N | ||||
otes" to capture widely-deployed fundamental features that are not containe | ||||
d in the RFC series yet. It also added mention of RFC 6994 and the IANA TCP para | ||||
meters registry as a reference. It includes references to RFC 5961 in appropria | ||||
te places. The references to TOS were changed to DiffServ field, based on refle | ||||
cting RFC 2474 as well as the IPv6 presence of traffic class (carrying DiffServ | ||||
field) rather than TOS. | ||||
</t> | ||||
<t> | ||||
The -07 revision includes reference to RFC 6191, updated security conside | ||||
rations, discussion of additional implementation considerations, and clarificati | ||||
on of data on the SYN. | ||||
</t> | ||||
<t> | ||||
The -08 revision includes changes based on: | ||||
<list> | ||||
<t>describing treatment of reserved bits (following TCPM mailing list | ||||
 thread from July 2014 on "793bis item - reserved bit behavior")</t> | ||||
          <t>addition of a brief TCP key concepts section to make up for not inclu | ||||
ding the outdated section 2 of RFC 793</t> | ||||
<t>changed "TCP" to "host" to resolve conflict be | ||||
tween 1122 wording on whether TCP or the network layer chooses an address when m | ||||
ultihomed</t> | ||||
<t>fixed/updated definition of options in glossary</t> | ||||
<t>moved note on aggregating ACKs from 1122 to a more appropriate loc | ||||
ation</t> | ||||
<t>resolved notes on IP precedence and security/compartment</t> | ||||
<t>added implementation note on sequence number validation</t> | ||||
<t>added note that PUSH does not apply when Nagle is active</t> | ||||
<t>added 1122 content on asynchronous reports to replace 793 section | ||||
on TCP to user messages</t> | ||||
</list> | ||||
</t> | ||||
<t> | ||||
The -09 revision fixes section numbering problems. | ||||
</t> | ||||
<t> | ||||
The -10 revision includes additions to the security considerations based | ||||
on comments from Joe Touch, and suggested edits on RST/FIN notification, RFC 252 | ||||
5 reference, and other edits suggested by Yuchung Cheng, as well as modification | ||||
s to DiffServ text from Yuchung Cheng and Gorry Fairhurst. | ||||
</t> | ||||
<t> | ||||
The -11 revision includes a start at identifying all of the requirements | ||||
text and referencing each instance in the common table at the end of the docume | ||||
nt. | ||||
</t> | ||||
<t> | ||||
The -12 revision completes the requirement language indexing started in | ||||
-11 and adds necessary description of the PUSH functionality that was missing. | ||||
</t> | ||||
<t> | ||||
The -13 revision contains only changes in the inline editor notes. | ||||
</t> | ||||
<t> | ||||
The -14 revision includes updates with regard to several comments from th | ||||
e mailing list, including editorial fixes, adding IANA considerations for the he | ||||
ader flags, improving figure title placement, and breaking up the "Terminology" | ||||
section into more appropriately titled subsections. | ||||
</t> | ||||
<t> | ||||
The -15 revision has many technical and editorial corrections from Gorry | ||||
Fairhurst's review, and subsequent discussion on the TCPM list, as well as some | ||||
other collected clarifications and improvements from mailing list discussion. | ||||
</t> | ||||
<t> | ||||
    The -16 revision addresses several discussions that arose from additional | ||||
reviews and follow-up on some of Gorry Fairhurst's comments from revision 14. | ||||
</t> | ||||
<t> | ||||
The -17 revision includes errata 6222 from Charles Deng, update to the ke | ||||
y words boilerplate, updated description of the header flags registry changes, a | ||||
nd clarification about connections rather than users in the discussion of OPEN c | ||||
alls. | ||||
</t> | ||||
<t> | ||||
The -18 revision includes editorial changes to the IANA considerations, b | ||||
ased on comments from Richard Scheffenegger at the IETF 108 TCPM virtual meeting | ||||
. | ||||
</t> | ||||
<t> | ||||
The -19 revision includes editorial changes from Errata 6281 and 6282 rep | ||||
orted by Merlin Buge. It also includes WGLC changes noted by Mohamed Boucadair, | ||||
Rahul Jadhav, Praveen Balasubramanian, Matt Olson, Yi Huang, Joe Touch, and Juh | ||||
amatti Kuusisaari. | ||||
</t> | ||||
<t> | ||||
The -20 revision includes text on congestion control based on mailing li | ||||
st and meeting discussion, put together in its final form by Markku Kojo. It al | ||||
so clarifies that SACK, WS, and TS options are recommended for high performance, | ||||
but not needed for basic interoperability. It also clarifies that the length f | ||||
ield is required for new TCP options. | ||||
</t> | ||||
<t> | ||||
The -21 revision includes slight changes to the header diagram for compatibility | ||||
with tooling, from Stephen McQuistin, clarification on the meaning of idle conn | ||||
ections from Yuchung Cheng, Neal Cardwell, Michael Scharf, and Richard Scheffene | ||||
gger, editorial improvements from Markku Kojo, notes that some stacks suppress e | ||||
xtra acknowledgments of the SYN when SYN-ACK carries data from Richard Scheffene | ||||
gger, and adds MAY-18 numbering based on note from Jonathan Morton. | ||||
</t> | ||||
<t> | ||||
The -22 revision includes small clarifications on terminology (might versus may) | ||||
and IPv6 extension headers versus IPv4 options, based on comments from Gorry Fa | ||||
irhurst. | ||||
</t> | ||||
<t> | ||||
The -23 revision has a fix to indentation from Michael Tuexen and idnits issues | ||||
addressed from Michael Scharf. | ||||
</t> | ||||
<t> | ||||
The -24 revision incorporates changes after Martin Duke's AD review, including f | ||||
urther feedback on those comments from Yuchung Cheng and Joe Touch. Important c | ||||
hanges for review include (1) removal of the need to check for the PUSH flag whe | ||||
n evaluating the SWS override timer expiration, (2) clarification about receding | ||||
urgent pointer, and (3) de-duplicating handling of the RST checking between ste | ||||
p 4 and step 1. | ||||
</t> | ||||
<t> | ||||
The -25 revision incorporates changes based on the GENART review from Francis Du | ||||
pont, SECDIR review from Kyle Rose, and OPSDIR review from Sarah Banks. | ||||
</t> | ||||
<t> | ||||
The -26 revision incorporates changes stemming from the IESG reviews, and INTDIR | ||||
review from Bernie Volz. | ||||
</t> | ||||
<t> | ||||
The -27 revision fixes a few small editorial incompatibilities that Stephen McQu | ||||
istin found related to automated code generation. | ||||
</t> | ||||
<t> | ||||
The -28 revision addresses some COMMENTs from Ben Kaduk's IESG review. | ||||
</t> | ||||
<t>Some other suggested changes that will not be incorporated in this 79 | ||||
3 update unless TCPM consensus changes with regard to scope are: | ||||
<list style="numbers"> | ||||
<t>Tony Sabatini's suggestion for describing DO field</t> | ||||
<t>Per discussion with Joe Touch (TAPS list, 6/20/2015), the descript | ||||
ion of the API could be revisited</t> | ||||
<t>Reducing the R2 value for SYNs has been suggested as a possible to | ||||
pic for future consideration.</t> | ||||
</list> | ||||
</t> | ||||
<t> | ||||
Early in the process of updating RFC 793, Scott Brim mentioned that this | ||||
should include a PERPASS/privacy review. This may be something for the chairs | ||||
or AD to request during WGLC or IETF LC. | ||||
</t> | ||||
<?rfc subcompact="no" ?> | ||||
</section> | </section> | |||
<section anchor="changes" numbered="true" toc="default"> | ||||
<section anchor="IANA" title="IANA Considerations"> | <name>Changes from RFC 793</name> | |||
<t> | <t> | |||
In the "Transmission Control Protocol (TCP) Header Flags" registry | This document obsoletes RFC 793 as well as RFCs 6093 and 6528, which | |||
, IANA is asked to make several changes described in this section.</t> | updated 793. In all cases, only the normative protocol specification and requi | |||
<t>RFC 3168 originally created this registry, but only populated it with the | rements have been incorporated into this document, and some informational text w | |||
new bits defined in RFC 3168, neglecting the other bits that had previously bee | ith background and rationale may not have been carried in. The informational co | |||
n described in RFC 793 and other documents. Bit 7 has since also been updated b | ntent of those documents is still valuable in learning about and understanding T | |||
y RFC 8311.</t> | CP, and they are valid Informational references, even though their normative con | |||
<t>The "Bit" column is renamed below as the "Bit Offset" | tent has been incorporated into this document. | |||
column, since it references each header flag's offset within the 16-bit aligned | </t> | |||
view of the TCP header in <xref target="header_format"/>. The bits in offsets | <t> | |||
0 through 4 are the TCP segment Data Offset field, and not header flags.</t> | The main body of this document was adapted from RFC 793's Section <x | |||
<t>IANA should add a column for "Assignment Notes".</t> | ref target="RFC0793" section="3" sectionFormat="bare" format="default"/>, titled | |||
<t>IANA should assign values indicated below.</t> | "FUNCTIONAL SPECIFICATION", with an attempt to keep formatting and layout as cl | |||
<figure> | ose as possible. | |||
<artwork> | </t> | |||
TCP Header Flags | <t> | |||
The collection of applicable RFC errata that have been reported and | ||||
Bit Name Reference Assignmen | either accepted or held for an update to RFC 793 were incorporated (Errata IDs: | |||
t Notes | 573 <xref target="Err573" format="default"/>, 574 <xref target="Err574" format=" | |||
Offset | default"/>, 700 <xref target="Err700" format="default"/>, 701 <xref target="Err7 | |||
--- ---- --------- --------- | 01" format="default"/>, 1283 <xref target="Err1283" format="default"/>, 1561 <xr | |||
------- | ef target="Err1561" format="default"/>, 1562 <xref target="Err1562" format="defa | |||
4 Reserved for future use (this document) | ult"/>, 1564 <xref target="Err1564" format="default"/>, 1571 <xref target="Err15 | |||
5 Reserved for future use (this document) | 71" format="default"/>, 1572 <xref target="Err1572" format="default"/>, 2297 <xr | |||
6 Reserved for future use (this document) | ef target="Err2297" format="default"/>, 2298 <xref target="Err2298" format="defa | |||
7 Reserved for future use [RFC8311] [1] | ult"/>, 2748 <xref target="Err2748" format="default"/>, 2749 <xref target="Err27 | |||
8 CWR (Congestion Window Reduced) [RFC3168] | 49" format="default"/>, 2934 <xref target="Err2934" format="default"/>, 3213 <xr | |||
9 ECE (ECN-Echo) [RFC3168] | ef target="Err3213" format="default"/>, 3300 <xref target="Err3300" format="defa | |||
10 Urgent Pointer field is significant (URG) (this document) | ult"/>, 3301 <xref target="Err3301" format="default"/>, 6222 <xref target="Err62 | |||
11 Acknowledgment field is significant (ACK) (this document) | 22" format="default"/>). Some errata were not applicable due to other changes ( | |||
12 Push Function (PSH) (this document) | Errata IDs: 572 <xref target="Err572" format="default"/>, 575 <xref target="Err5 | |||
13 Reset the connection (RST) (this document) | 75" format="default"/>, 1565 <xref target="Err1565" format="default"/>, 1569 <xr | |||
14 Synchronize sequence numbers (SYN) (this document) | ef target="Err1569" format="default"/>, 2296 <xref target="Err2296" format="defa | |||
15 No more data from sender (FIN) (this document) | ult"/>, 3305 <xref target="Err3305" format="default"/>, 3602 <xref target="Err36 | |||
02" format="default"/>). | ||||
FOOTNOTES: | </t> | |||
[1] Previously used by Historic [RFC3540] as NS (Nonce Sum). | <t> | |||
</artwork> | Changes to the specification of the urgent pointer described in RFCs | |||
</figure> | 1011, 1122, and 6093 were incorporated. See RFC 6093 for detailed discussion o | |||
f why these changes were necessary. | ||||
<t>This TCP Header Flags registry should also be moved to a sub-registry und | </t> | |||
er the global "Transmission Control Protocol (TCP) Parameters" registry (htt | <t> | |||
ps://www.iana.org/assignments/tcp-parameters/tcp-parameters.xhtml).</t> | The discussion of the RTO from RFC 793 was updated to refer to RFC 6298. The te | |||
xt on the RTO in RFC 1122 originally replaced the text in RFC 793; however, RFC | ||||
<t>The registry's Registration Procedure should remain Standards Action, but | 2988 should have updated RFC 1122 and has subsequently been obsoleted by RFC 629 | |||
the Reference can be updated to this document, and the Note removed.</t> | 8. | |||
</t> | ||||
<t> | ||||
RFC 1011 <xref target="RFC1011" format="default"/> contains a number of comments | ||||
about RFC 793, including some needed changes to the TCP specification. These a | ||||
re expanded in RFC 1122, which contains a collection of other changes and clarif | ||||
ications to RFC 793. The normative items impacting the protocol have been incor | ||||
porated here, though some historically useful implementation advice and informat | ||||
ive discussion from RFC 1122 is not included here. The present document, which | ||||
is now the TCP specification rather than RFC 793, updates RFC 1011, and the comm | ||||
ents noted in RFC 1011 have been incorporated. | ||||
</t> | ||||
<t> | ||||
RFC 1122 contains more than just TCP requirements, so this document can't obsole | ||||
te RFC 1122 entirely. It is only marked as "updating" RFC 1122; however, it sho | ||||
uld be understood to effectively obsolete all of the material on TCP found in RF | ||||
C 1122. | ||||
</t> | ||||
<t> | ||||
The more secure initial sequence number generation algorithm from RF | ||||
C 6528 was incorporated. See RFC 6528 for discussion of the attacks that this m | ||||
itigates, as well as advice on selecting PRF algorithms and managing secret key | ||||
data. | ||||
</t> | ||||
<t> | ||||
A note based on RFC 6429 was added to explicitly clarify that system resource ma | ||||
nagement concerns allow connection resources to be reclaimed. RFC 6429 is obsol | ||||
eted in the sense that the clarification it describes has been reflected within | ||||
this base TCP specification. | ||||
</t> | ||||
<t> | ||||
The description of congestion control implementation was added based on t | ||||
he set of documents that are IETF BCP or Standards Track on the topic and the cu | ||||
rrent state of common implementations. | ||||
</t> | ||||
</section> | </section> | |||
<section anchor="IANA" numbered="true" toc="default"> | ||||
<name>IANA Considerations</name> | ||||
<t> | ||||
In the "Transmission Control Protocol (TCP) Header Flags" registry, IANA has | ||||
made several changes as described in this section.</t> | ||||
<t>RFC 3168 originally created this registry but only populated it with th | ||||
e new bits defined in RFC 3168, neglecting the other bits that had previously be | ||||
en described in RFC 793 and other documents. Bit 7 has since also been updated | ||||
by RFC 8311 <xref target="RFC8311" format="default"/>.</t> | ||||
<t>The "Bit" column has been renamed below as the "Bit Offset" column beca | ||||
use it references each header flag's offset within the 16-bit aligned view of th | ||||
e TCP header in <xref target="header_format" format="default"/>. The bits in of | ||||
fsets 0 through 3 are the TCP segment Data Offset field, and not header flags.</ | ||||
t> | ||||
<t>IANA has added a column for "Assignment Notes".</t> | ||||
<t>IANA has assigned values as indicated below.</t> | ||||
<table> | ||||
<name>TCP Header Flags</name> | ||||
<thead> | ||||
<tr> | ||||
<th>Bit Offset</th> | ||||
<th>Name</th> | ||||
<th>Reference</th> | ||||
<th>Assignment Notes</th> | ||||
</tr> | ||||
</thead> | ||||
<tbody> | ||||
<tr> | ||||
<td>4</td> | ||||
<td>Reserved for future use</td> | ||||
<td>RFC 9293</td> | ||||
<td></td> | ||||
</tr> | ||||
<tr> | ||||
<td>5</td> | ||||
<td>Reserved for future use</td> | ||||
<td>RFC 9293</td> | ||||
<td></td> | ||||
</tr> | ||||
<tr> | ||||
<td>6</td> | ||||
<td>Reserved for future use</td> | ||||
<td>RFC 9293</td> | ||||
<td></td> | ||||
</tr> | ||||
<tr> | ||||
<td>7</td> | ||||
<td>Reserved for future use</td> | ||||
<td>RFC 8311</td> | ||||
<td>Previously used by Historic RFC 3540 as NS (Nonce Sum).</td> | ||||
</tr> | ||||
<tr> | ||||
<td>8</td> | ||||
<td>CWR (Congestion Window Reduced)</td> | ||||
<td>RFC 3168</td> | ||||
<td></td> | ||||
</tr> | ||||
<tr> | ||||
<td>9</td> | ||||
<td>ECE (ECN-Echo)</td> | ||||
<td>RFC 3168</td> | ||||
<td></td> | ||||
</tr> | ||||
<tr> | ||||
<td>10</td> | ||||
<td>Urgent pointer field is significant (URG)</td> | ||||
<td>RFC 9293</td> | ||||
<td></td> | ||||
</tr> | ||||
<tr> | ||||
<td>11</td> | ||||
<td>Acknowledgment field is significant (ACK)</td> | ||||
<td>RFC 9293</td> | ||||
<td></td> | ||||
</tr> | ||||
<tr> | ||||
<td>12</td> | ||||
<td>Push function (PSH)</td> | ||||
<td>RFC 9293</td> | ||||
<td></td> | ||||
</tr> | ||||
<tr> | ||||
<td>13</td> | ||||
<td>Reset the connection (RST)</td> | ||||
<td>RFC 9293</td> | ||||
<td></td> | ||||
</tr> | ||||
<tr> | ||||
<td>14</td> | ||||
<td>Synchronize sequence numbers (SYN)</td> | ||||
<td>RFC 9293</td> | ||||
<td></td> | ||||
</tr> | ||||
<tr> | ||||
<td>15</td> | ||||
<td>No more data from sender (FIN)</td> | ||||
<td>RFC 9293</td> | ||||
<td></td> | ||||
</tr> | ||||
</tbody> | ||||
</table> | ||||
<section anchor="Security" title="Security and Privacy Considerations"> | <t>The "TCP Header Flags" registry has also been moved to a subregistry un | |||
<t> | der the global "Transmission Control Protocol (TCP) Parameters" registry <eref t | |||
The TCP design includes only rudimentary security features that improve the robu | arget="https://www.iana.org/assignments/tcp-parameters/" brackets="angle"/>.</t> | |||
stness and reliability of connections and application data transfer, but there a | <t>The registry's Registration Procedure remains Standards Action, but the | |||
re no built-in cryptographic capabilities to support any form of confidentiality | Reference has been updated to this document, and the Note has been removed.</t> | |||
, authentication, or other typical security functions. Non-cryptographic enhanc | </section> | |||
ements (e.g. <xref target="RFC5961"/>) have been developed to improve robustness | <section anchor="Security" numbered="true" toc="default"> | |||
of TCP connections to particular types of attacks, but the applicability and pr | <name>Security and Privacy Considerations</name> | |||
otections of non-cryptographic enhancements are limited (e.g. see section 1.1 of | <t> | |||
<xref target="RFC5961"/>). | The TCP design includes only rudimentary security features that improve the robu | |||
Applications typically utilize lower-layer (e.g. IPsec) and upper-layer (e.g. TL | stness and reliability of connections and application data transfer, but there a | |||
S) protocols to provide security and privacy for TCP connections and application | re no built-in cryptographic capabilities to support any form of confidentiality | |||
data carried in TCP. Methods based on TCP options have been developed as well, | , authentication, or other typical security functions. Non-cryptographic enhanc | |||
to support some security capabilities. | ements (e.g., <xref target="RFC5961" format="default"/>) have been developed to | |||
</t> | improve robustness of TCP connections to particular types of attacks, but the ap | |||
<t> | plicability and protections of non-cryptographic enhancements are limited (e.g., | |||
In order to fully provide confidentiality, integrity protection, and authenticat | see <xref target="RFC5961" section="1.1" sectionFormat="of" format="default"/>) | |||
ion for TCP connections (including their control flags) IPsec is the only curren | . | |||
t effective method. For integrity protection and authentication, the TCP Authen | Applications typically utilize lower-layer (e.g., IPsec) and upper-layer (e.g., | |||
tication Option (TCP-AO) <xref target="RFC5925"/> is available, with a proposed | TLS) protocols to provide security and privacy for TCP connections and applicati | |||
extension to also provide confidentiality for the segment payload. | on data carried in TCP. Methods based on TCP Options have been developed as wel | |||
l, to support some security capabilities. | ||||
</t> | ||||
<t> | ||||
In order to fully provide confidentiality, integrity protection, and authenticat | ||||
ion for TCP connections (including their control flags), IPsec is the only curre | ||||
nt effective method. For integrity protection and authentication, the TCP Authe | ||||
ntication Option (TCP-AO) <xref target="RFC5925" format="default"/> is available | ||||
, with a proposed extension to also provide confidentiality for the segment payl | ||||
oad. | ||||
Other methods discussed in this section may provide confidentiality or integrity protection for | Other methods discussed in this section may provide confidentiality or integrity protection for | |||
the payload, but for the TCP header only cover either a subset of the fields (e. | the payload, but for the TCP header only cover either a subset of the fields (e. | |||
g. tcpcrypt <xref target="RFC8548"/>) or none at | g., tcpcrypt <xref target="RFC8548" format="default"/>) or none at | |||
all (e.g. TLS). Other security features that have been added to TCP (e.g. ISN | all (e.g., TLS). Other security features that have been added to TCP (e.g., ISN | |||
generation, sequence number checks, and others) are only capable of partially | generation, sequence number checks, and others) are only capable of partially | |||
hindering attacks. | hindering attacks. | |||
</t> | </t> | |||
<t> | <t> | |||
Applications using long-lived TCP flows have been vulnerable to attacks that exp | Applications using long-lived TCP flows have been vulnerable to attacks that exp | |||
loit the processing of control flags described in earlier TCP specifications <xr | loit the processing of control flags described in earlier TCP specifications <xr | |||
ef target="RFC4953"/>. TCP-MD5 was a commonly implemented TCP option to support | ef target="RFC4953" format="default"/>. TCP-MD5 was a commonly implemented TCP | |||
authentication for some of these connections, but had flaws and is now deprecat | Option to support authentication for some of these connections, but had flaws an | |||
ed. TCP-AO provides a capability to protect long-lived TCP connections from att | d is now deprecated. TCP-AO provides a capability to protect long-lived TCP con | |||
acks, and has superior properties to TCP-MD5. It does not provide any privacy f | nections from attacks and has superior properties to TCP-MD5. It does not provi | |||
or application data, nor for the TCP headers. | de any privacy for application data or for the TCP headers. | |||
</t> | </t> | |||
<t> | <t> | |||
The "tcpcrypt" <xref target="RFC8548"/> Experimental extension to TCP | The "tcpcrypt" <xref target="RFC8548" format="default"/> experimental extension | |||
provides the ability to cryptographically protect connection data. Metadata asp | to TCP provides the ability to cryptographically protect connection data. Metad | |||
ects of the TCP flow are still visible, but the application stream is well-prote | ata aspects of the TCP flow are still visible, but the application stream is wel | |||
cted. Within the TCP header, only the urgent pointer and FIN flag are protected | l protected. Within the TCP header, only the urgent pointer and FIN flag are pr | |||
through tcpcrypt. | otected through tcpcrypt. | |||
</t> | </t> | |||
<t> | <t> | |||
The TCP Roadmap <xref target="RFC7414"/> includes notes about several RFCs relat | The TCP Roadmap <xref target="RFC7414" format="default"/> includes notes about s | |||
ed to TCP security. Many of the enhancements provided by these RFCs have been i | everal RFCs related to TCP security. Many of the enhancements provided by these | |||
ntegrated into the present document, including ISN generation, mitigating blind | RFCs have been integrated into the present document, including ISN generation, | |||
in-window attacks, and improving handling of soft errors and ICMP packets. Thes | mitigating blind in-window attacks, and improving handling of soft errors and IC | |||
e are all discussed in greater detail in the referenced RFCs that originally des | MP packets. These are all discussed in greater detail in the referenced RFCs th | |||
cribed the changes needed to earlier TCP specifications. Additionally, see RFC | at originally described the changes needed to earlier TCP specifications. Addit | |||
6093 <xref target="RFC6093"/> for discussion of security considerations related | ionally, see RFC 6093 <xref target="RFC6093" format="default"/> for discussion o | |||
to the urgent pointer field, which has been deprecated. | f security considerations related to the urgent pointer field, which also discou | |||
</t> | rages new applications from using the urgent pointer. | |||
<t> | </t> | |||
Since TCP is often used for bulk transfer flows, some attacks are possible that | <t> | |||
abuse the TCP congestion control logic. An example is "ACK-division" | Since TCP is often used for bulk transfer flows, some attacks are possible that | |||
attacks. Updates that have been made to the TCP congestion control specification | abuse the TCP congestion control logic. An example is "ACK-division" attacks. U | |||
s include mechanisms like Appropriate Byte Counting (ABC) <xref target="RFC3465" | pdates that have been made to the TCP congestion control specifications include | |||
/> that act as mitigations to these attacks. | mechanisms like Appropriate Byte Counting (ABC) <xref target="RFC3465" format="d | |||
</t> | efault"/> that act as mitigations to these attacks. | |||
<t> | </t> | |||
Other attacks are focused on exhausting the resources of a TCP server. Examples | <t> | |||
include SYN flooding <xref target="RFC4987"/> or wasting resources on non-progr | Other attacks are focused on exhausting the resources of a TCP server. Examples | |||
essing connections <xref target="RFC6429"/>. Operating systems commonly impleme | include SYN flooding <xref target="RFC4987" format="default"/> or wasting resou | |||
nt mitigations for these attacks. Some common defenses also utilize proxies, st | rces on non-progressing connections <xref target="RFC6429" format="default"/>. | |||
ateful firewalls, and other technologies outside the end-host TCP implementation | Operating systems commonly implement mitigations for these attacks. Some common | |||
. | defenses also utilize proxies, stateful firewalls, and other technologies outsi | |||
de the end-host TCP implementation. | ||||
</t> | </t> | |||
<t> | <t> | |||
The concept of a protocol's "wire image" is described in RFC 8546 <xre | The concept of a protocol's "wire image" is described in RFC 8546 <xref target=" | |||
f target="RFC8546"/>, which describes how TCP's cleartext headers expose more me | RFC8546" format="default"/>, which describes how TCP's cleartext headers expose | |||
tadata to nodes on the path than is strictly required to route the packets to th | more metadata to nodes on the path than is strictly required to route the packet | |||
eir destination. On-path adversaries may be able to leverage this metadata. Le | s to their destination. On-path adversaries may be able to leverage this metada | |||
ssons learned in this respect from TCP have been applied in the design of newer | ta. Lessons learned in this respect from TCP have been applied in the design of | |||
transports like QUIC <xref target="RFC9000"/>. Additionally, based partly on ex | newer transports like QUIC <xref target="RFC9000" format="default"/>. Addition | |||
periences with TCP and its extensions, there are considerations that might be ap | ally, based partly on experiences with TCP and its extensions, there are conside | |||
plicable for future TCP extensions and other transports that the IETF has docume | rations that might be applicable for future TCP extensions and other transports | |||
nted in RFC 9065 <xref target="RFC9065"/>, along with IAB recommendations in RFC | that the IETF has documented in RFC 9065 <xref target="RFC9065" format="default" | |||
8558 <xref target="RFC8558"/> and <xref target="I-D.iab-use-it-or-lose-it"/>. | />, along with IAB recommendations in RFC 8558 <xref target="RFC8558" format="de | |||
</t> | fault"/> and <xref target="RFC9170" format="default"/>. | |||
<t> | </t> | |||
There are also methods of "fingerprinting" that can be used to infer t | <t> | |||
he host TCP implementation (operating system) version or platform information. | There are also methods of "fingerprinting" that can be used to infer the host TC | |||
These collect observations of several aspects such as the options present in seg | P implementation (operating system) version or platform information. These coll | |||
ments, the ordering of options, the specific behaviors in the case of various co | ect observations of several aspects, such as the options present in segments, th | |||
nditions, packet timing, packet sizing, and other aspects of the protocol that a | e ordering of options, the specific behaviors in the case of various conditions, | |||
re left to be determined by an implementer, and can use those observations to id | packet timing, packet sizing, and other aspects of the protocol that are left t | |||
entify information about the host and implementation. | o be determined by an implementer, and can use those observations to identify in | |||
</t> | formation about the host and implementation. | |||
</section> | </t> | |||
<section title="Acknowledgements"> | <t> | |||
<t> | Since ICMP message processing also can interact with TCP connections, there is p | |||
This document is largely a revision of RFC 793, which Jon Postel was the edi | otential for ICMP-based attacks against TCP connections. These are discussed in | |||
tor of. Due to his excellent work, it was able to last for three decades before | RFC 5927 <xref target="RFC5927" format="default"/>, along with mitigations that | |||
we felt the need to revise it. | have been implemented. | |||
</t> | </t> | |||
<t> | ||||
Andre Oppermann was a contributor and helped to edit the first revision of t | ||||
his document. | ||||
</t> | ||||
<t> | ||||
We are thankful for the assistance of the IETF TCPM working group chairs, ov | ||||
er the course of work on this document: | ||||
<list> | ||||
<t>Michael Scharf<vspace /> | ||||
Yoshifumi Nishida<vspace /> | ||||
Pasi Sarolahti<vspace /> | ||||
Michael Tuexen</t> | ||||
</list> | ||||
</t> | ||||
<t> | ||||
During the discussions of this work on the TCPM mailing list, in working gro | ||||
up meetings, and via area reviews, helpful comments, critiques, and reviews were | ||||
received from (listed alphabetically by last name): Praveen Balasubramanian, Da | ||||
vid Borman, Mohamed Boucadair, Bob Briscoe, Neal Cardwell, Yuchung Cheng, Martin | ||||
Duke, Francis Dupont, Ted Faber, Gorry Fairhurst, Fernando Gont, Rodney Grimes, | ||||
Yi Huang, Rahul Jadhav, Markku Kojo, Mike Kosek, Juhamatti Kuusisaari, Kevin La | ||||
hey, Kevin Mason, Matt Mathis, Stephen McQuistin, Jonathan Morton, Matt Olson, T | ||||
ommy Pauly, Tom Petch, Hagen Paul Pfeifer, Kyle Rose, Anthony Sabatini, Michael | ||||
Scharf, Greg Skinner, Joe Touch, Michael Tuexen, Reji Varghese, Bernie Volz, Tim | ||||
Wicinski, Lloyd Wood, and Alex Zimmermann. | ||||
</t> | ||||
<t> | ||||
Joe Touch provided additional help in clarifying the description of segment | ||||
size parameters and PMTUD/PLPMTUD recommendations. Markku Kojo helped put toget | ||||
her the text in the section on TCP Congestion Control. | ||||
</t> | ||||
<t> | ||||
This document includes content from errata that were reported by (listed chr | ||||
onologically): Yin Shuming, Bob Braden, Morris M. Keesan, Pei-chun Cheng, Consta | ||||
ntin Hagemeier, Vishwas Manral, Mykyta Yevstifeyev, EungJun Yi, Botong Huang, Ch | ||||
arles Deng, and Merlin Buge. | ||||
</t> | ||||
</section> | </section> | |||
</middle> | </middle> | |||
<!-- *****BACK MATTER ***** --> | ||||
<back> | <back> | |||
<!-- References split to informative and normative --> | <references> | |||
<references title="Normative References"> | <name>References</name> | |||
<!-- A *really* full, totally OTT reference - Note, the "target" attrib | <references> | |||
ute of the | <name>Normative References</name> | |||
"reference": if you want a URI printed in the reference, this is whe | <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R | |||
re it goes. --> | FC.0791.xml"/> | |||
<!-- | <reference anchor="RFC1191" target="https://www.rfc-editor.org/info/rfc1 | |||
<reference anchor='RFC2119' | 191"> | |||
target='http://xml.resource.org/public/rfc/html/rfc2119.html' | <front> | |||
> | <title>Path MTU discovery</title> | |||
<front> | <author fullname="J.C. Mogul" initials="J." surname="Mogul"/> | |||
<title abbrev='RFC Key Words'>Key words for use in RFCs to Indic | <author fullname="S.E. Deering" initials="S." surname="Deering"/> | |||
ate Requirement | <date month="November" year="1990"/> | |||
Levels</title> | </front> | |||
<author initials='S.' surname='Bradner' fullname='Scott Bradner' | <seriesInfo name="RFC" value="1191"/> | |||
> | <seriesInfo name="DOI" value="10.17487/RFC1191"/> | |||
<organization>Harvard University</organization> | ||||
<address> | ||||
<postal> | ||||
<street>1350 Mass. Ave.</street> | ||||
<street>Cambridge</street> | ||||
<street>MA 02138</street> | ||||
</postal> | ||||
<phone>- +1 617 495 3864</phone> | ||||
<email>sob@harvard.edu</email> | ||||
</address> | ||||
</author> | ||||
<date year='1997' month='March' /> | ||||
<area>General</area> | ||||
<keyword>keyword</keyword> | ||||
<abstract> | ||||
<t>In many standards track documents several words are used | ||||
to signify | ||||
the requirements in the specification. These words are ofte | ||||
n | ||||
capitalized. This document defines these words as they shou | ||||
ld be | ||||
interpreted in IETF documents. Authors who follow these gui | ||||
delines | ||||
should incorporate this phrase near the beginning of their d | ||||
ocument: | ||||
<list> | ||||
<t> | ||||
The key words "MUST", "MUST NOT" | ||||
, | ||||
"REQUIRED", "SHALL", "SHALL | ||||
NOT", | ||||
"SHOULD", "SHOULD NOT", "RE | ||||
COMMENDED", | ||||
"MAY", and "OPTIONAL" in this do | ||||
cument are to be | ||||
interpreted as described in RFC 2119.</t> | ||||
</list> | ||||
</t> | ||||
<t> | ||||
Note that the force of these words is modified by the requir | ||||
ement level of | ||||
the document in which they are used.</t> | ||||
</abstract> | ||||
</front> | ||||
<seriesInfo name='BCP' value='14' /> | ||||
<seriesInfo name='RFC' value='2119' /> | ||||
<format type='TXT' octets='4723' target='ftp://ftp.isi.edu/in-notes/ | ||||
rfc2119.txt' /> | ||||
<format type='HTML' octets='14486' | ||||
target='http://xml.resource.org/public/rfc/html/rfc2119.html | ||||
' /> | ||||
<format type='XML' octets='5661' | ||||
target='http://xml.resource.org/public/rfc/xml/rfc2119.xml' | ||||
/> | ||||
</reference> | </reference> | |||
--> | <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R | |||
FC.2119.xml"/> | ||||
<!-- Right back at the beginning we defined an entity which (we asserted | <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R | |||
) would contain | FC.2474.xml"/> | |||
XML needed for a reference... this is where we use it. --> | <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R | |||
&RFC0791; | FC.2914.xml"/> | |||
&RFC1191; | <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R | |||
&RFC2119; | FC.3168.xml"/> | |||
&RFC2474; | <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R | |||
&RFC2914; | FC.5033.xml"/> | |||
&RFC3168; | <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R | |||
&RFC5033; | FC.5681.xml"/> | |||
&RFC5681; | <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R | |||
&RFC5961; | FC.5961.xml"/> | |||
&RFC6298; | <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R | |||
&RFC6633; | FC.6298.xml"/> | |||
&RFC8174; | <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R | |||
&RFC8200; | FC.6633.xml"/> | |||
&RFC8201; | <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R | |||
&RFC8961; | FC.8174.xml"/> | |||
</references> | <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R | |||
FC.8200.xml"/> | ||||
<references title="Informative References"> | <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R | |||
<!-- A reference written by an organization, not a person. --> | FC.8201.xml"/> | |||
<!-- | <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R | |||
<reference | FC.8961.xml"/> | |||
anchor="DOMINATION" > | </references> | |||
<front> | <references> | |||
<title>Ultimate Plan for Taking Over the World</title> | <name>Informative References</name> | |||
<author> | <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R | |||
<organization>Mad Dominators, Inc.</organization> | FC.0793.xml"/> | |||
</author> | <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R | |||
<date year="1984" /> | FC.0896.xml"/> | |||
</front> | <reference anchor="RFC1011" target="https://www.rfc-editor.org/info/rfc1 | |||
011"> | ||||
<front> | ||||
<title>Official Internet protocols</title> | ||||
<author fullname="J.K. Reynolds" initials="J." surname="Reynolds"/> | ||||
<author fullname="J. Postel" initials="J." surname="Postel"/> | ||||
<date month="May" year="1987"/> | ||||
</front> | ||||
<seriesInfo name="RFC" value="1011"/> | ||||
<seriesInfo name="DOI" value="10.17487/RFC1011"/> | ||||
</reference> | </reference> | |||
--> | <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R | |||
&RFC0793; | FC.1122.xml"/> | |||
&RFC0879; | <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R | |||
&RFC0896; | FC.1349.xml"/> | |||
&RFC1011; | <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R | |||
&RFC1122; | FC.1644.xml"/> | |||
&RFC1349; | <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R | |||
&RFC1644; | FC.2018.xml"/> | |||
<!--&RFC1191;--> | <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R | |||
<!--&RFC2675;--> | FC.2525.xml"/> | |||
&RFC2018; | <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R | |||
&RFC2525; | FC.2675.xml"/> | |||
&RFC2675; | <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R | |||
&RFC2873; | FC.2873.xml"/> | |||
&RFC2883; | <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R | |||
&RFC2923; | FC.2883.xml"/> | |||
&RFC3449; | <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R | |||
&RFC3465; | FC.2923.xml"/> | |||
&RFC4727; | <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R | |||
&RFC4821; | FC.3449.xml"/> | |||
&RFC4987; | <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R | |||
&RFC4953; | FC.3465.xml"/> | |||
&RFC5044; | <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R | |||
&RFC5461; | FC.4727.xml"/> | |||
&RFC5570; | <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R | |||
&RFC5795; | FC.4821.xml"/> | |||
&RFC5925; | <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R | |||
&RFC6093; | FC.4987.xml"/> | |||
&RFC6191; | <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R | |||
FC.4953.xml"/> | ||||
&RFC6429; | <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R | |||
&RFC6528; | FC.5044.xml"/> | |||
&RFC6691; | <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R | |||
&RFC6864; | FC.5461.xml"/> | |||
&RFC6994; | <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R | |||
&RFC7094; | FC.5570.xml"/> | |||
&RFC7323; | <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R | |||
&RFC7413; | FC.5795.xml"/> | |||
&RFC7414; | <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R | |||
&RFC7657; | FC.5925.xml"/> | |||
&RFC8087; | <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R | |||
&RFC8095; | FC.6093.xml"/> | |||
&RFC8303; | <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R | |||
&RFC8504; | FC.6191.xml"/> | |||
&RFC8546; | <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R | |||
&RFC8548; | FC.6429.xml"/> | |||
&RFC8558; | <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R | |||
&RFC8684; | FC.6528.xml"/> | |||
&RFC9000; | <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R | |||
&RFC9065; | FC.6691.xml"/> | |||
<xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R | ||||
<reference anchor="TCP-parameters-registry"> | FC.6864.xml"/> | |||
<front> | <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R | |||
<title>Transmission Control Protocol (TCP) Parameters, https://www.ian | FC.6994.xml"/> | |||
a.org/assignments/tcp-parameters/tcp-parameters.xhtml | <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R | |||
</title> | FC.7094.xml"/> | |||
<author> | <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R | |||
<organization>IANA</organization> | FC.7323.xml"/> | |||
</author> | <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R | |||
<date year="2019"/> | FC.7413.xml"/> | |||
</front> | <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R | |||
FC.7414.xml"/> | ||||
<xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R | ||||
FC.7657.xml"/> | ||||
<xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R | ||||
FC.8087.xml"/> | ||||
<xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R | ||||
FC.8095.xml"/> | ||||
<xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R | ||||
FC.8303.xml"/> | ||||
<xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R | ||||
FC.8311.xml"/> | ||||
<xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R | ||||
FC.8504.xml"/> | ||||
<xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R | ||||
FC.8546.xml"/> | ||||
<xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R | ||||
FC.8548.xml"/> | ||||
<xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R | ||||
FC.8558.xml"/> | ||||
<xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R | ||||
FC.8684.xml"/> | ||||
<xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R | ||||
FC.9000.xml"/> | ||||
<xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R | ||||
FC.9065.xml"/> | ||||
<reference anchor="TCP-parameters-registry" target="https://www.iana.org | ||||
/assignments/tcp-parameters/"> | ||||
<front> | ||||
<title>Transmission Control Protocol (TCP) Parameters | ||||
</title> | ||||
<author> | ||||
<organization>IANA</organization> | ||||
</author> | ||||
</front> | ||||
</reference> | </reference> | |||
<xi:include href="https://datatracker.ietf.org/doc/bibxml3/draft-gont-tc | ||||
<reference anchor="header-flags-registry"> | pm-tcp-seccomp-prec.xml"/> | |||
<xi:include href="https://datatracker.ietf.org/doc/bibxml3/draft-gont-tc | ||||
pm-tcp-seq-validation.xml"/> | ||||
<xi:include href="https://datatracker.ietf.org/doc/bibxml3/draft-ietf-tc | ||||
pm-tcp-edo.xml"/> | ||||
<xi:include href="https://datatracker.ietf.org/doc/bibxml3/draft-mcquist | ||||
in-augmented-ascii-diagrams.xml"/> | ||||
<xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R | ||||
FC.9170.xml"/> | ||||
<xi:include href="https://datatracker.ietf.org/doc/bibxml3/draft-minshal | ||||
l-nagle.xml"/> | ||||
<reference anchor="DS78"> | ||||
<front> | ||||
<title>Connection Management in Transport Protocols</title> | ||||
<author initials="Y" surname="Dalal"/> | ||||
<author initials="C" surname="Sunshine"/> | ||||
<date year="1978" month="December"/> | ||||
</front> | ||||
<refcontent>Computer Networks, Vol. 2, No. 6, pp. 454-473</refcontent> | ||||
<seriesInfo name="DOI" value="10.1016/0376-5075(78)90053-3"/> | ||||
</reference> | ||||
<reference anchor="FTY99"> | ||||
<front> | ||||
<title>The TIME-WAIT state in TCP and Its Effect on Busy Servers</ti | ||||
tle> | ||||
<author initials="T" surname="Faber"/> | ||||
<author initials="J" surname="Touch"/> | ||||
<author initials="W" surname="Yui"/> | ||||
<date year="1999" month="March"/> | ||||
</front> | ||||
<refcontent>Proceedings of IEEE INFOCOM, pp. 1573-1583</refcontent> | ||||
<seriesInfo name="DOI" value="10.1109/INFCOM.1999.752180"/> | ||||
</reference> | ||||
<reference anchor="IEN177" target="https://www.rfc-editor.org/ien/ien177 | ||||
.txt"> | ||||
<front> | ||||
<title>Comments on Action Items from the January Meeting</title> | ||||
<author initials="J" surname="Postel"/> | ||||
<date year="1981" month="March"/> | ||||
</front> | ||||
<seriesInfo name="IEN" value="177"/> | ||||
</reference> | ||||
<reference anchor="offload" target="https://www.kernel.org/doc/html/late | ||||
st/networking/segmentation-offloads.html"> | ||||
<front> | ||||
<title>Segmentation Offloads</title> | ||||
<author/> | ||||
<date/> | ||||
</front> | ||||
<refcontent>The Linux Kernel Documentation</refcontent> | ||||
</reference> | ||||
<reference anchor="Err573" quote-title="false" target="https://www.rfc-e | ||||
ditor.org/errata/eid573"> | ||||
<front> | ||||
<title>Erratum ID 573</title> | ||||
<author> | ||||
<organization>RFC Errata</organization> | ||||
</author> | ||||
</front> | ||||
<refcontent>RFC 793</refcontent> | ||||
</reference> | ||||
<reference anchor="Err574" quote-title="false" target="https://www.rfc-e | ||||
ditor.org/errata/eid574"> | ||||
<front> | ||||
<title>Erratum ID 574</title> | ||||
<author> | ||||
<organization>RFC Errata</organization> | ||||
</author> | ||||
</front> | ||||
<refcontent>RFC 793</refcontent> | ||||
</reference> | ||||
<reference anchor="Err700" quote-title="false" target="https://www.rfc-e | ||||
ditor.org/errata/eid700"> | ||||
<front> | ||||
<title>Erratum ID 700</title> | ||||
<author> | ||||
<organization>RFC Errata</organization> | ||||
</author> | ||||
</front> | ||||
<refcontent>RFC 793</refcontent> | ||||
</reference> | ||||
<reference anchor="Err701" quote-title="false" target="https://www.rfc-e | ||||
ditor.org/errata/eid701"> | ||||
<front> | ||||
<title>Erratum ID 701</title> | ||||
<author> | ||||
<organization>RFC Errata</organization> | ||||
</author> | ||||
</front> | ||||
<refcontent>RFC 793</refcontent> | ||||
</reference> | ||||
<reference anchor="Err1283" quote-title="false" target="https://www.rfc- | ||||
editor.org/errata/eid1283"> | ||||
<front> | ||||
<title>Erratum ID 1283</title> | ||||
<author> | ||||
<organization>RFC Errata</organization> | ||||
</author> | ||||
</front> | ||||
<refcontent>RFC 793</refcontent> | ||||
</reference> | ||||
<reference anchor="Err1561" quote-title="false" target="https://www.rfc- | ||||
editor.org/errata/eid1561"> | ||||
<front> | ||||
<title>Erratum ID 1561</title> | ||||
<author> | ||||
<organization>RFC Errata</organization> | ||||
</author> | ||||
</front> | ||||
<refcontent>RFC 793</refcontent> | ||||
</reference> | ||||
<reference anchor="Err1562" quote-title="false" target="https://www.rfc- | ||||
editor.org/errata/eid1562"> | ||||
<front> | ||||
<title>Erratum ID 1562</title> | ||||
<author> | ||||
<organization>RFC Errata</organization> | ||||
</author> | ||||
</front> | ||||
<refcontent>RFC 793</refcontent> | ||||
</reference> | ||||
<reference anchor="Err1564" quote-title="false" target="https://www.rfc- | ||||
editor.org/errata/eid1564"> | ||||
<front> | ||||
<title>Erratum ID 1564</title> | ||||
<author> | ||||
<organization>RFC Errata</organization> | ||||
</author> | ||||
</front> | ||||
<refcontent>RFC 793</refcontent> | ||||
</reference> | ||||
<reference anchor="Err1571" quote-title="false" target="https://www.rfc- | ||||
editor.org/errata/eid1571"> | ||||
<front> | ||||
<title>Erratum ID 1571</title> | ||||
<author> | ||||
<organization>RFC Errata</organization> | ||||
</author> | ||||
</front> | ||||
<refcontent>RFC 793</refcontent> | ||||
</reference> | ||||
<reference anchor="Err1572" quote-title="false" target="https://www.rfc- | ||||
editor.org/errata/eid1572"> | ||||
<front> | ||||
<title>Erratum ID 1572</title> | ||||
<author> | ||||
<organization>RFC Errata</organization> | ||||
</author> | ||||
</front> | ||||
<refcontent>RFC 793</refcontent> | ||||
</reference> | ||||
<reference anchor="Err2297" quote-title="false" target="https://www.rfc-e | ||||
ditor.org/errata/eid2297"> | ||||
<front> | ||||
<title>Erratum ID 2297</title> | ||||
<author> | ||||
<organization>RFC Errata</organization> | ||||
</author> | ||||
</front> | ||||
<refcontent>RFC 793</refcontent> | ||||
</reference> | ||||
<reference anchor="Err2298" quote-title="false" target="https://www.rfc- | ||||
editor.org/errata/eid2298"> | ||||
<front> | ||||
<title>Erratum ID 2298</title> | ||||
<author> | ||||
<organization>RFC Errata</organization> | ||||
</author> | ||||
</front> | ||||
<refcontent>RFC 793</refcontent> | ||||
</reference> | ||||
<reference anchor="Err2748" quote-title="false" target="https://www.rfc- | ||||
editor.org/errata/eid2748"> | ||||
<front> | ||||
<title>Erratum ID 2748</title> | ||||
<author> | ||||
<organization>RFC Errata</organization> | ||||
</author> | ||||
</front> | ||||
<refcontent>RFC 793</refcontent> | ||||
</reference> | ||||
<reference anchor="Err2749" quote-title="false" target="https://www.rfc- | ||||
editor.org/errata/eid2749"> | ||||
<front> | ||||
<title>Erratum ID 2749</title> | ||||
<author> | ||||
<organization>RFC Errata</organization> | ||||
</author> | ||||
</front> | ||||
<refcontent>RFC 793</refcontent> | ||||
</reference> | ||||
<reference anchor="Err2934" quote-title="false" target="https://www.rfc- | ||||
editor.org/errata/eid2934"> | ||||
<front> | ||||
<title>Erratum ID 2934</title> | ||||
<author> | ||||
<organization>RFC Errata</organization> | ||||
</author> | ||||
</front> | ||||
<refcontent>RFC 793</refcontent> | ||||
</reference> | ||||
<reference anchor="Err3213" quote-title="false" target="https://www.rfc- | ||||
editor.org/errata/eid3213"> | ||||
<front> | <front> | |||
<title>Transmission Control Protocol (TCP) Header Flags, https://www.i | <title>Erratum ID 3213</title> | |||
ana.org/assignments/tcp-header-flags/tcp-header-flags.xhtml | <author> | |||
</title> | <organization>RFC Errata</organization> | |||
<author> | </author> | |||
<organization>IANA</organization> | </front> | |||
</author> | <refcontent>RFC 793</refcontent> | |||
<date year="2019"/> | ||||
</front> | ||||
</reference> | </reference> | |||
<reference anchor="Err3300" quote-title="false" target="https://www.rfc- | ||||
&I-D.gont-tcpm-tcp-seccomp-prec; | editor.org/errata/eid3300"> | |||
&I-D.gont-tcpm-tcp-seq-validation; | <front> | |||
&I-D.ietf-tcpm-tcp-edo; | <title>Erratum ID 3300</title> | |||
&I-D.mcquistin-augmented-ascii-diagrams; | <author> | |||
&I-D.iab-use-it-or-lose-it; | <organization>RFC Errata</organization> | |||
</author> | ||||
<reference anchor="draft-minshall-nagle"> | </front> | |||
<front> | <refcontent>RFC 793</refcontent> | |||
<title>A Proposed Modification to Nagle's Algorithm | ||||
</title> | ||||
<author initials="G" surname="Minshall" fullname="Greg Minshall"> | ||||
</author> | ||||
<date month="June" year="1999"/> | ||||
</front> | ||||
<seriesInfo name="Internet-Draft" value="draft-minshall-nagle-01"/> | ||||
</reference> | </reference> | |||
<reference anchor="Err3301" quote-title="false" target="https://www.rfc- | ||||
<reference | editor.org/errata/eid3301"> | |||
anchor="DS78" > | <front> | |||
<front> | <title>Erratum ID 3301</title> | |||
<title>Connection Management in Transport Protocols</title> | <author> | |||
<author initials = "Y" surname="Dalal"></author> | <organization>RFC Errata</organization> | |||
<author initials = "C" surname="Sunshine"></author> | </author> | |||
<date year="1978" month="December" /> | </front> | |||
</front> | <refcontent>RFC 793</refcontent> | |||
<seriesInfo name="Computer Networks" value="Vol. 2, No. 6, pp. 454-4 | ||||
73"/> | ||||
</reference> | </reference> | |||
<reference anchor="Err6222" quote-title="false" target="https://www.rfc- | ||||
<reference | editor.org/errata/eid6222"> | |||
anchor="FTY99"> | <front> | |||
<front> | <title>Erratum ID 6222</title> | |||
<title>The TIME-WAIT state in TCP and Its Effect on Busy Servers | <author> | |||
</title> | <organization>RFC Errata</organization> | |||
<author initials = "T" surname="Faber"></author> | </author> | |||
<author initials = "J" surname="Touch"></author> | </front> | |||
<author initials = "W" surname="Yui"></author> | <refcontent>RFC 793</refcontent> | |||
<date year="1999" month="March" /> | ||||
</front> | ||||
<seriesInfo name="Proceedings of IEEE INFOCOM" value="pp. 1573-1583" | ||||
/> | ||||
</reference> | </reference> | |||
<reference anchor="Err572" quote-title="false" target="https://www.rfc-e | ||||
<reference | ditor.org/errata/eid572"> | |||
anchor="IEN177" target="https://www.rfc-editor.org/ien/ien177.txt"> | <front> | |||
<front> | <title>Erratum ID 572</title> | |||
<title>Comments on Action Items from the January Meeting</title> | <author> | |||
<author initials = "J" surname="Postel"></author> | <organization>RFC Errata</organization> | |||
<date year="1981" month="March" /> | </author> | |||
</front> | </front> | |||
<seriesInfo name="IEN" value="177"/> | <refcontent>RFC 793</refcontent> | |||
</reference> | </reference> | |||
<reference anchor="Err575" quote-title="false" target="https://www.rfc-e | ||||
<reference anchor="offload" target="https://www.kernel.org/doc/html/late | ditor.org/errata/eid575"> | |||
st/networking/segmentation-offloads.html"> | <front> | |||
<front> | <title>Erratum ID 575</title> | |||
<title>Segmentation Offloads</title> | <author> | |||
<author></author> | <organization>RFC Errata</organization> | |||
<date/> | </author> | |||
</front> | </front> | |||
<seriesInfo name="Linux Networking Documentation" value=""/> | <refcontent>RFC 793</refcontent> | |||
</reference> | </reference> | |||
<reference anchor="Err1565" quote-title="false" target="https://www.rfc- | ||||
editor.org/errata/eid1565"> | ||||
<front> | ||||
<title>Erratum ID 1565</title> | ||||
<author> | ||||
<organization>RFC Errata</organization> | ||||
</author> | ||||
</front> | ||||
<refcontent>RFC 793</refcontent> | ||||
</reference> | ||||
<reference anchor="Err1569" quote-title="false" target="https://www.rfc- | ||||
editor.org/errata/eid1569"> | ||||
<front> | ||||
<title>Erratum ID 1569</title> | ||||
<author> | ||||
<organization>RFC Errata</organization> | ||||
</author> | ||||
</front> | ||||
<refcontent>RFC 793</refcontent> | ||||
</reference> | ||||
<reference anchor="Err2296" quote-title="false" target="https://www.rfc- | ||||
editor.org/errata/eid2296"> | ||||
<front> | ||||
<title>Erratum ID 2296</title> | ||||
<author> | ||||
<organization>RFC Errata</organization> | ||||
</author> | ||||
</front> | ||||
<refcontent>RFC 793</refcontent> | ||||
</reference> | ||||
<reference anchor="Err3305" quote-title="false" target="https://www.rfc- | ||||
editor.org/errata/eid3305"> | ||||
<front> | ||||
<title>Erratum ID 3305</title> | ||||
<author> | ||||
<organization>RFC Errata</organization> | ||||
</author> | ||||
</front> | ||||
<refcontent>RFC 793</refcontent> | ||||
</reference> | ||||
<reference anchor="Err3602" quote-title="false" target="https://www.rfc- | ||||
editor.org/errata/eid3602"> | ||||
<front> | ||||
<title>Erratum ID 3602</title> | ||||
<author> | ||||
<organization>RFC Errata</organization> | ||||
</author> | ||||
</front> | ||||
<refcontent>RFC 793</refcontent> | ||||
</reference> | ||||
<reference anchor="Err4772" quote-title="false" target="https://www.rfc- | ||||
editor.org/errata/eid4772"> | ||||
<front> | ||||
<title>Erratum ID 4772</title> | ||||
<author> | ||||
<organization>RFC Errata</organization> | ||||
</author> | ||||
</front> | ||||
<refcontent>RFC 5961</refcontent> | ||||
</reference> | ||||
<xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R | ||||
FC.5927.xml"/> | ||||
</references> | ||||
</references> | </references> | |||
<section title="Other Implementation Notes"> | <section numbered="true" toc="default"> | |||
<t> | <name>Other Implementation Notes</name> | |||
<t> | ||||
This section includes additional notes and references on TCP implementation decisions that are currently not a part of the RFC series or included within the TCP standard. These items can be considered by implementers, but there was not yet a consensus to include them in the standard. | This section includes additional notes and references on TCP implementation decisions that are currently not a part of the RFC series or included within the TCP standard. These items can be considered by implementers, but there was not yet a consensus to include them in the standard. | |||
</t> | </t> | |||
<section anchor="seccomp" numbered="true" toc="default"> | ||||
<section title="IP Security Compartment and Precedence" anchor="seccomp"> | <name>IP Security Compartment and Precedence</name> | |||
<t> | <t> | |||
The IPv4 specification <xref target="RFC0791"/> includes a precedence value in | The IPv4 specification <xref target="RFC0791" format="default"/> includes a prec | |||
the (now obsoleted) Type of Service field (TOS) field. It was modified in | edence value in | |||
<xref target="RFC1349"/>, and then obsoleted by the definition of | the (now obsoleted) Type of Service (TOS) field. It was modified in | |||
Differentiated Services (DiffServ) <xref target="RFC2474"/>. Setting and | <xref target="RFC1349" format="default"/> and then obsoleted by the definition o | |||
conveying TOS between the network layer, TCP implementation, and applications is | f | |||
obsolete, | Differentiated Services (Diffserv) <xref target="RFC2474" format="default"/>. S | |||
and replaced by DiffServ in the current TCP specification. | etting and | |||
conveying TOS between the network layer, TCP implementation, and applications is | ||||
obsolete | ||||
and is replaced by Diffserv in the current TCP specification. | ||||
</t> | </t> | |||
<t> | <t> | |||
RFC 793 required checking the IP security compartment and precedence on | RFC 793 required checking the IP security compartment and precedence on | |||
incoming TCP segments for consistency within a connection, and with | incoming TCP segments for consistency within a connection and with | |||
application requests. Each of these aspects of IP have become outdated, | application requests. Each of these aspects of IP have become outdated, | |||
without specific updates to RFC 793. The issues with precedence were | without specific updates to RFC 793. The issues with precedence were | |||
fixed by <xref target="RFC2873"/>, which is Standards Track, and so this | fixed by <xref target="RFC2873" format="default"/>, which is Standards Track, and so this | |||
present TCP specification includes those changes. However, the state of | present TCP specification includes those changes. However, the state of | |||
IP security options that may be used by MLS systems is not as apparent in | IP security options that may be used by Multi-Level Secure (MLS) systems is not as apparent in | |||
the IETF currently. | the IETF currently. | |||
</t> | </t> | |||
<t> | <t> | |||
Resetting connections when incoming packets do not meet expected security | Resetting connections when incoming packets do not meet expected security | |||
compartment or precedence expectations has been recognized as a possible | compartment or precedence expectations has been recognized as a possible | |||
attack vector <xref target="I-D.gont-tcpm-tcp-seccomp-prec"/>, and there has | attack vector <xref target="I-D.gont-tcpm-tcp-seccomp-prec" format="default"/>, and there has | |||
been discussion about amending the TCP specification to prevent connections | been discussion about amending the TCP specification to prevent connections | |||
from being aborted due to non-matching IP security compartment and DiffServ | from being aborted due to nonmatching IP security compartment and Diffserv | |||
codepoint values. | codepoint values. | |||
</t> | </t> | |||
<section title="Precedence"> | <section numbered="true" toc="default"> | |||
<t> | <name>Precedence</name> | |||
<t> | ||||
In DiffServ the former precedence values are treated as Class Selector | In Diffserv, the former precedence values are treated as Class Selector | |||
codepoints, and methods for compatible treatment are described in the DiffServ | codepoints, and methods for compatible treatment are described in the Diffserv | |||
architecture. The RFC 793/1122 TCP specification includes logic intending to | architecture. The RFC TCP specification defined by RFCs 793 and 1122 included l | |||
ogic intending to | ||||
have connections use the highest precedence requested by either endpoint | have connections use the highest precedence requested by either endpoint | |||
application, and to keep the precedence consistent throughout a connection. | application, and to keep the precedence consistent throughout a connection. | |||
This logic from the obsolete TOS is not applicable for DiffServ, and should | This logic from the obsolete TOS is not applicable to Diffserv and should | |||
not be included in TCP implementations, though changes to DiffServ values | not be included in TCP implementations, though changes to Diffserv values | |||
within a connection are discouraged. For discussion of this, see RFC 7657 (sec | within a connection are discouraged. For discussion of this, see RFC 7657 (Sect | |||
5.1, 5.3, and 6) <xref target="RFC7657"/>. | ions <xref target="RFC7657" section="5.1" sectionFormat="bare" format="default"/ | |||
>, <xref target="RFC7657" section="5.3" sectionFormat="bare" format="default"/>, | ||||
and <xref target="RFC7657" section="6" sectionFormat="bare" format="default"/>) | ||||
<xref target="RFC7657" format="default"/>. | ||||
</t> | </t> | |||
<t> | <t> | |||
The obsoleted TOS processing rules in TCP assumed bidirectional (or symmetric) precedence values | The obsoleted TOS processing rules in TCP assumed bidirectional (or symmetric) precedence values | |||
used on a connection, but the DiffServ architecture is asymmetric. | used on a connection, but the Diffserv architecture is asymmetric. | |||
Problems with the old TCP logic in this regard were described in <xref | Problems with the old TCP logic in this regard were described in <xref target="R | |||
target="RFC2873"/> and the solution described is to ignore IP precedence in | FC2873" format="default"/>, and the solution described is to ignore IP precedenc | |||
e in | ||||
TCP. Since RFC 2873 is a Standards Track document (although not marked as | TCP. Since RFC 2873 is a Standards Track document (although not marked as | |||
updating RFC 793), current implementations are expected to be robust to these | updating RFC 793), current implementations are expected to be robust in these | |||
conditions. Note that the DiffServ field value used in each direction is a | conditions. Note that the Diffserv field value used in each direction is a | |||
part of the interface between TCP and the network layer, and values in use can be | part of the interface between TCP and the network layer, and values in use can be | |||
indicated both ways between TCP and the application. | indicated both ways between TCP and the application. | |||
</t> | </t> | |||
</section> | </section> | |||
<section title="MLS Systems"> | <section numbered="true" toc="default"> | |||
<t> | <name>MLS Systems</name> | |||
<t> | ||||
The IP security option (IPSO) and compartment defined in <xref | The IP Security Option (IPSO) and compartment defined in <xref target="RFC0791 | |||
target="RFC0791"/> was refined in RFC 1038 that was later obsoleted by RFC | " format="default"/> was refined in RFC 1038, which was later obsoleted by RFC | |||
1108. The Commercial IP Security Option (CIPSO) is defined in FIPS-188 (withd | 1108. The Commercial IP Security Option (CIPSO) is defined in FIPS-188 (withd | |||
rawn by NIST in 2015), and | rawn by NIST in 2015) and | |||
is supported by some vendors and operating systems. RFC 1108 is now | is supported by some vendors and operating systems. RFC 1108 is now | |||
Historic, though RFC 791 itself has not been updated to remove the IP | Historic, though RFC 791 itself has not been updated to remove the IP | |||
security option. For IPv6, a similar option (CALIPSO) has been defined <xref | Security Option. For IPv6, a similar option (Common Architecture Label IPv6 S | |||
target="RFC5570"/>. RFC 793 includes logic that includes the IP | ecurity Option (CALIPSO)) has been defined <xref target="RFC5570" format="defaul | |||
t"/>. RFC 793 includes logic that includes the IP | ||||
security/compartment information in treatment of TCP segments. References to | security/compartment information in treatment of TCP segments. References to | |||
the IP "security/compartment" in this document may be relevant for | the IP "security/compartment" in this document may be relevant for | |||
Multi-Level Secure (MLS) system implementers, but can be ignored for non-MLS | Multi-Level Secure (MLS) system implementers but can be ignored for non-MLS | |||
implementations, consistent with running code on the Internet. See <xref | implementations, consistent with running code on the Internet. See <xref targ | |||
target="seccomp"/> for further discussion. Note that RFC 5570 describes some | et="seccomp" format="default"/> for further discussion. Note that RFC 5570 desc | |||
ribes some | ||||
MLS networking scenarios where IPSO, CIPSO, or CALIPSO may be used. In these | MLS networking scenarios where IPSO, CIPSO, or CALIPSO may be used. In these | |||
special cases, TCP implementers should see section 7.3.1 of RFC 5570, and | special cases, TCP implementers should see Section <xref target="RFC5570" section="7.3.1" sectionFormat="bare" format="default"/> of RFC 5570 and | |||
follow the guidance in that document. | follow the guidance in that document. | |||
</t> | </t> | |||
</section> | </section> | |||
</section> | </section> | |||
<section title="Sequence Number Validation" anchor="seqval"> | <section anchor="seqval" numbered="true" toc="default"> | |||
<t> | <name>Sequence Number Validation</name> | |||
<t> | ||||
There are cases where the TCP sequence number validation rules can prevent ACK fields from being processed. This can result in connection issues, as described in | There are cases where the TCP sequence number validation rules can prevent ACK fields from being processed. This can result in connection issues, as described in | |||
<xref target="I-D.gont-tcpm-tcp-seq-validation"/>, which includes descriptions of potential problems in conditions of simultaneous open, self-connects, simultaneous close, and simultaneous window probes. The document also describes potential changes to the TCP specification to mitigate the issue by expanding the acceptable sequence numbers. | <xref target="I-D.gont-tcpm-tcp-seq-validation" format="default"/>, which includes descriptions of potential problems in conditions of simultaneous open, self-connects, simultaneous close, and simultaneous window probes. The document also describes potential changes to the TCP specification to mitigate the issue by expanding the acceptable sequence numbers. | |||
</t> | </t> | |||
<t> | <t> | |||
In Internet usage of TCP, these conditions are rarely occurring. Common operati | In Internet usage of TCP, these conditions rarely occur. Common operating syste | |||
ng systems include different alternative mitigations, and the standard has not b | ms include different alternative mitigations, and the standard has not been upda | |||
een updated yet to codify one of them, but implementers should consider the prob | ted yet to codify one of them, but implementers should consider the problems des | |||
lems described in <xref target="I-D.gont-tcpm-tcp-seq-validation"/>. | cribed in <xref target="I-D.gont-tcpm-tcp-seq-validation" format="default"/>. | |||
</t> | </t> | |||
</section> | </section> | |||
<section title="Nagle Modification" anchor="minshall"> | <section anchor="minshall" numbered="true" toc="default"> | |||
<t>In common operating systems, both the Nagle algorithm and delayed acknowledge | <name>Nagle Modification</name> | |||
ments are implemented and enabled by default. TCP is used by many applications | <t>In common operating systems, both the Nagle algorithm and delayed ack | |||
that have a request-response style of communication, where the combination of th | nowledgments are implemented and enabled by default. TCP is used by many applic | |||
e Nagle algorithm and delayed acknowledgements can result in poor application pe | ations that have a request-response style of communication, where the combinatio | |||
rformance. A modification to the Nagle algorithm is described in <xref target=" | n of the Nagle algorithm and delayed acknowledgments can result in poor applicat | |||
draft-minshall-nagle"/> that improves the situation for these applications. | ion performance. A modification to the Nagle algorithm is described in <xref ta | |||
rget="I-D.minshall-nagle" format="default"/> that improves the situation for the | ||||
se applications. | ||||
</t> | </t> | |||
<t>This modification is implemented in some common operating systems, and does n | <t>This modification is implemented in some common operating systems and | |||
ot impact TCP interoperability. Additionally, many applications simply disable | does not impact TCP interoperability. Additionally, many applications simply d | |||
Nagle, since this is generally supported by a socket option. The TCP standard h | isable Nagle since this is generally supported by a socket option. The TCP stan | |||
as not been updated to include this Nagle modification, but implementers may fin | dard has not been updated to include this Nagle modification, but implementers m | |||
d it beneficial to consider.</t> | ay find it beneficial to consider.</t> | |||
</section> | </section> | |||
<section title="Low Watermark Settings"> | <section numbered="true" toc="default"> | |||
<t>Some operating system kernel TCP implementations include socket options | <name>Low Watermark Settings</name> | |||
that allow specifying the number of bytes in the buffer until the socket layer w | <t>Some operating system kernel TCP implementations include socket optio | |||
ill pass sent data to TCP (SO_SNDLOWAT) or to the application on receiving (SO_R | ns that allow specifying the number of bytes in the buffer until the socket laye | |||
CVLOWAT).</t> | r will pass sent data to TCP (SO_SNDLOWAT) or to the application on receiving (S | |||
<t>In addition, another socket option (TCP_NOTSENT_LOWAT) can be used to co | O_RCVLOWAT).</t> | |||
ntrol the amount of unsent bytes in the write queue. This can help a sending TCP | <t>In addition, another socket option (TCP_NOTSENT_LOWAT) can be used to | |||
application to avoid creating large amounts of buffered data (and corresponding | control the amount of unsent bytes in the write queue. This can help a sending | |||
latency). As an example, this may be useful for applications that are multiplex | TCP application to avoid creating large amounts of buffered data (and correspond | |||
ing data from multiple upper level streams onto a connection, especially when st | ing latency). As an example, this may be useful for applications that are multip | |||
reams may be a mix of interactive / real-time and bulk data transfer.</t> | lexing data from multiple upper-level streams onto a connection, especially when | |||
</section> | streams may be a mix of interactive/real-time and bulk data transfer.</t> | |||
</section> | ||||
</section> | </section> | |||
<section anchor="reqs" numbered="true" toc="default"> | ||||
<name>TCP Requirement Summary</name> | ||||
<t>This section is adapted from RFC 1122.</t> | ||||
<t>Note that there is no requirement related to PLPMTUD in this list, but | ||||
that PLPMTUD is recommended.</t> | ||||
<section title="TCP Requirement Summary" anchor="reqs"> | <table anchor="tcp-req-summary"> | |||
<t>This section is adapted from RFC 1122.</t> | <name>TCP Requirements Summary</name> | |||
<t>Note that there is no requirement related to PLPMTUD in this list, bu | <thead> | |||
t that PLPMTUD is recommended.</t> | <tr> | |||
<figure> | <th align="center">Feature</th> | |||
<artwork> | <th align="center">ReqID</th> | |||
<th align="center"><bcp14>MUST</bcp14></th> | ||||
| | | | |S| | | <th align="center"><bcp14>SHOULD</bcp14></th> | |||
| | | | |H| |F | <th align="center"><bcp14>MAY</bcp14></th> | |||
| | | | |O|M|o | <th align="center"><bcp14>SHOULD NOT</bcp14></th> | |||
| | |S| |U|U|o | <th align="center"><bcp14>MUST NOT</bcp14></th> | |||
| | |H| |L|S|t | </tr> | |||
| |M|O| |D|T|n | </thead> | |||
| |U|U|M| | |o | <tbody> | |||
| |S|L|A|N|N|t | <tr> | |||
| |T|D|Y|O|O|t | <th colspan="7">PUSH flag</th> | |||
FEATURE | ReqID | | | |T|T|e | </tr> | |||
| | | | | | | | <tr> | |||
Push flag | | | | | | | | <td>Aggregate or queue un-pushed data</td> | |||
Aggregate or queue un-pushed data | MAY-16 | | |x| | | | <td>MAY-16</td> | |||
Sender collapse successive PSH flags | SHLD-27| |x| | | | | <td> </td> | |||
SEND call can specify PUSH | MAY-15 | | |x| | | | <td> </td> | |||
If cannot: sender buffer indefinitely | MUST-60| | | | |x| | <td align="center">X</td> | |||
If cannot: PSH last segment | MUST-61|x| | | | | | <td> </td> | |||
Notify receiving ALP of PSH | MAY-17 | | |x| | |1 | <td> </td> | |||
Send max size segment when possible | SHLD-28| |x| | | | | </tr> | |||
| | | | | | | | <tr> | |||
Window | | | | | | | | <td>Sender collapse successive PSH bits</td> | |||
Treat as unsigned number | MUST-1 |x| | | | | | <td>SHLD-27</td> | |||
Handle as 32-bit number | REC-1 | |x| | | | | <td> </td> | |||
Shrink window from right | SHLD-14| | | |x| | | <td align="center">X</td> | |||
- Send new data when window shrinks | SHLD-15| | | |x| | | <td> </td> | |||
- Retransmit old unacked data within window | SHLD-16| |x| | | | | <td> </td> | |||
- Time out conn for data past right edge | SHLD-17| | | |x| | | <td> </td> | |||
Robust against shrinking window | MUST-34|x| | | | | | </tr> | |||
Receiver's window closed indefinitely | MAY-8 | | |x| | | | <tr> | |||
Use standard probing logic | MUST-35|x| | | | | | <td>SEND call can specify PUSH</td> | |||
Sender probe zero window | MUST-36|x| | | | | | <td>MAY-15</td> | |||
First probe after RTO | SHLD-29| |x| | | | | <td> </td> | |||
Exponential backoff | SHLD-30| |x| | | | | <td> </td> | |||
Allow window stay zero indefinitely | MUST-37|x| | | | | | <td align="center">X</td> | |||
Retransmit old data beyond SND.UNA+SND.WND | MAY-7 | | |x| | | | <td> </td> | |||
Process RST and URG even with zero window | MUST-66|x| | | | | | <td> </td> | |||
| | | | | | | | </tr> | |||
Urgent Data | | | | | | | | <tr> | |||
Include support for urgent pointer | MUST-30|x| | | | | | <td><ul><li>If cannot: sender buffer indefinitely</li></ul></td> | |||
Pointer indicates first non-urgent octet | MUST-62|x| | | | | | <td>MUST-60</td> | |||
Arbitrary length urgent data sequence | MUST-31|x| | | | | | <td> </td> | |||
Inform ALP asynchronously of urgent data | MUST-32|x| | | | |1 | <td> </td> | |||
ALP can learn if/how much urgent data Q'd | MUST-33|x| | | | |1 | <td> </td> | |||
ALP employ the urgent mechanism | SHLD-13| | | |x| | | <td> </td> | |||
| | | | | | | | <td align="center">X</td> | |||
TCP Options | | | | | | | | </tr> | |||
Support the mandatory option set | MUST-4 |x| | | | | | <tr> | |||
Receive TCP option in any segment | MUST-5 |x| | | | | | <td><ul><li>If cannot: PSH last segment</li></ul></td> | |||
Ignore unsupported options | MUST-6 |x| | | | | | <td>MUST-61</td> | |||
Include length for all options except EOL+NOP | MUST-68|x| | | | | | <td align="center">X</td> | |||
Cope with illegal option length | MUST-7 |x| | | | | | <td> </td> | |||
Process options regardless of word alignment | MUST-64|x| | | | | | <td> </td> | |||
Implement sending & receiving MSS option | MUST-14|x| | | | | | <td> </td> | |||
IPv4 Send MSS option unless 536 | SHLD-5 | |x| | | | | <td> </td> | |||
IPv6 Send MSS option unless 1220 | SHLD-5 | |x| | | | | </tr> | |||
Send MSS option always | MAY-3 | | |x| | | | <tr> | |||
IPv4 Send-MSS default is 536 | MUST-15|x| | | | | | <td>Notify receiving ALP<sup>1</sup> of PSH</td> | |||
IPv6 Send-MSS default is 1220 | MUST-15|x| | | | | | <td>MAY-17</td> | |||
Calculate effective send seg size | MUST-16|x| | | | | | <td> </td> | |||
MSS accounts for varying MTU | SHLD-6 | |x| | | | | <td> </td> | |||
MSS not sent on non-SYN segments | MUST-65| | | | |x| | <td align="center">X</td> | |||
MSS value based on MMS_R | MUST-67|x| | | | | | <td> </td> | |||
Pad with zero | MUST-69|x| | | | | | <td> </td> | |||
| | | | | | | | </tr> | |||
TCP Checksums | | | | | | | | <tr> | |||
Sender compute checksum | MUST-2 |x| | | | | | <td>Send max size segment when possible</td> | |||
Receiver check checksum | MUST-3 |x| | | | | | <td>SHLD-28</td> | |||
| | | | | | | | <td> </td> | |||
ISN Selection | | | | | | | | <td align="center">X</td> | |||
Include a clock-driven ISN generator component | MUST-8 |x| | | | | | <td> </td> | |||
Secure ISN generator with a PRF component | SHLD-1 | |x| | | | | <td> </td> | |||
PRF computable from outside the host | MUST-9 | | | | |x| | <td> </td> | |||
| | | | | | | | </tr> | |||
Opening Connections | | | | | | | | <tr> | |||
Support simultaneous open attempts | MUST-10|x| | | | | | <th colspan="7">Window</th> | |||
SYN-RECEIVED remembers last state | MUST-11|x| | | | | | </tr> | |||
Passive Open call interfere with others | MUST-41| | | | |x| | <tr> | |||
Function: simultan. LISTENs for same port | MUST-42|x| | | | | | <td>Treat as unsigned number</td> | |||
Ask IP for src address for SYN if necc. | MUST-44|x| | | | | | <td>MUST-1</td> | |||
Otherwise, use local addr of conn. | MUST-45|x| | | | | | <td align="center">X</td> | |||
OPEN to broadcast/multicast IP Address | MUST-46| | | | |x| | <td> </td> | |||
Silently discard seg to bcast/mcast addr | MUST-57|x| | | | | | <td> </td> | |||
| | | | | | | | <td> </td> | |||
Closing Connections | | | | | | | | <td> </td> | |||
RST can contain data | SHLD-2 | |x| | | | | </tr> | |||
Inform application of aborted conn | MUST-12|x| | | | | | <tr> | |||
Half-duplex close connections | MAY-1 | | |x| | | | <td>Handle as 32-bit number</td> | |||
Send RST to indicate data lost | SHLD-3 | |x| | | | | <td>REC-1</td> | |||
In TIME-WAIT state for 2MSL seconds | MUST-13|x| | | | | | <td> </td> | |||
Accept SYN from TIME-WAIT state | MAY-2 | | |x| | | | <td align="center">X</td> | |||
Use Timestamps to reduce TIME-WAIT | SHLD-4 | |x| | | | | <td> </td> | |||
| | | | | | | | <td> </td> | |||
Retransmissions | | | | | | | | <td> </td> | |||
Implement exponential backoff, slow start, and | MUST-19|x| | | | | | </tr> | |||
congestion avoidance | | | | | | | | <tr> | |||
Retransmit with same IP ident | MAY-4 | | |x| | | | <td>Shrink window from right</td> | |||
Karn's algorithm | MUST-18|x| | | | | | <td>SHLD-14</td> | |||
| | | | | | | | <td> </td> | |||
Generating ACKs: | | | | | | | | <td> </td> | |||
Aggregate whenever possible | MUST-58|x| | | | | | <td> </td> | |||
Queue out-of-order segments | SHLD-31| |x| | | | | <td align="center">X</td> | |||
Process all Q'd before send ACK | MUST-59|x| | | | | | <td> </td> | |||
Send ACK for out-of-order segment | MAY-13 | | |x| | | | </tr> | |||
Delayed ACKs | SHLD-18| |x| | | | | <tr> | |||
Delay < 0.5 seconds | MUST-40|x| | | | | | <td><ul><li>Send new data when window shrinks</li></ul></td> | |||
Every 2nd full-sized segment or 2*RMSS ACK'd | SHLD-19| |x| | | | | <td>SHLD-15</td> | |||
Receiver SWS-Avoidance Algorithm | MUST-39|x| | | | | | <td> </td> | |||
| | | | | | | | <td> </td> | |||
Sending data | | | | | | | | <td> </td> | |||
Configurable TTL | MUST-49|x| | | | | | <td align="center">X</td> | |||
Sender SWS-Avoidance Algorithm | MUST-38|x| | | | | | <td> </td> | |||
Nagle algorithm | SHLD-7 | |x| | | | | </tr> | |||
Application can disable Nagle algorithm | MUST-17|x| | | | | | <tr> | |||
| | | | | | | | <td><ul><li>Retransmit old unacked data within window</li></ul></td> | |||
Connection Failures: | | | | | | | | <td>SHLD-16</td> | |||
Negative advice to IP on R1 retxs | MUST-20|x| | | | | | <td> </td> | |||
Close connection on R2 retxs | MUST-20|x| | | | | | <td align="center">X</td> | |||
ALP can set R2 | MUST-21|x| | | | |1 | <td> </td> | |||
Inform ALP of R1<=retxs<R2 | SHLD-9 | |x| | | |1 | <td> </td> | |||
Recommended value for R1 | SHLD-10| |x| | | | | <td> </td> | |||
Recommended value for R2 | SHLD-11| |x| | | | | </tr> | |||
Same mechanism for SYNs | MUST-22|x| | | | | | <tr> | |||
R2 at least 3 minutes for SYN | MUST-23|x| | | | | | <td><ul><li>Time out conn for data past right edge</li></ul></td> | |||
| | | | | | | | <td>SHLD-17</td> | |||
Send Keep-alive Packets: | MAY-5 | | |x| | | | <td> </td> | |||
- Application can request | MUST-24|x| | | | | | <td> </td> | |||
- Default is "off" | MUST-25|x| | | | | | <td> </td> | |||
- Only send if idle for interval | MUST-26|x| | | | | | <td align="center">X</td> | |||
- Interval configurable | MUST-27|x| | | | | | <td> </td> | |||
- Default at least 2 hrs. | MUST-28|x| | | | | | </tr> | |||
- Tolerant of lost ACKs | MUST-29|x| | | | | | <tr> | |||
- Send with no data | SHLD-12| |x| | | | | <td>Robust against shrinking window</td> | |||
- Configurable to send garbage octet | MAY-6 | | |x| | | | <td>MUST-34</td> | |||
| | | | | | | | <td align="center">X</td> | |||
IP Options | | | | | | | | <td> </td> | |||
Ignore options TCP doesn't understand | MUST-50|x| | | | | | <td> </td> | |||
Time Stamp support | MAY-10 | | |x| | | | <td> </td> | |||
Record Route support | MAY-11 | | |x| | | | <td> </td> | |||
Source Route: | | | | | | | | </tr> | |||
ALP can specify | MUST-51|x| | | | |1 | <tr> | |||
Overrides src rt in datagram | MUST-52|x| | | | | | <td>Receiver's window closed indefinitely</td> | |||
Build return route from src rt | MUST-53|x| | | | | | <td>MAY-8</td> | |||
Later src route overrides | SHLD-24| |x| | | | | <td> </td> | |||
| | | | | | | | <td> </td> | |||
Receiving ICMP Messages from IP | MUST-54|x| | | | | | <td align="center">X</td> | |||
Dest. Unreach (0,1,5) => inform ALP | SHLD-25| |x| | | | | <td> </td> | |||
Abort on Dest. Unreach (0,1,5) =>nn | MUST-56| | | | |x| | <td> </td> | |||
Dest. Unreach (2-4) => abort conn | SHLD-26| |x| | | | | </tr> | |||
Source Quench => silent discard | MUST-55|x| | | | | | <tr> | |||
Abort on Time Exceeded => | MUST-56| | | | |x| | <td>Use standard probing logic</td> | |||
Abort on Param Problem => | MUST-56| | | | |x| | <td>MUST-35</td> | |||
| | | | | | | | <td align="center">X</td> | |||
Address Validation | | | | | | | | <td> </td> | |||
Reject OPEN call to invalid IP address | MUST-46|x| | | | | | <td> </td> | |||
Reject SYN from invalid IP address | MUST-63|x| | | | | | <td> </td> | |||
Silently discard SYN to bcast/mcast addr | MUST-57|x| | | | | | <td> </td> | |||
| | | | | | | | </tr> | |||
TCP/ALP Interface Services | | | | | | | | <tr> | |||
Error Report mechanism | MUST-47|x| | | | | | <td>Sender probe zero window</td> | |||
ALP can disable Error Report Routine | SHLD-20| |x| | | | | <td>MUST-36</td> | |||
ALP can specify DiffServ field for sending | MUST-48|x| | | | | | <td align="center">X</td> | |||
Passed unchanged to IP | SHLD-22| |x| | | | | <td> </td> | |||
ALP can change DiffServ field during connection| SHLD-21| |x| | | | | <td> </td> | |||
ALP generally changing DiffServ during conn. | SHLD-23| | | |x| | | <td> </td> | |||
Pass received DiffServ field up to ALP | MAY-9 | | |x| | | | <td> </td> | |||
FLUSH call | MAY-14 | | |x| | | | </tr> | |||
Optional local IP addr parm. in OPEN | MUST-43|x| | | | | | <tr> | |||
| | | | | | | | <td><ul><li>First probe after RTO</li></ul></td> | |||
RFC 5961 Support: | | | | | | | | <td>SHLD-29</td> | |||
Implement data injection protection | MAY-12 | | |x| | | | <td> </td> | |||
| | | | | | | | <td align="center">X</td> | |||
Explicit Congestion Notification: | | | | | | | | <td> </td> | |||
Support ECN | SHLD-8 | |x| | | | | <td> </td> | |||
| | | | | | | | <td> </td> | |||
Alternative Congestion Control: | | | | | | | | </tr> | |||
Implement alternative conformant algorithm(s) | MAY-18 | | |x| | | | <tr> | |||
<td><ul><li>Exponential backoff</li></ul></td> | ||||
</artwork></figure> | <td>SHLD-30</td> | |||
<t> | <td> </td> | |||
<td align="center">X</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
</tr> | ||||
<tr> | ||||
<td>Allow window stay zero indefinitely</td> | ||||
<td>MUST-37</td> | ||||
<td align="center">X</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
</tr> | ||||
<tr> | ||||
<td>Retransmit old data beyond SND.UNA+SND.WND</td> | ||||
<td>MAY-7</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td align="center">X</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
</tr> | ||||
<tr> | ||||
<td>Process RST and URG even with zero window</td> | ||||
<td>MUST-66</td> | ||||
<td align="center">X</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
</tr> | ||||
<tr> | ||||
<th colspan="7">Urgent Data</th> | ||||
</tr> | ||||
<tr> | ||||
<td>Include support for urgent pointer</td> | ||||
<td>MUST-30</td> | ||||
<td align="center">X</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
</tr> | ||||
<tr> | ||||
<td>Pointer indicates first non-urgent octet</td> | ||||
<td>MUST-62</td> | ||||
<td align="center">X</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
</tr> | ||||
<tr> | ||||
<td>Arbitrary length urgent data sequence</td> | ||||
<td>MUST-31</td> | ||||
<td align="center">X</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
</tr> | ||||
<tr> | ||||
<td>Inform ALP<sup>1</sup> asynchronously of urgent data </td> | ||||
<td>MUST-32</td> | ||||
<td align="center">X</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
</tr> | ||||
<tr> | ||||
<td>ALP<sup>1</sup> can learn if/how much urgent data Q'd</td> | ||||
<td>MUST-33</td> | ||||
<td align="center">X</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
</tr> | ||||
<tr> | ||||
<td>ALP employ the urgent mechanism</td> | ||||
<td>SHLD-13</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td align="center">X</td> | ||||
<td> </td> | ||||
</tr> | ||||
<tr> | ||||
<th colspan="7">TCP Options</th> | ||||
</tr> | ||||
<tr> | ||||
<td>Support the mandatory option set</td> | ||||
<td>MUST-4</td> | ||||
<td align="center">X</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
</tr> | ||||
<tr> | ||||
<td>Receive TCP Option in any segment</td> | ||||
<td>MUST-5</td> | ||||
<td align="center">X</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
</tr> | ||||
<tr> | ||||
<td>Ignore unsupported options</td> | ||||
<td>MUST-6</td> | ||||
<td align="center">X</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
</tr> | ||||
<tr> | ||||
<td>Include length for all options except EOL+NOP</td> | ||||
<td>MUST-68</td> | ||||
<td align="center">X</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
</tr> | ||||
<tr> | ||||
<td>Cope with illegal option length</td> | ||||
<td>MUST-7</td> | ||||
<td align="center">X</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
</tr> | ||||
<tr> | ||||
<td>Process options regardless of word alignment</td> | ||||
<td>MUST-64</td> | ||||
<td align="center">X</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
</tr> | ||||
<tr> | ||||
<td>Implement sending &amp; receiving MSS Option</td> | ||||
<td>MUST-14</td> | ||||
<td align="center">X</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
</tr> | ||||
<tr> | ||||
<td>IPv4 Send MSS Option unless 536</td> | ||||
<td>SHLD-5</td> | ||||
<td> </td> | ||||
<td align="center">X</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
</tr> | ||||
<tr> | ||||
<td>IPv6 Send MSS Option unless 1220</td> | ||||
<td>SHLD-5</td> | ||||
<td> </td> | ||||
<td align="center">X</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
</tr> | ||||
<tr> | ||||
<td>Send MSS Option always</td> | ||||
<td>MAY-3</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td align="center">X</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
</tr> | ||||
<tr> | ||||
<td>IPv4 Send-MSS default is 536</td> | ||||
<td>MUST-15</td> | ||||
<td align="center">X</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
</tr> | ||||
<tr> | ||||
<td>IPv6 Send-MSS default is 1220</td> | ||||
<td>MUST-15</td> | ||||
<td align="center">X</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
</tr> | ||||
<tr> | ||||
<td>Calculate effective send seg size</td> | ||||
<td>MUST-16</td> | ||||
<td align="center">X</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
</tr> | ||||
<tr> | ||||
<td>MSS accounts for varying MTU</td> | ||||
<td>SHLD-6</td> | ||||
<td> </td> | ||||
<td align="center">X</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
</tr> | ||||
<tr> | ||||
<td>MSS not sent on non-SYN segments</td> | ||||
<td>MUST-65</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td align="center">X</td> | ||||
</tr> | ||||
<tr> | ||||
<td>MSS value based on MMS_R</td> | ||||
<td>MUST-67</td> | ||||
<td align="center">X</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
</tr> | ||||
<tr> | ||||
<td>Pad with zero</td> | ||||
<td>MUST-69</td> | ||||
<td align="center">X</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
</tr> | ||||
<tr> | ||||
<th colspan="7">TCP Checksums</th> | ||||
</tr> | ||||
<tr> | ||||
<td>Sender compute checksum</td> | ||||
<td>MUST-2</td> | ||||
<td align="center">X</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
</tr> | ||||
<tr> | ||||
<td>Receiver check checksum</td> | ||||
<td>MUST-3</td> | ||||
<td align="center">X</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
</tr> | ||||
<tr> | ||||
<th colspan="7">ISN Selection</th> | ||||
</tr> | ||||
<tr> | ||||
<td>Include a clock-driven ISN generator component</td> | ||||
<td>MUST-8</td> | ||||
<td align="center">X</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
</tr> | ||||
<tr> | ||||
<td>Secure ISN generator with a PRF component</td> | ||||
<td>SHLD-1</td> | ||||
<td> </td> | ||||
<td align="center">X</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
</tr> | ||||
<tr> | ||||
<td>PRF computable from outside the host</td> | ||||
<td>MUST-9</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td align="center">X</td> | ||||
</tr> | ||||
<tr> | ||||
<th colspan="7">Opening Connections</th> | ||||
</tr> | ||||
<tr> | ||||
<td>Support simultaneous open attempts</td> | ||||
<td>MUST-10</td> | ||||
<td align="center">X</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
</tr> | ||||
<tr> | ||||
<td>SYN-RECEIVED remembers last state</td> | ||||
<td>MUST-11</td> | ||||
<td align="center">X</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
</tr> | ||||
<tr> | ||||
<td>Passive OPEN call interfere with others</td> | ||||
<td>MUST-41</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td align="center">X</td> | ||||
</tr> | ||||
<tr> | ||||
<td>Function: simultaneously LISTENs for same port</td> | ||||
<td>MUST-42</td> | ||||
<td align="center">X</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
</tr> | ||||
<tr> | ||||
<td>Ask IP for src address for SYN if necessary</td> | ||||
<td>MUST-44</td> | ||||
<td align="center">X</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
</tr> | ||||
<tr> | ||||
<td><ul><li>Otherwise, use local addr of connection</li></ul></td> | ||||
<td>MUST-45</td> | ||||
<td align="center">X</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
</tr> | ||||
<tr> | ||||
<td>OPEN to broadcast/multicast IP address</td> | ||||
<td>MUST-46</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td align="center">X</td> | ||||
</tr> | ||||
<tr> | ||||
<td>Silently discard seg to bcast/mcast addr</td> | ||||
<td>MUST-57</td> | ||||
<td align="center">X</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
</tr> | ||||
<tr> | ||||
<th colspan="7">Closing Connections</th> | ||||
</tr> | ||||
<tr> | ||||
<td>RST can contain data</td> | ||||
<td>SHLD-2</td> | ||||
<td> </td> | ||||
<td align="center">X</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
</tr> | ||||
<tr> | ||||
<td>Inform application of aborted conn</td> | ||||
<td>MUST-12</td> | ||||
<td align="center">X</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
</tr> | ||||
<tr> | ||||
<td>Half-duplex close connections</td> | ||||
<td>MAY-1</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td align="center">X</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
</tr> | ||||
<tr> | ||||
<td><ul><li>Send RST to indicate data lost</li></ul></td> | ||||
<td>SHLD-3</td> | ||||
<td> </td> | ||||
<td align="center">X</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
</tr> | ||||
<tr> | ||||
<td>In TIME-WAIT state for 2MSL seconds</td> | ||||
<td>MUST-13</td> | ||||
<td align="center">X</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
</tr> | ||||
<tr> | ||||
<td><ul><li>Accept SYN from TIME-WAIT state</li></ul></td> | ||||
<td>MAY-2</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td align="center">X</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
</tr> | ||||
<tr> | ||||
<td><ul><li>Use Timestamps to reduce TIME-WAIT</li></ul></td> | ||||
<td>SHLD-4</td> | ||||
<td> </td> | ||||
<td align="center">X</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
</tr> | ||||
<tr> | ||||
<th colspan="7">Retransmissions</th> | ||||
</tr> | ||||
<tr> | ||||
<td>Implement exponential backoff, slow start, and congestion avoidance</td> | ||||
<td>MUST-19</td> | ||||
<td align="center">X</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
</tr> | ||||
<tr> | ||||
<td>Retransmit with same IP identity</td> | ||||
<td>MAY-4</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td align="center">X</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
</tr> | ||||
<tr> | ||||
<td>Karn's algorithm</td> | ||||
<td>MUST-18</td> | ||||
<td align="center">X</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
</tr> | ||||
<tr> | ||||
<th colspan="7">Generating ACKs</th> | ||||
</tr> | ||||
<tr> | ||||
<td>Aggregate whenever possible</td> | ||||
<td>MUST-58</td> | ||||
<td align="center">X</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
</tr> | ||||
<tr> | ||||
<td>Queue out-of-order segments</td> | ||||
<td>SHLD-31</td> | ||||
<td> </td> | ||||
<td align="center">X</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
</tr> | ||||
<tr> | ||||
<td>Process all Q'd before send ACK</td> | ||||
<td>MUST-59</td> | ||||
<td align="center">X</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
</tr> | ||||
<tr> | ||||
<td>Send ACK for out-of-order segment</td> | ||||
<td>MAY-13</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td align="center">X</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
</tr> | ||||
<tr> | ||||
<td>Delayed ACKs</td> | ||||
<td>SHLD-18</td> | ||||
<td> </td> | ||||
<td align="center">X</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
</tr> | ||||
<tr> | ||||
<td><ul><li>Delay &lt; 0.5 seconds</li></ul></td> | ||||
<td>MUST-40</td> | ||||
<td align="center">X</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
</tr> | ||||
<tr> | ||||
<td><ul><li>Every 2nd full-sized segment or 2*RMSS ACK'd</li></ul></td> | ||||
<td>SHLD-19</td> | ||||
<td> </td> | ||||
<td align="center">X</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
</tr> | ||||
<tr> | ||||
<td>Receiver SWS-Avoidance Algorithm</td> | ||||
<td>MUST-39</td> | ||||
<td align="center">X</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
</tr> | ||||
<tr> | ||||
<th colspan="7">Sending Data</th> | ||||
</tr> | ||||
<tr> | ||||
<td>Configurable TTL</td> | ||||
<td>MUST-49</td> | ||||
<td align="center">X</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
</tr> | ||||
<tr> | ||||
<td>Sender SWS-Avoidance Algorithm </td> | ||||
<td>MUST-38</td> | ||||
<td align="center">X</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
</tr> | ||||
<tr> | ||||
<td>Nagle algorithm</td> | ||||
<td>SHLD-7</td> | ||||
<td> </td> | ||||
<td align="center">X</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
</tr> | ||||
<tr> | ||||
<td><ul><li>Application can disable Nagle algorithm</li></ul></td> | ||||
<td>MUST-17</td> | ||||
<td align="center">X</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
</tr> | ||||
<tr> | ||||
<th colspan="7">Connection Failures</th> | ||||
</tr> | ||||
<tr> | ||||
<td>Negative advice to IP on R1 retransmissions</td> | ||||
<td>MUST-20</td> | ||||
<td align="center">X</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
</tr> | ||||
<tr> | ||||
<td>Close connection on R2 retransmissions</td> | ||||
<td>MUST-20</td> | ||||
<td align="center">X</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
</tr> | ||||
<tr> | ||||
<td>ALP<sup>1</sup> can set R2</td> | ||||
<td>MUST-21</td> | ||||
<td align="center">X</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
</tr> | ||||
<tr> | ||||
<td>Inform ALP of R1&lt;=retxs&lt;R2 </td> | ||||
<td>SHLD-9</td> | ||||
<td> </td> | ||||
<td align="center">X</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
</tr> | ||||
<tr> | ||||
<td>Recommended value for R1</td> | ||||
<td>SHLD-10</td> | ||||
<td> </td> | ||||
<td align="center">X</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
</tr> | ||||
<tr> | ||||
<td>Recommended value for R2</td> | ||||
<td>SHLD-11</td> | ||||
<td> </td> | ||||
<td align="center">X</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
</tr> | ||||
<tr> | ||||
<td>Same mechanism for SYNs</td> | ||||
<td>MUST-22</td> | ||||
<td align="center">X</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
</tr> | ||||
<tr> | ||||
<td><ul><li>R2 at least 3 minutes for SYN</li></ul></td> | ||||
<td>MUST-23</td> | ||||
<td align="center">X</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
</tr> | ||||
<tr> | ||||
<th colspan="7">Send Keep-alive Packets</th> | ||||
</tr> | ||||
<tr> | ||||
<td>Send Keep-alive Packets:</td> | ||||
<td>MAY-5</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td align="center">X</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
</tr> | ||||
<tr> | ||||
<td><ul><li>Application can request</li></ul></td> | ||||
<td>MUST-24</td> | ||||
<td align="center">X</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
</tr> | ||||
<tr> | ||||
<td><ul><li>Default is "off"</li></ul></td> | ||||
<td>MUST-25</td> | ||||
<td align="center">X</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
</tr> | ||||
<tr> | ||||
<td><ul><li>Only send if idle for interval</li></ul></td> | ||||
<td>MUST-26</td> | ||||
<td align="center">X</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
</tr> | ||||
<tr> | ||||
<td><ul><li>Interval configurable</li></ul></td> | ||||
<td>MUST-27</td> | ||||
<td align="center">X</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
</tr> | ||||
<tr> | ||||
<td><ul><li>Default at least 2 hrs.</li></ul></td> | ||||
<td>MUST-28</td> | ||||
<td align="center">X</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
</tr> | ||||
<tr> | ||||
<td><ul><li>Tolerant of lost ACKs</li></ul></td> | ||||
<td>MUST-29</td> | ||||
<td align="center">X</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
</tr> | ||||
<tr> | ||||
<td><ul><li>Send with no data</li></ul></td> | ||||
<td>SHLD-12</td> | ||||
<td> </td> | ||||
<td align="center">X</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
</tr> | ||||
<tr> | ||||
<td><ul><li>Configurable to send garbage octet</li></ul></td> | ||||
<td>MAY-6</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td align="center">X</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
</tr> | ||||
<tr> | ||||
<th colspan="7">IP Options</th> | ||||
</tr> | ||||
<tr> | ||||
<td>Ignore options TCP doesn't understand</td> | ||||
<td>MUST-50</td> | ||||
<td align="center">X</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
</tr> | ||||
<tr> | ||||
<td>Timestamp support</td> | ||||
<td>MAY-10</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td align="center">X</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
</tr> | ||||
<tr> | ||||
<td>Record Route support</td> | ||||
<td>MAY-11</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td align="center">X</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
</tr> | ||||
<tr> | ||||
<td>Source Route:</td> | ||||
<td></td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
</tr> | ||||
<tr> | ||||
<td><ul><li>ALP<sup>1</sup> can specify</li></ul></td> | ||||
<td>MUST-51</td> | ||||
<td align="center">X</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
</tr> | ||||
<tr> | ||||
<td><ul indent="6"><li>Overrides src route in datagram</li></ul></td> | ||||
<td>MUST-52</td> | ||||
<td align="center">X</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
</tr> | ||||
<tr> | ||||
<td><ul><li>Build return route from src route</li></ul></td> | ||||
<td>MUST-53</td> | ||||
<td align="center">X</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
</tr> | ||||
<tr> | ||||
<td><ul><li>Later src route overrides</li></ul></td> | ||||
<td>SHLD-24</td> | ||||
<td> </td> | ||||
<td align="center">X</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
</tr> | ||||
<tr> | ||||
<th colspan="7">Receiving ICMP Messages from IP</th> | ||||
</tr> | ||||
<tr> | ||||
<td>Receiving ICMP messages from IP</td> | ||||
<td>MUST-54</td> | ||||
<td align="center">X</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
</tr> | ||||
<tr> | ||||
<td><ul><li>Dest Unreach (0,1,5) => inform ALP</li></ul></td> | ||||
<td>SHLD-25</td> | ||||
<td> </td> | ||||
<td align="center">X</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
</tr> | ||||
<tr> | ||||
<td><ul><li>Abort on Dest Unreach (0,1,5)</li></ul></td> | ||||
<td>MUST-56</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td align="center">X</td> | ||||
</tr> | ||||
<tr> | ||||
<td><ul><li>Dest Unreach (2-4) => abort conn</li></ul></td> | ||||
<td>SHLD-26</td> | ||||
<td> </td> | ||||
<td align="center">X</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
</tr> | ||||
<tr> | ||||
<td><ul><li>Source Quench => silent discard</li></ul></td> | ||||
<td>MUST-55</td> | ||||
<td align="center">X</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
</tr> | ||||
<tr> | ||||
<td><ul><li>Abort on Time Exceeded</li></ul></td> | ||||
<td>MUST-56</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td align="center">X</td> | ||||
</tr> | ||||
<tr> | ||||
<td><ul><li>Abort on Param Problem</li></ul></td> | ||||
<td>MUST-56</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td align="center">X</td> | ||||
</tr> | ||||
<tr> | ||||
<th colspan="7">Address Validation</th> | ||||
</tr> | ||||
<tr> | ||||
<td>Reject OPEN call to invalid IP address</td> | ||||
<td>MUST-46</td> | ||||
<td align="center">X</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
</tr> | ||||
<tr> | ||||
<td>Reject SYN from invalid IP address</td> | ||||
<td>MUST-63</td> | ||||
<td align="center">X</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
</tr> | ||||
<tr> | ||||
<td>Silently discard SYN to bcast/mcast addr</td> | ||||
<td>MUST-57</td> | ||||
<td align="center">X</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
</tr> | ||||
<tr> | ||||
<th colspan="7">TCP/ALP Interface Services</th> | ||||
</tr> | ||||
<tr> | ||||
<td>Error Report mechanism</td> | ||||
<td>MUST-47</td> | ||||
<td align="center">X</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
</tr> | ||||
<tr> | ||||
<td>ALP can disable Error Report Routine</td> | ||||
<td>SHLD-20</td> | ||||
<td> </td> | ||||
<td align="center">X</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
</tr> | ||||
<tr> | ||||
<td>ALP can specify Diffserv field for sending</td> | ||||
<td>MUST-48</td> | ||||
<td align="center">X</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
</tr> | ||||
<tr> | ||||
<td><ul><li>Passed unchanged to IP</li></ul></td> | ||||
<td>SHLD-22</td> | ||||
<td> </td> | ||||
<td align="center">X</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
</tr> | ||||
<tr> | ||||
<td>ALP can change Diffserv field during connection</td> | ||||
<td>SHLD-21</td> | ||||
<td> </td> | ||||
<td align="center">X</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
</tr> | ||||
<tr> | ||||
<td>ALP generally changing Diffserv during conn.</td> | ||||
<td>SHLD-23</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td align="center">X</td> | ||||
<td> </td> | ||||
</tr> | ||||
<tr> | ||||
<td>Pass received Diffserv field up to ALP</td> | ||||
<td>MAY-9</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td align="center">X</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
</tr> | ||||
<tr> | ||||
<td>FLUSH call</td> | ||||
<td>MAY-14</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td align="center">X</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
</tr> | ||||
<tr> | ||||
<td>Optional local IP addr param in OPEN</td> | ||||
<td>MUST-43</td> | ||||
<td align="center">X</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
</tr> | ||||
<tr> | ||||
<th colspan="7">RFC 5961 Support</th> | ||||
</tr> | ||||
<tr> | ||||
<td>Implement data injection protection</td> | ||||
<td>MAY-12</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td align="center">X</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
</tr> | ||||
<tr> | ||||
<th colspan="7">Explicit Congestion Notification</th> | ||||
</tr> | ||||
<tr> | ||||
<td>Support ECN</td> | ||||
<td>SHLD-8</td> | ||||
<td> </td> | ||||
<td align="center">X</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td> </td> | ||||
</tr> | ||||
<tr> | ||||
<th colspan="7">Alternative Congestion Control</th> | ||||
</tr> | ||||
<tr> | ||||
<td>Implement alternative conformant algorithm(s)</td> | ||||
<td>MAY-18</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
<td align="center">X</td> | ||||
<td> </td> | ||||
<td> </td> | ||||
</tr> | ||||
</tbody> | ||||
</table> | ||||
<t> | ||||
FOOTNOTES: | FOOTNOTES: | |||
(1) "ALP" means Application-Layer Program. | (1) "ALP" means Application-Layer Program. | |||
</t> | </t> | |||
</section> | </section> | |||
</back> | <section numbered="false" toc="default"> | |||
<name>Acknowledgments</name> | ||||
<t> | ||||
This document is largely a revision of RFC 793, of which <contact fullname="Jon Postel"/> was the editor. | ||||
Due to his excellent work, it was able to last for three decades before we felt the need to revise it. | ||||
</t> | ||||
<t> | ||||
<contact fullname="Andre Oppermann"/> was a contributor and helped to edit the first revision of this document. | ||||
</t> | ||||
<t> | ||||
We are thankful for the assistance of the IETF TCPM working group chairs over the course of work on this document: | ||||
</t> | ||||
<contact fullname="Michael Scharf"/> | ||||
<contact fullname="Yoshifumi Nishida"/> | ||||
<contact fullname="Pasi Sarolahti"/> | ||||
<contact fullname="Michael Tüxen"/> | ||||
<t> | ||||
During the discussions of this work on the TCPM mailing list, in | ||||
working group meetings, and via area reviews, helpful comments, | ||||
critiques, and reviews were received from (listed alphabetically | ||||
by last name): <contact fullname="Praveen Balasubramanian"/>, <contact fullname="David Borman"/>, | ||||
<contact fullname="Mohamed Boucadair"/>, <contact fullname="Bob Briscoe"/>, | ||||
<contact fullname="Neal Cardwell"/>, <contact fullname="Yuchung Cheng"/>, | ||||
<contact fullname="Martin Duke"/>, <contact fullname="Francis Dupont"/>, | ||||
<contact fullname="Ted Faber"/>, <contact fullname="Gorry Fairhurst"/>, | ||||
<contact fullname="Fernando Gont"/>, <contact fullname="Rodney Grimes"/>, | ||||
<contact fullname="Yi Huang"/>, <contact fullname="Rahul Jadhav"/>, | ||||
<contact fullname="Markku Kojo"/>, <contact fullname="Mike Kosek"/>, | ||||
<contact fullname="Juhamatti Kuusisaari"/>, <contact fullname="Kevin Lahey"/>, | ||||
<contact fullname="Kevin Mason"/>, <contact fullname="Matt Mathis"/>, | ||||
<contact fullname="Stephen McQuistin"/>, <contact fullname="Jonathan Morton"/>, | ||||
<contact fullname="Matt Olson"/>, <contact fullname="Tommy Pauly"/>, | ||||
<contact fullname="Tom Petch"/>, <contact fullname="Hagen Paul Pfeifer"/>, | ||||
<contact fullname="Kyle Rose"/>, <contact fullname="Anthony Sabatini"/>, | ||||
<contact fullname="Michael Scharf"/>, <contact fullname="Greg Skinner"/>, | ||||
<contact fullname="Joe Touch"/>, <contact fullname="Michael Tüxen"/>, | ||||
<contact fullname="Reji Varghese"/>, <contact fullname="Bernie Volz"/>, | ||||
<contact fullname="Tim Wicinski"/>, <contact fullname="Lloyd Wood"/>, | ||||
and <contact fullname="Alex Zimmermann"/>. | ||||
</t> | ||||
<t> | ||||
<contact fullname="Joe Touch"/> provided additional help in clarifying the description of segment size parameters and PMTUD/PLPMTUD recommendations. | ||||
Markku Kojo helped put together the text in the section on TCP Congestion Control. | ||||
</t> | ||||
<t> | ||||
This document includes content from errata that were reported by (listed chronologically): | ||||
<contact fullname="Yin Shuming"/>, <contact fullname="Bob Braden"/>, | ||||
<contact fullname="Morris M. Keesan"/>, <contact fullname="Pei-chun Cheng"/>, | ||||
<contact fullname="Constantin Hagemeier"/>, <contact fullname="Vishwas Manral"/>, | ||||
<contact fullname="Mykyta Yevstifeyev"/>, <contact fullname="EungJun Yi"/>, | ||||
<contact fullname="Botong Huang"/>, <contact fullname="Charles Deng"/>, | ||||
and <contact fullname="Merlin Buge"/>. | ||||
</t> | ||||
</section> | ||||
</back> | ||||
</rfc> | </rfc> | |||
End of changes. 822 change blocks. | ||||
4264 lines changed or deleted | 5824 lines changed or added | |||
This html diff was produced by rfcdiff 1.48. |