rfc8926xml2.original.xml | rfc8926.xml | |||
---|---|---|---|---|
<?xml version='1.0' encoding='utf-8'?> | <?xml version="1.0" encoding="UTF-8"?> | |||
<!DOCTYPE rfc SYSTEM "rfc2629.dtd" [ | ||||
<!ENTITY RFC0768 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RF | ||||
C.0768.xml"> | ||||
<!ENTITY RFC0792 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RF | ||||
C.0792.xml"> | ||||
<!ENTITY RFC1112 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RF | ||||
C.1112.xml"> | ||||
<!ENTITY RFC2119 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RF | ||||
C.2119.xml"> | ||||
<!ENTITY RFC4443 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RF | ||||
C.4443.xml"> | ||||
<!ENTITY RFC6936 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RF | ||||
C.6936.xml"> | ||||
<!ENTITY RFC8126 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RF | ||||
C.8126.xml"> | ||||
<!ENTITY RFC8174 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RF | ||||
C.8174.xml"> | ||||
<!ENTITY I-D.ietf-nvo3-encap SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml3 | ||||
/reference.I-D.draft-ietf-nvo3-encap-05.xml"> | ||||
<!ENTITY I-D.ietf-nvo3-dataplane-requirements SYSTEM "https://xml2rfc.ietf.org/p | ||||
ublic/rfc/bibxml3/reference.I-D.draft-ietf-nvo3-dataplane-requirements-03.xml"> | ||||
<!ENTITY I-D.ietf-intarea-tunnels SYSTEM "https://xml2rfc.ietf.org/public/rfc/bi | ||||
bxml3/reference.I-D.draft-ietf-intarea-tunnels-10.xml"> | ||||
<!--ENTITY IEEE.802.1Q_2014 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml6/ | ||||
reference.IEEE.802.1Q_2014.xml"--> | ||||
<!ENTITY RFC1191 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RF | ||||
C.1191.xml"> | ||||
<!ENTITY RFC2003 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RF | ||||
C.2003.xml"> | ||||
<!ENTITY RFC8200 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RF | ||||
C.8200.xml"> | ||||
<!ENTITY RFC2983 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RF | ||||
C.2983.xml"> | ||||
<!ENTITY RFC3031 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RF | ||||
C.3031.xml"> | ||||
<!ENTITY RFC3552 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RF | ||||
C.3552.xml"> | ||||
<!ENTITY RFC3985 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RF | ||||
C.3985.xml"> | ||||
<!ENTITY RFC4301 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RF | ||||
C.4301.xml"> | ||||
<!ENTITY RFC5374 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RF | ||||
C.5374.xml"> | ||||
<!ENTITY RFC6040 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RF | ||||
C.6040.xml"> | ||||
<!ENTITY RFC6335 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RF | ||||
C.6335.xml"> | ||||
<!ENTITY RFC6438 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RF | ||||
C.6438.xml"> | ||||
<!ENTITY RFC7348 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RF | ||||
C.7348.xml"> | ||||
<!ENTITY RFC7365 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RF | ||||
C.7365.xml"> | ||||
<!ENTITY RFC7637 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RF | ||||
C.7637.xml"> | ||||
<!ENTITY RFC8014 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RF | ||||
C.8014.xml"> | ||||
<!ENTITY RFC8085 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RF | ||||
C.8085.xml"> | ||||
<!ENTITY RFC8086 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RF | ||||
C.8086.xml"> | ||||
<!ENTITY RFC8201 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RF | ||||
C.8201.xml"> | ||||
<!ENTITY RFC8293 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RF | ||||
C.8293.xml"> | ||||
]> | ||||
<rfc submissionType="IETF" docName="draft-ietf-nvo3-geneve-16" category="std"><? | ||||
rfc compact="yes"?> | ||||
<?rfc text-list-symbols="o*+-"?> | ||||
<?rfc subcompact="no"?> | ||||
<?rfc sortrefs="yes"?> | ||||
<?rfc symrefs="yes"?> | ||||
<?rfc strict="yes"?> | ||||
<?rfc toc="yes"?> | ||||
<front> | ||||
<title abbrev="Geneve Protocol">Geneve: Generic Network Virtualization En | ||||
capsulation</title> | ||||
<author fullname="Jesse Gross" initials="J." role="editor" surname="Gross | ||||
"> | ||||
<organization></organization> | ||||
<address><email>jesse@kernel.org</email> | ||||
</address> | ||||
</author> | ||||
<author fullname="Ilango Ganga" initials="I." role="editor" surname="Gang | <!DOCTYPE rfc SYSTEM "rfc2629-xhtml.ent"> | |||
a"> | ||||
<organization abbrev="Intel">Intel Corporation</organization> | ||||
<address><postal><street>2200 Mission College Blvd.</street> | ||||
<street>Santa Clara, CA 95054</street> | ||||
<street>USA</street> | ||||
</postal> | ||||
<email>ilango.s.ganga@intel.com</email> | ||||
</address> | ||||
</author> | ||||
<author fullname="T. Sridhar" initials="T." role="editor" surname="Sridha | <rfc xmlns:xi="http://www.w3.org/2001/XInclude" ipr="trust200902" docName="draft | |||
r"> | -ietf-nvo3-geneve-16" number="8926" submissionType="IETF" category="std" consens | |||
<organization abbrev="VMware">VMware, Inc.</organization> | us="true" obsoletes="" updates="" xml:lang="en" sortRefs="true" symRefs="true" t | |||
<address><postal><street>3401 Hillview Ave.</street> | ocInclude="true" version="3"> | |||
<street>Palo Alto, CA 94304</street> | ||||
<street>USA</street> | ||||
</postal> | ||||
<email>tsridhar@vmware.com</email> | ||||
</address> | ||||
</author> | ||||
<date day="07" month="March" year="2020"/> | <front> | |||
<abstract><t> | <title abbrev="Geneve Protocol">Geneve: Generic Network Virtualization Encap | |||
sulation</title> | ||||
<seriesInfo name="RFC" value="8926"/> | ||||
<author fullname="Jesse Gross" initials="J." role="editor" surname="Gross"> | ||||
<organization/> | ||||
<address> | ||||
<email>jesse@kernel.org</email> | ||||
</address> | ||||
</author> | ||||
<author fullname="Ilango Ganga" initials="I." role="editor" surname="Ganga"> | ||||
<organization abbrev="Intel">Intel Corporation</organization> | ||||
<address> | ||||
<postal> | ||||
<street>2200 Mission College Blvd.</street> | ||||
<city>Santa Clara</city><region>CA</region><code>95054</code> | ||||
<country>United States of America</country> | ||||
</postal> | ||||
<email>ilango.s.ganga@intel.com</email> | ||||
</address> | ||||
</author> | ||||
<author fullname="T. Sridhar" initials="T." role="editor" surname="Sridhar"> | ||||
<organization abbrev="VMware">VMware, Inc.</organization> | ||||
<address> | ||||
<postal> | ||||
<street>3401 Hillview Ave.</street> | ||||
<city>Palo Alto</city><region>CA</region><code>94304</code> | ||||
<country>United States of America</country> | ||||
</postal> | ||||
<email>tsridhar@utexas.edu</email> | ||||
</address> | ||||
</author> | ||||
<date month="November" year="2020"/> | ||||
<keyword>overlay</keyword> | ||||
<keyword>tunnel</keyword> | ||||
<keyword>extensible</keyword> | ||||
<keyword>variable</keyword> | ||||
<keyword>metadata</keyword> | ||||
<keyword>options</keyword> | ||||
<keyword>endpoint</keyword> | ||||
<keyword>transit</keyword> | ||||
<abstract> | ||||
<t> | ||||
Network virtualization involves the cooperation of devices with a | Network virtualization involves the cooperation of devices with a | |||
wide variety of capabilities such as software and hardware tunnel | wide variety of capabilities such as software and hardware tunnel | |||
endpoints, transit fabrics, and centralized control clusters. As a | endpoints, transit fabrics, and centralized control clusters. As a | |||
result of their role in tying together different elements in the | result of their role in tying together different elements of the | |||
system, the requirements on tunnels are influenced by all of these | system, the requirements on tunnels are influenced by all of these | |||
components. Flexibility is therefore the most important aspect of a | components. Therefore, flexibility is the most important aspect of a | |||
tunnel protocol if it is to keep pace with the evolution of the | tunneling protocol if it is to keep pace with the evolution of technology. | |||
system. This document describes Geneve, an encapsulation protocol designed t | This document describes Geneve, an encapsulation protocol designed to | |||
o | ||||
recognize and accommodate these changing capabilities and needs.</t> | recognize and accommodate these changing capabilities and needs.</t> | |||
</abstract> | ||||
</abstract> | </front> | |||
</front> | <middle> | |||
<section anchor="sec-1" numbered="true" toc="default"> | ||||
<middle> | <name>Introduction</name> | |||
<section title="Introduction" anchor="section-1"><t> | <t> | |||
Networking has long featured a variety of tunneling, tagging, and | Networking has long featured a variety of tunneling, tagging, and | |||
other encapsulation mechanisms. However, the advent of network | other encapsulation mechanisms. However, the advent of network | |||
virtualization has caused a surge of renewed interest and a | virtualization has caused a surge of renewed interest and a | |||
corresponding increase in the introduction of new protocols. The | corresponding increase in the introduction of new protocols. The | |||
large number of protocols in this space, for example, ranging all the way fro | large number of protocols in this space -- for example, ranging all the way f | |||
m | rom | |||
VLANs <xref target="IEEE.802.1Q_2018"/> and MPLS <xref target="RFC3031"/> thr | VLANs <xref target="IEEE.802.1Q_2018" format="default"/> and MPLS <xref targe | |||
ough the more recent | t="RFC3031" format="default"/> through the more recent | |||
VXLAN <xref target="RFC7348"/> (Virtual eXtensible Local Area Network) | VXLAN (Virtual eXtensible Local Area Network) <xref target="RFC7348" format= | |||
and NVGRE <xref target="RFC7637"/> (Network Virtualization Using Generic Rout | "default"/> | |||
ing Encapsulation), often | and NVGRE (Network Virtualization | |||
Using Generic Routing Encapsulation) <xref target="RFC7637" | ||||
format="default"/> -- often | ||||
leads to questions about the need for new encapsulation formats and | leads to questions about the need for new encapsulation formats and | |||
what it is about network virtualization in particular that leads to | what it is about network virtualization in particular that leads to | |||
their proliferation. Note that the list of protocols presented above is non-exhaustive.</t> | their proliferation. Note that the list of protocols presented above is non-exhaustive.</t> | |||
<t> | ||||
<t> | ||||
While many encapsulation protocols seek to simply partition the | While many encapsulation protocols seek to simply partition the | |||
underlay network or bridge between two domains, network | underlay network or bridge two domains, network | |||
virtualization views the transit network as providing connectivity | virtualization views the transit network as providing connectivity | |||
between multiple components of a distributed system. In many ways | between multiple components of a distributed system. In many ways, | |||
this system is similar to a chassis switch with the IP underlay | this system is similar to a chassis switch with the IP underlay | |||
network playing the role of the backplane and tunnel endpoints on the | network playing the role of the backplane and tunnel endpoints on the | |||
edge as line cards. When viewed in this light, the requirements | edge as line cards. When viewed in this light, the requirements | |||
placed on the tunnel protocol are significantly different in terms of | placed on the tunneling protocol are significantly different in terms of | |||
the quantity of metadata necessary and the role of transit nodes.</t> | the quantity of metadata necessary and the role of transit nodes.</t> | |||
<t> | ||||
<t> | Work such as "VL2: A Scalable and Flexible Data Center Network" <xref target= | |||
Work such as <xref target="VL2"/> (A Scalable and Flexible Data Center Networ | "VL2" format="default"/> and "NVO3 Data Plane Requirements" <xref target="I-D.ie | |||
k) | tf-nvo3-dataplane-requirements" format="default"/> | |||
and the NVO3 Data Plane Requirements <xref target="I-D.ietf-nvo3-dataplane-re | ||||
quirements"/> | ||||
have described some of the properties that the data plane must have to support network | have described some of the properties that the data plane must have to support network | |||
virtualization. However, one additional defining requirement is the | virtualization. However, one additional defining requirement is the | |||
need to carry metadata (e.g. system state) along with the packet data; | need to carry metadata (e.g., system state) along with the packet data; | |||
example use cases of metadata are noted below. The use of | example use cases of metadata are noted below. The use of | |||
some metadata is certainly not a foreign concept - nearly all | some metadata is certainly not a foreign concept -- nearly all | |||
protocols used for network virtualization have at least 24 bits of identifier | protocols used for network virtualization have at least 24 bits of identifier | |||
space as a way to partition between tenants. This is often described | space as a way to partition between tenants. This is often described | |||
as overcoming the limits of 12-bit VLANs, and when seen in that | as overcoming the limits of 12-bit VLANs; when seen in that | |||
context, or any context where it is a true tenant identifier, 16 | context or any context where it is a true tenant identifier, 16 | |||
million possible entries is a large number. However, the reality is | million possible entries is a large number. However, the reality is | |||
that the metadata is not exclusively used to identify tenants and | that the metadata is not exclusively used to identify tenants, and | |||
encoding other information quickly starts to crowd the space. In | encoding other information quickly starts to crowd the space. In | |||
fact, when compared to the tags used to exchange metadata between | fact, when compared to the tags used to exchange metadata between | |||
line cards on a chassis switch, 24-bit identifiers start to look | line cards on a chassis switch, 24-bit identifiers start to look | |||
quite small. There are nearly endless uses for this metadata, | quite small. There are nearly endless uses for this metadata, | |||
ranging from storing input port identifiers for simple security policies to | ranging from storing input port identifiers for simple security policies to | |||
sending service based context for advanced middlebox applications | sending service-based context for advanced middlebox applications | |||
that terminate and re-encapsulate Geneve traffic.</t> | that terminate and re-encapsulate Geneve traffic.</t> | |||
<t> | ||||
<t> | Existing tunneling protocols have each attempted to solve different | |||
Existing tunnel protocols have each attempted to solve different | aspects of these new requirements only to be quickly rendered out of | |||
aspects of these new requirements, only to be quickly rendered out of | ||||
date by changing control plane implementations and advancements. | date by changing control plane implementations and advancements. | |||
Furthermore, software and hardware components and controllers all | Furthermore, software and hardware components and controllers all | |||
have different advantages and rates of evolution - a fact that should | have different advantages and rates of evolution -- a fact that should | |||
be viewed as a benefit, not a liability or limitation. This draft | be viewed as a benefit, not a liability or limitation. This document describ | |||
describes Geneve, a protocol which seeks to avoid these problems by | es Geneve, a protocol that seeks to avoid these problems by | |||
providing a framework for tunneling for network virtualization rather | providing a framework for tunneling for network virtualization rather | |||
than being prescriptive about the entire system.</t> | than being prescriptive about the entire system.</t> | |||
<section anchor="sec-1.1" numbered="true" toc="default"> | ||||
<section title="Requirements Language" anchor="section-1.1"><t> | <name>Requirements Language</name> | |||
The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", | <t> | |||
"SHOULD", "SHOULD NOT", "RECOMMENDED", "NOT RECOMMENDED", "MAY", and "OPTIONA | The key words "<bcp14>MUST</bcp14>", "<bcp14>MUST NOT</bcp14>", "<bcp14>REQUI | |||
L" in this | RED</bcp14>", "<bcp14>SHALL</bcp14>", "<bcp14>SHALL NOT</bcp14>", | |||
document are to be interpreted as described in BCP 14 <xref target="RFC2119"/ | "<bcp14>SHOULD</bcp14>", "<bcp14>SHOULD NOT</bcp14>", "<bcp14>RECOMMENDED</bc | |||
> | p14>", "<bcp14>NOT RECOMMENDED</bcp14>", "<bcp14>MAY</bcp14>", and "<bcp14>OPTIO | |||
<xref target="RFC8174"/> when, and only when, they appear in all | NAL</bcp14>" in this | |||
document are to be interpreted as described in BCP 14 <xref target="RFC2119" | ||||
format="default"/> | ||||
<xref target="RFC8174" format="default"/> when, and only when, they ap | ||||
pear in all | ||||
capitals, as shown here.</t> | capitals, as shown here.</t> | |||
</section> | ||||
</section> | <section anchor="sec-1.2" numbered="true" toc="default"> | |||
<name>Terminology</name> | ||||
<section title="Terminology" anchor="section-1.2"><t> | <t> | |||
The NVO3 Framework <xref target="RFC7365"/> defines many of the concepts comm | The Network | |||
only | Virtualization over Layer 3 (NVO3) Framework <xref target="RFC7365" format="d | |||
efault"/> defines many of the concepts commonly | ||||
used in network virtualization. In addition, the following terms are | used in network virtualization. In addition, the following terms are | |||
specifically meaningful in this document:</t> | specifically meaningful in this document:</t> | |||
<dl newline="false" spacing="normal"> | ||||
<t> | <dt>Checksum offload:</dt> | |||
Checksum offload. An optimization implemented by many NICs (Network Interfac | <dd>An optimization implemented by many NICs (Network Interface Controllers) | |||
e Controller) which | that enables computation and verification of upper-layer protocol | |||
enables computation and verification of upper layer protocol | ||||
checksums in hardware on transmit and receive, respectively. This | checksums in hardware on transmit and receive, respectively. This | |||
typically includes IP and TCP/UDP checksums which would otherwise be | typically includes IP and TCP/UDP checksums that would otherwise be | |||
computed by the protocol stack in software.</t> | computed by the protocol stack in software.</dd> | |||
<t> | <dt>Clos network:</dt> <dd>A technique for composing network fabrics larger tha | |||
Clos network. A technique for composing network fabrics larger than | n | |||
a single switch while maintaining non-blocking bandwidth across | a single switch while maintaining non-blocking bandwidth across | |||
connection points. ECMP is used to divide traffic across the | connection points. ECMP is used to divide traffic across the | |||
multiple links and switches that constitute the fabric. Sometimes | multiple links and switches that constitute the fabric. Sometimes | |||
termed "leaf and spine" or "fat tree" topologies.</t> | termed "leaf and spine" or "fat tree" topologies.</dd> | |||
<t> | <dt>ECMP:</dt> | |||
ECMP. Equal Cost Multipath. A routing mechanism for selecting from | <dd>Equal Cost Multipath. A routing mechanism for selecting from | |||
among multiple best next hop paths by hashing packet headers in order | among multiple best next-hop paths by hashing packet headers in order | |||
to better utilize network bandwidth while avoiding reordering of packets | to better utilize network bandwidth while avoiding reordering of packets | |||
within a flow.</t> | within a flow.</dd> | |||
<t> | <dt>Geneve:</dt><dd>Generic Network Virtualization Encapsulation. The tunneling | |||
Geneve. Generic Network Virtualization Encapsulation. The tunnel | protocol described in this document.</dd> | |||
protocol described in this document.</t> | ||||
<t> | <dt>LRO:</dt><dd>Large Receive Offload. The receiver-side equivalent function | |||
LRO. Large Receive Offload. The receive-side equivalent function of | of LSO, in which multiple protocol segments (primarily TCP) are coalesced into | |||
LSO, in which multiple protocol segments (primarily TCP) are | larger data units.</dd> | |||
coalesced into larger data units.</t> | ||||
<t> | <dt>LSO:</dt><dd> Large Segmentation Offload. A function provided by many | |||
LSO. Large Segmentation Offload. A function provided by many | ||||
commercial NICs that allows data units larger than the MTU to be | commercial NICs that allows data units larger than the MTU to be | |||
passed to the NIC to improve performance, the NIC being responsible | passed to the NIC to improve performance, the NIC being responsible | |||
for creating smaller segments of size less than or equal to the MTU | for creating smaller segments of a size less than or equal to the MTU | |||
with correct protocol headers. When referring specifically to TCP/ | with correct protocol headers. When referring specifically to TCP/IP, this | |||
IP, this feature is often known as TSO (TCP Segmentation Offload).</t> | feature is often known as TSO (TCP Segmentation Offload).</dd> | |||
<dt> | ||||
<t> | Middlebox:</dt><dd> In the context of this document, the term "middlebox" re | |||
Middlebox. The term middlebox in the context of this document refers to netw | fers to network | |||
ork | service functions or service interposition appliances that typically implemen | |||
service functions or appliances for service interposition that would typicall | t tunnel endpoint functionality, terminating and re-encapsulating Geneve traffic | |||
y | .</dd> | |||
implement NVE functionality, which terminate or re-encapsulate Geneve traffic | <dt>NIC:</dt><dd>Network Interface Controller. Also called "Network Inte | |||
.</t> | rface Card" or "Network Adapter". | |||
A NIC could be part of a tunnel endpoint or transit device and can either | ||||
<t> | process or aid in the processing of Geneve packets.</dd> | |||
NIC. Network Interface Controller. Also called as Network Interface Card or | <dt> | |||
Network Adapter. | Transit device:</dt> <dd> A forwarding element (e.g., router or switch) along | |||
A NIC could be part of a tunnel endpoint or transit device and can either pro | the path of the tunnel | |||
cess Geneve packets or | making up part of the underlay network. A transit device may be | |||
aid in the processing of Geneve packets.</t> | ||||
<t> | ||||
Transit device. A forwarding element (e.g. router or switch) along the path | ||||
of the tunnel | ||||
making up part of the Underlay Network. A transit device may be | ||||
capable of understanding the Geneve packet format but does not | capable of understanding the Geneve packet format but does not | |||
originate or terminate Geneve packets.</t> | originate or terminate Geneve packets.</dd> | |||
<dt> | ||||
<t> | Tunnel endpoint:</dt><dd> A component performing encapsulation and | |||
Tunnel endpoint. A component performing encapsulation and | ||||
decapsulation of packets, such as Ethernet frames or IP datagrams, in | decapsulation of packets, such as Ethernet frames or IP datagrams, in | |||
Geneve headers. As the ultimate consumer of any tunnel metadata, | Geneve headers. As the ultimate consumer of any tunnel metadata, | |||
tunnel endpoints have the highest level of requirements for parsing and | tunnel endpoints have the highest level of requirements for parsing and | |||
interpreting tunnel headers. Tunnel endpoints may consist of either | interpreting tunnel headers. Tunnel endpoints may consist of either | |||
software or hardware implementations or a combination of the two. | software or hardware implementations or a combination of the two. | |||
Tunnel endpoints are frequently a component of an NVE (Network Virtualization | Tunnel endpoints are frequently a component of a Network Virtualization Edge | |||
Edge) | (NVE) | |||
but may also be found in middleboxes or other elements making up an NVO3 Netw | but may also be found in middleboxes or other elements making up an NVO3 netw | |||
ork.</t> | ork.</dd> | |||
<dt>VM:</dt><dd>Virtual Machine.</dd> | ||||
<t> | </dl> | |||
VM. Virtual Machine.</t> | </section> | |||
</section> | ||||
</section> | <section anchor="sec-2" numbered="true" toc="default"> | |||
<name>Design Requirements</name> | ||||
</section> | <t> | |||
Geneve is designed to support network virtualization use cases for data cente | ||||
<section title="Design Requirements" anchor="section-2"><t> | r environments. In these situations, | |||
Geneve is designed to support network virtualization use cases for data cente | ||||
r environments, where | ||||
tunnels are typically established to act as a backplane between the | tunnels are typically established to act as a backplane between the | |||
virtual switches residing in hypervisors, physical switches, or | virtual switches residing in hypervisors, physical switches, or | |||
middleboxes or other appliances. An arbitrary IP network can be used | middleboxes or other appliances. An arbitrary IP network can be used | |||
as an underlay although Clos networks composed using ECMP links are a | as an underlay, although Clos networks composed using ECMP links are a | |||
common choice to provide consistent bisectional bandwidth across all | common choice to provide consistent bisectional bandwidth across all | |||
connection points. Many of the concepts of network virtualization overlays | connection points. Many of the concepts of network virtualization overlays | |||
over Layer 3 IP networks are described in the NVO3 Framework <xref target="RF | over IP networks are described in the NVO3 Framework <xref target="RFC7365" f | |||
C7365"/>. | ormat="default"/>. | |||
Figure 1 shows an example of a hypervisor, top of | <xref target="ref-sample-geneve-deployment"/> shows an example of a | |||
rack switch for connectivity to physical servers, and a WAN uplink | hypervisor, a top-of-rack switch for connectivity to physical servers, and a | |||
WAN uplink | ||||
connected using Geneve tunnels over a simplified Clos network. These | connected using Geneve tunnels over a simplified Clos network. These | |||
tunnels are used to encapsulate and forward frames from the attached | tunnels are used to encapsulate and forward frames from the attached | |||
components such as VMs or physical links.</t> | components, such as VMs or physical links.</t> | |||
<figure anchor="ref-sample-geneve-deployment"> | ||||
<figure title="Sample Geneve Deployment" anchor="ref-sample-geneve-deploy | <name>Sample Geneve Deployment</name> | |||
ment"><artwork><![CDATA[ | <artwork name="" type="" align="left" alt=""><![CDATA[ | |||
+---------------------+ +-------+ +------+ | +---------------------+ +-------+ +------+ | |||
| +--+ +-------+---+ | |Transit|--|Top of|==Physical | | +--+ +-------+---+ | |Transit|--|Top of|==Physical | |||
| |VM|--| | | | +------+ /|Router | | Rack |==Servers | | |VM|--| | | | +------+ /|Router | | Rack |==Servers | |||
| +--+ |Virtual|NIC|---|Top of|/ +-------+\/+------+ | | +--+ |Virtual|NIC|---|Top of|/ +-------+\/+------+ | |||
| +--+ |Switch | | | | Rack |\ +-------+/\+------+ | | +--+ |Switch | | | | Rack |\ +-------+/\+------+ | |||
| |VM|--| | | | +------+ \|Transit| |Uplink| WAN | | |VM|--| | | | +------+ \|Transit| |Uplink| WAN | |||
| +--+ +-------+---+ | |Router |--| |=========> | | +--+ +-------+---+ | |Router |--| |=========> | |||
+---------------------+ +-------+ +------+ | +---------------------+ +-------+ +------+ | |||
Hypervisor | Hypervisor | |||
()===================================() | ()===================================() | |||
Switch-Switch Geneve Tunnels | Switch-Switch Geneve Tunnels | |||
]]></artwork> | ]]></artwork> | |||
</figure> | </figure> | |||
<t> | ||||
To support the needs of network virtualization, the tunnel protocol | <t> | |||
To support the needs of network virtualization, the tunneling protocol | ||||
should be able to take advantage of the differing (and evolving) | should be able to take advantage of the differing (and evolving) | |||
capabilities of each type of device in both the underlay and overlay | capabilities of each type of device in both the underlay and overlay | |||
networks. This results in the following requirements being placed on | networks. This results in the following requirements being placed on | |||
the data plane tunneling protocol:</t> | the data plane tunneling protocol:</t> | |||
<ul spacing="normal"> | ||||
<t><list style="symbols"><t>The data plane is generic and extensible enou | <li>The data plane is generic and extensible enough to support current | |||
gh to support current | and future control planes.</li> | |||
and future control planes.</t> | <li>Tunnel components are efficiently implementable in both hardware | |||
<t>Tunnel components are efficiently implementable in both hardware | ||||
and software without restricting capabilities to the lowest common | and software without restricting capabilities to the lowest common | |||
denominator.</t> | denominator.</li> | |||
<li>High performance over existing IP fabrics is maintained.</li> | ||||
<t>High performance over existing IP fabrics.</t> | </ul> | |||
<t> | ||||
</list> | ||||
</t> | ||||
<t> | ||||
These requirements are described further in the following | These requirements are described further in the following | |||
subsections.</t> | subsections.</t> | |||
<section anchor="sec-2.1" numbered="true" toc="default"> | ||||
<section title="Control Plane Independence" anchor="section-2.1"><t> | <name>Control Plane Independence</name> | |||
<t> | ||||
Although some protocols for network virtualization have included a | Although some protocols for network virtualization have included a | |||
control plane as part of the tunnel format specification (most | control plane as part of the tunnel format specification (most | |||
notably, VXLAN <xref target="RFC7348"/> prescribed a multicast learning- | notably, VXLAN <xref target="RFC7348" format="default"/> prescribed a multica | |||
based control plane), these specifications have largely been treated | st-learning-based control plane), these specifications have largely been treated | |||
as describing only the data format. The VXLAN packet format has | as describing only the data format. The VXLAN packet format has | |||
actually seen a wide variety of control planes built on top of it.</t> | actually seen a wide variety of control planes built on top of it.</t> | |||
<t> | ||||
<t> | ||||
There is a clear advantage in settling on a data format: most of the | There is a clear advantage in settling on a data format: most of the | |||
protocols are only superficially different and there is little | protocols are only superficially different and there is little | |||
advantage in duplicating effort. However, the same cannot be said of | advantage in duplicating effort. However, the same cannot be said of | |||
control planes, which are diverse in very fundamental ways. The case | control planes, which are diverse in very fundamental ways. The case | |||
for standardization is also less clear given the wide variety in | for standardization is also less clear given the wide variety in | |||
requirements, goals, and deployment scenarios.</t> | requirements, goals, and deployment scenarios.</t> | |||
<t> | ||||
<t> | ||||
As a result of this reality, Geneve is a pure tunnel format | As a result of this reality, Geneve is a pure tunnel format | |||
specification that is capable of fulfilling the needs of many control | specification that is capable of fulfilling the needs of many control | |||
planes by explicitly not selecting any one of them. This | planes by explicitly not selecting any one of them. This | |||
simultaneously promotes a shared data format and reduces the | simultaneously promotes a shared data format and reduces the | |||
chance of obsolescence by future control plane | chance of obsolescence by future control plane | |||
enhancements.</t> | enhancements.</t> | |||
</section> | ||||
</section> | <section anchor="sec-2.2" numbered="true" toc="default"> | |||
<name>Data Plane Extensibility</name> | ||||
<section title="Data Plane Extensibility" anchor="section-2.2"><t> | <t> | |||
Achieving the level of flexibility needed to support current and | Achieving the level of flexibility needed to support current and | |||
future control planes effectively requires an options infrastructure | future control planes effectively requires an options infrastructure | |||
to allow new metadata types to be defined, deployed, and either | to allow new metadata types to be defined, deployed, and either | |||
finalized or retired. Options also allow for differentiation of | finalized or retired. Options also allow for differentiation of | |||
products by encouraging independent development in each vendor's core | products by encouraging independent development in each vendor's core | |||
specialty, leading to an overall faster pace of advancement. By far | specialty, leading to an overall faster pace of advancement. By far, | |||
the most common mechanism for implementing options is Type-Length- | the most common mechanism for implementing options is the Type-Length-Value ( | |||
Value (TLV) format.</t> | TLV) format.</t> | |||
<t> | <t> | |||
It should be noted that while options can be used to support non- | It should be noted that, while options can be used to support non-wirespeed | |||
wirespeed control packets, they are equally important on data packets | control packets, they are equally important in data packets | |||
as well to segregate and direct forwarding (for instance, the | as well for segregating and directing forwarding. (For instance, the | |||
examples given before of input port based security policies and | examples given before regarding input-port-based security policies and | |||
terminating/re-encapsulating service interposition both require tags to be pl | terminating/re-encapsulating service interposition both require tags | |||
aced on data | to be placed on data packets.) Therefore, while it would be desirable to lim | |||
packets). Therefore, while it would be desirable to limit the | it the | |||
extensibility to only control packets for the purposes of simplifying | extensibility to only control packets for the purposes of simplifying | |||
the datapath, that would not satisfy the design requirements.</t> | the datapath, that would not satisfy the design requirements.</t> | |||
<section anchor="sec-2.2.1" numbered="true" toc="default"> | ||||
<section title="Efficient Implementation" anchor="section-2.2.1"><t> | <name>Efficient Implementation</name> | |||
<t> | ||||
There is often a conflict between software flexibility and hardware | There is often a conflict between software flexibility and hardware | |||
performance that is difficult to resolve. For a given set of | performance that is difficult to resolve. For a given set of | |||
functionality, it is obviously desirable to maximize performance. | functionality, it is obviously desirable to maximize performance. | |||
However, that does not mean new features that cannot be run at a desired | However, that does not mean new features that cannot be run at a desired | |||
speed today should be disallowed. Therefore, for a protocol to be | speed today should be disallowed. Therefore, for a protocol to be considered | |||
efficiently implementable means that a set of common capabilities can | efficiently implementable, it is expected to have a set of common capabilitie | |||
be reasonably handled across platforms along with a graceful | s that can | |||
be reasonably handled across platforms as well as a graceful | ||||
mechanism to handle more advanced features in the appropriate | mechanism to handle more advanced features in the appropriate | |||
situations.</t> | situations.</t> | |||
<t> | <t> | |||
The use of a variable length header and options in a protocol often | The use of a variable-length header and options in a protocol often | |||
raises questions about whether it is truly efficiently implementable | raises questions about whether the protocol is truly efficiently | |||
in hardware. To answer this question in the context of Geneve, it is | implementable in hardware. To answer this question in the context of Geneve, | |||
it is | ||||
important to first divide "hardware" into two categories: tunnel | important to first divide "hardware" into two categories: tunnel | |||
endpoints and transit devices.</t> | endpoints and transit devices.</t> | |||
<t> | ||||
<t> | Tunnel endpoints must be able to parse the variable-length header, including | |||
Tunnel endpoints must be able to parse the variable header, including any | any | |||
options, and take action. Since these devices are actively | options, and take action. Since these devices are actively | |||
participating in the protocol, they are the most affected by Geneve. | participating in the protocol, they are the most affected by Geneve. | |||
However, as tunnel endpoints are the ultimate consumers of the data, | However, as tunnel endpoints are the ultimate consumers of the data, | |||
transmitters can tailor their output to the capabilities of the | transmitters can tailor their output to the capabilities of the | |||
recipient.</t> | recipient.</t> | |||
<t> | <t> | |||
Transit devices may be able to interpret the options, however, | Transit devices may be able to interpret the options; however, | |||
as non-terminating devices, transit devices | as non-terminating devices, transit devices | |||
do not originate or terminate the Geneve packet, hence MUST NOT modify Geneve | do not originate or terminate the Geneve packet. Hence, they <bcp14>MUST NOT< | |||
headers and | /bcp14> modify Geneve headers and | |||
MUST NOT insert or delete options, which is the responsibility of tunnel endp | <bcp14>MUST NOT</bcp14> insert or delete options, as that is the responsibili | |||
oints. | ty of tunnel endpoints. | |||
Options, if present in the packet, MUST only be generated and terminated by t | Options, if present in the packet, <bcp14>MUST</bcp14> only be generated and | |||
unnel endpoints. | terminated by tunnel endpoints. | |||
The participation of transit devices in interpreting options is | The participation of transit devices in interpreting options is | |||
OPTIONAL.</t> | <bcp14>OPTIONAL</bcp14>.</t> | |||
<t> | ||||
<t> | Further, either tunnel endpoints or transit devices <bcp14>MAY</bcp14> use of | |||
Further, either tunnel endpoints or transit devices MAY use offload | fload | |||
capabilities of NICs such as checksum offload to improve the | capabilities of NICs, such as checksum offload, to improve the | |||
performance of Geneve packet processing. The presence of a Geneve | performance of Geneve packet processing. The presence of a Geneve | |||
variable length header should not prevent the tunnel endpoints and | variable-length header should not prevent the tunnel endpoints and | |||
transit devices from using such offload capabilities.</t> | transit devices from using such offload capabilities.</t> | |||
</section> | ||||
</section> | </section> | |||
<section anchor="sec-2.3" numbered="true" toc="default"> | ||||
</section> | <name>Use of Standard IP Fabrics</name> | |||
<t> | ||||
<section title="Use of Standard IP Fabrics" anchor="section-2.3"><t> | IP has clearly cemented its place as the dominant transport mechanism, | |||
IP has clearly cemented its place as the dominant transport mechanism | ||||
and many techniques have evolved over time to make it robust, | and many techniques have evolved over time to make it robust, | |||
efficient, and inexpensive. As a result, it is natural to use IP | efficient, and inexpensive. As a result, it is natural to use IP | |||
fabrics as a transit network for Geneve. Fortunately, the use of IP | fabrics as a transit network for Geneve. Fortunately, the use of IP | |||
encapsulation and addressing is enough to achieve the primary goal of | encapsulation and addressing is enough to achieve the primary goal of | |||
delivering packets to the correct point in the network through | delivering packets to the correct point in the network through | |||
standard switching and routing.</t> | standard switching and routing.</t> | |||
<t> | ||||
<t> | ||||
In addition, nearly all underlay fabrics are designed to exploit | In addition, nearly all underlay fabrics are designed to exploit | |||
parallelism in traffic to spread load across multiple links without | parallelism in traffic to spread load across multiple links without | |||
introducing reordering in individual flows. These equal cost | introducing reordering in individual flows. These ECMP techniques typically | |||
multipathing (ECMP) techniques typically involve parsing and hashing | involve parsing and hashing | |||
the addresses and port numbers from the packet to select an outgoing | the addresses and port numbers from the packet to select an outgoing | |||
link. However, the use of tunnels often results in poor ECMP | link. However, the use of tunnels often results in poor ECMP | |||
performance without additional knowledge of the protocol as the | performance, as without additional knowledge of the protocol, the | |||
encapsulated traffic is hidden from the fabric by design and only | encapsulated traffic is hidden from the fabric by design, and only | |||
tunnel endpoint addresses are available for hashing.</t> | tunnel endpoint addresses are available for hashing.</t> | |||
<t> | ||||
<t> | ||||
Since it is desirable for Geneve to perform well on these existing | Since it is desirable for Geneve to perform well on these existing | |||
fabrics, it is necessary for entropy from encapsulated packets to be | fabrics, it is necessary for entropy from encapsulated packets to be | |||
exposed in the tunnel header. The most common technique for this is | exposed in the tunnel header. The most common technique for this is | |||
to use the UDP source port, which is discussed further in | to use the UDP source port, which is discussed further in | |||
<xref target="section-3.3"/>.</t> | <xref target="sec-3.3" format="default"/>.</t> | |||
</section> | ||||
</section> | </section> | |||
<section anchor="sec-3" numbered="true" toc="default"> | ||||
</section> | <name>Geneve Encapsulation Details</name> | |||
<t> | ||||
<section title="Geneve Encapsulation Details" anchor="section-3"><t> | ||||
The Geneve packet format consists of a compact tunnel header | The Geneve packet format consists of a compact tunnel header | |||
encapsulated in UDP over either IPv4 or IPv6. A small fixed tunnel | encapsulated in UDP over either IPv4 or IPv6. A small fixed tunnel | |||
header provides control information plus a base level of | header provides control information plus a base level of | |||
functionality and interoperability with a focus on simplicity. This | functionality and interoperability with a focus on simplicity. This | |||
header is then followed by a set of variable options to allow for | header is then followed by a set of variable-length options to allow for | |||
future innovation. Finally, the payload consists of a protocol data | future innovation. Finally, the payload consists of a protocol data | |||
unit of the indicated type, such as an Ethernet frame. <xref target="section | unit of the indicated type, such as an Ethernet frame. Sections <xref target= | |||
-3.1"/> | "sec-3.1" format="counter"/> | |||
and <xref target="section-3.2"/> illustrate the Geneve packet format transpor | and <xref target="sec-3.2" format="counter"/> illustrate the Geneve packet fo | |||
ted (for | rmat transported (for | |||
example) over Ethernet along with an Ethernet payload.</t> | example) over Ethernet along with an Ethernet payload.</t> | |||
<section anchor="sec-3.1" numbered="true" toc="default"> | ||||
<section title="Geneve Packet Format Over IPv4" anchor="section-3.1"> | <name>Geneve Packet Format over IPv4</name> | |||
<figure> | ||||
<figure><artwork><![CDATA[ | <name>Geneve Packet Format over IPv4</name> | |||
<artwork name="" type="" align="left" alt=""><![CDATA[ | ||||
0 1 2 3 | 0 1 2 3 | |||
0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 | 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 | |||
Outer Ethernet Header: | Outer Ethernet Header: | |||
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
| Outer Destination MAC Address | | | Outer Destination MAC Address | | |||
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
| Outer Destination MAC Address | Outer Source MAC Address | | | Outer Destination MAC Address | Outer Source MAC Address | | |||
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
| Outer Source MAC Address | | | Outer Source MAC Address | | |||
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
|Optional Ethertype=C-Tag 802.1Q| Outer VLAN Tag Information | | |Optional Ethertype=C-Tag 802.1Q| Outer VLAN Tag Information | | |||
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
skipping to change at line 430 ¶ | skipping to change at line 386 ¶ | |||
Outer Ethernet Header: | Outer Ethernet Header: | |||
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
| Outer Destination MAC Address | | | Outer Destination MAC Address | | |||
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
| Outer Destination MAC Address | Outer Source MAC Address | | | Outer Destination MAC Address | Outer Source MAC Address | | |||
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
| Outer Source MAC Address | | | Outer Source MAC Address | | |||
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
|Optional Ethertype=C-Tag 802.1Q| Outer VLAN Tag Information | | |Optional Ethertype=C-Tag 802.1Q| Outer VLAN Tag Information | | |||
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
| Ethertype=0x0800 | | | Ethertype = 0x0800 IPv4 | | |||
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
Outer IPv4 Header: | Outer IPv4 Header: | |||
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
|Version| IHL |Type of Service| Total Length | | |Version| IHL |Type of Service| Total Length | | |||
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
| Identification |Flags| Fragment Offset | | | Identification |Flags| Fragment Offset | | |||
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
| Time to Live |Protocol=17 UDP| Header Checksum | | | Time to Live |Protocol=17 UDP| Header Checksum | | |||
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
| Outer Source IPv4 Address | | | Outer Source IPv4 Address | | |||
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
| Outer Destination IPv4 Address | | | Outer Destination IPv4 Address | | |||
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
Outer UDP Header: | Outer UDP Header: | |||
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
| Source Port = xxxx | Dest Port = 6081 | | | Source Port = xxxx | Dest Port = 6081 Geneve | | |||
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
| UDP Length | UDP Checksum | | | UDP Length | UDP Checksum | | |||
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
Geneve Header: | Geneve Header: | |||
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
|Ver| Opt Len |O|C| Rsvd. | Protocol Type | | |Ver| Opt Len |O|C| Rsvd. | Protocol Type | | |||
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
| Virtual Network Identifier (VNI) | Reserved | | | Virtual Network Identifier (VNI) | Reserved | | |||
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
| Variable Length Options | | | | | |||
~ ~ | ~ Variable-Length Options ~ | |||
| | | | | | |||
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
Inner Ethernet Header (example payload): | Inner Ethernet Header (example payload): | |||
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
| Inner Destination MAC Address | | | Inner Destination MAC Address | | |||
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
| Inner Destination MAC Address | Inner Source MAC Address | | | Inner Destination MAC Address | Inner Source MAC Address | | |||
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
| Inner Source MAC Address | | | Inner Source MAC Address | | |||
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
|Optional Ethertype=C-Tag 802.1Q| Inner VLAN Tag Information | | |Optional Ethertype=C-Tag 802.1Q| Inner VLAN Tag Information | | |||
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
Payload: | Payload: | |||
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
| Ethertype of Original Payload | | | | Ethertype of Original Payload | | | |||
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | | |||
| Original Ethernet Payload | | | Original Ethernet Payload | | |||
| | | | | | |||
| (Note that the original Ethernet Frame's Preamble, Start Frame| | ~ (Note that the original Ethernet frame's preamble, start ~ | |||
| Delimiter(SFD) & Frame Check Sequence(FCS) are not included | | | frame delimiter (SFD), and frame check sequence (FCS) are not | | |||
| and the Ethernet Payload need not be 4-byte aligned) | | | included, and the Ethernet payload need not be 4-byte aligned)| | |||
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
Frame Check Sequence: | Frame Check Sequence: | |||
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
| New Frame Check Sequence (FCS) for Outer Ethernet Frame | | | New Frame Check Sequence (FCS) for Outer Ethernet Frame | | |||
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
]]></artwork> | ]]></artwork> | |||
</figure> | </figure> | |||
</section> | </section> | |||
<section anchor="sec-3.2" numbered="true" toc="default"> | ||||
<section title="Geneve Packet Format Over IPv6" anchor="section-3.2"> | <name>Geneve Packet Format over IPv6</name> | |||
<figure><name>Geneve Packet Format over IPv6</name> | ||||
<figure><artwork><![CDATA[ | <artwork name="" type="" align="left" alt=""><![CDATA[ | |||
0 1 2 3 | 0 1 2 3 | |||
0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 | 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 | |||
Outer Ethernet Header: | Outer Ethernet Header: | |||
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
| Outer Destination MAC Address | | | Outer Destination MAC Address | | |||
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
| Outer Destination MAC Address | Outer Source MAC Address | | | Outer Destination MAC Address | Outer Source MAC Address | | |||
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
| Outer Source MAC Address | | | Outer Source MAC Address | | |||
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
|Optional Ethertype=C-Tag 802.1Q| Outer VLAN Tag Information | | |Optional Ethertype=C-Tag 802.1Q| Outer VLAN Tag Information | | |||
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
skipping to change at line 509 ¶ | skipping to change at line 466 ¶ | |||
Outer Ethernet Header: | Outer Ethernet Header: | |||
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
| Outer Destination MAC Address | | | Outer Destination MAC Address | | |||
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
| Outer Destination MAC Address | Outer Source MAC Address | | | Outer Destination MAC Address | Outer Source MAC Address | | |||
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
| Outer Source MAC Address | | | Outer Source MAC Address | | |||
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
|Optional Ethertype=C-Tag 802.1Q| Outer VLAN Tag Information | | |Optional Ethertype=C-Tag 802.1Q| Outer VLAN Tag Information | | |||
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
| Ethertype=0x86DD | | | Ethertype = 0x86DD IPv6 | | |||
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
Outer IPv6 Header: | Outer IPv6 Header: | |||
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
|Version| Traffic Class | Flow Label | | |Version| Traffic Class | Flow Label | | |||
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
| Payload Length | NxtHdr=17 UDP | Hop Limit | | | Payload Length | NxtHdr=17 UDP | Hop Limit | | |||
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
| | | | | | |||
+ + | + + | |||
skipping to change at line 537 ¶ | skipping to change at line 494 ¶ | |||
+ + | + + | |||
| | | | | | |||
+ Outer Destination IPv6 Address + | + Outer Destination IPv6 Address + | |||
| | | | | | |||
+ + | + + | |||
| | | | | | |||
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
Outer UDP Header: | Outer UDP Header: | |||
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
| Source Port = xxxx | Dest Port = 6081 | | | Source Port = xxxx | Dest Port = 6081 Geneve | | |||
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
| UDP Length | UDP Checksum | | | UDP Length | UDP Checksum | | |||
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
Geneve Header: | Geneve Header: | |||
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
|Ver| Opt Len |O|C| Rsvd. | Protocol Type | | |Ver| Opt Len |O|C| Rsvd. | Protocol Type | | |||
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
| Virtual Network Identifier (VNI) | Reserved | | | Virtual Network Identifier (VNI) | Reserved | | |||
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
| Variable Length Options | | | | | |||
~ ~ | ~ Variable-Length Options ~ | |||
| | | | | | |||
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
Inner Ethernet Header (example payload): | Inner Ethernet Header (example payload): | |||
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
| Inner Destination MAC Address | | | Inner Destination MAC Address | | |||
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
| Inner Destination MAC Address | Inner Source MAC Address | | | Inner Destination MAC Address | Inner Source MAC Address | | |||
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
| Inner Source MAC Address | | | Inner Source MAC Address | | |||
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
|Optional Ethertype=C-Tag 802.1Q| Inner VLAN Tag Information | | |Optional Ethertype=C-Tag 802.1Q| Inner VLAN Tag Information | | |||
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
Payload: | Payload: | |||
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
| Ethertype of Original Payload | | | | Ethertype of Original Payload | | | |||
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | | |||
| Original Ethernet Payload | | | Original Ethernet Payload | | |||
| | | | | | |||
| (Note that the original Ethernet Frame's Preamble, Start Frame| | ~ (Note that the original Ethernet frame's preamble, start ~ | |||
| Delimiter(SFD) & Frame Check Sequence(FCS) are not included | | | frame delimiter (SFD), and frame check sequence (FCS) are not | | |||
| and the Ethernet Payload need not be 4-byte aligned) | | | included, and the Ethernet payload need not be 4-byte aligned)| | |||
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
Frame Check Sequence: | Frame Check Sequence: | |||
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
| New Frame Check Sequence (FCS) for Outer Ethernet Frame | | | New Frame Check Sequence (FCS) for Outer Ethernet Frame | | |||
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
]]></artwork> | ]]></artwork> | |||
</figure> | </figure> | |||
</section> | </section> | |||
<section anchor="sec-3.3" numbered="true" toc="default"> | ||||
<section title="UDP Header" anchor="section-3.3"><t> | <name>UDP Header</name> | |||
The use of an encapsulating UDP <xref target="RFC0768"/> header follows the | <t> | |||
The use of an encapsulating UDP <xref target="RFC0768" format="default"/> hea | ||||
der follows the | ||||
connectionless semantics of Ethernet and IP in addition to providing | connectionless semantics of Ethernet and IP in addition to providing | |||
entropy to routers performing ECMP. The header fields are therefore | entropy to routers performing ECMP. Therefore, header fields are | |||
interpreted as follows:</t> | interpreted as follows:</t> | |||
<dl newline="false" spacing="normal" indent="3"> | ||||
<t><list style="hanging" hangIndent="3"><t hangText="Source port:"> | <dt>Source Port:</dt> | |||
A source port selected by the originating tunnel | <dd> | |||
<vspace blankLines="0"/> | <t> | |||
endpoint. This source port SHOULD be the same for all packets | A source port selected by the originating tunnel endpoint. This source p | |||
ort <bcp14>SHOULD</bcp14> be the same for all packets | ||||
belonging to a single encapsulated flow to prevent reordering due | belonging to a single encapsulated flow to prevent reordering due | |||
to the use of different paths. To encourage an even distribution | to the use of different paths. To encourage an even distribution | |||
of flows across multiple links, the source port SHOULD be | of flows across multiple links, the source port <bcp14>SHOULD</bcp14> be | |||
calculated using a hash of the encapsulated packet headers using, | calculated using a hash of the encapsulated packet headers using, | |||
for example, a traditional 5-tuple. Since the port represents a | for example, a traditional 5-tuple. Since the port represents a | |||
flow identifier rather than a true UDP connection, the entire | flow identifier rather than a true UDP connection, the entire | |||
16-bit range MAY be used to maximize entropy. In addition to setting the s | 16-bit range <bcp14>MAY</bcp14> be used to maximize entropy. In addition t | |||
ource port, | o setting the source port, | |||
for IPv6, flow label MAY also be used for providing entropy. For an exampl | for IPv6, the flow label <bcp14>MAY</bcp14> also be used for providing ent | |||
e of | ropy. For an example of | |||
using IPv6 flow label for tunnel use cases, see <xref target="RFC6438"/>. | using the IPv6 flow label for tunnel use cases, see <xref target="RFC6438" | |||
<vspace blankLines="1"/> | format="default"/>. | |||
</t> | ||||
<t> | ||||
If Geneve traffic is shared with other UDP listeners | If Geneve traffic is shared with other UDP listeners | |||
on the same IP address, tunnel endpoints SHOULD implement a mechanism | on the same IP address, tunnel endpoints <bcp14>SHOULD</bcp14> implement a mechanism | |||
to ensure ICMP return traffic arising from network errors is directed | to ensure ICMP return traffic arising from network errors is directed | |||
to the correct listener. The definition of such a mechanism is beyond | to the correct listener. The definition of such a mechanism is beyond | |||
the scope of this document. | the scope of this document. | |||
</t> | </t> | |||
</dd> | ||||
<t hangText="Dest port:"> | <dt>Dest Port:</dt> | |||
IANA has assigned port 6081 as the fixed well-known | <dd> | |||
<vspace blankLines="0"/> | <t> | |||
destination port for Geneve. Although the well-known value should | IANA has assigned port 6081 as the fixed well-known destination port | |||
be used by default, it is RECOMMENDED that implementations make | for Geneve. Although the well-known value should be used by default, it | |||
is <bcp14>RECOMMENDED</bcp14> that implementations make | ||||
this configurable. The chosen port is used for identification of | this configurable. The chosen port is used for identification of | |||
Geneve packets and MUST NOT be reversed for different ends of a | Geneve packets and <bcp14>MUST NOT</bcp14> be reversed for different ends | |||
connection as is done with TCP. It is the responsibility of the control pl | of a | |||
ane for | connection as is done with TCP. It is the responsibility of the control pl | |||
any reconfiguration of the assigned port and its interpretation by respect | ane to manage any reconfiguration of the assigned port and its interpretation by | |||
ive devices. | respective devices. | |||
The definition of the control plane is beyond the scope of this document. | The definition of the control plane is beyond the scope of this document. | |||
</t> | </t> | |||
</dd> | ||||
<t hangText="UDP length:"> | <dt>UDP Length:</dt> | |||
The length of the UDP packet including the UDP header. | <dd> | |||
<vspace blankLines="0"/> | <t> | |||
</t> | The length of the UDP packet including the UDP header.</t> | |||
</dd> | ||||
<t hangText="UDP checksum:"> | <dt>UDP Checksum:</dt> | |||
In order to protect the Geneve header, options and | <dd> | |||
<vspace blankLines="0"/> | <t> | |||
payload from potential data corruption, UDP checksum SHOULD be generated | In order to protect the Geneve header, options, and payload from | |||
as | potential data corruption, the UDP checksum <bcp14>SHOULD</bcp14> be gene | |||
specified in <xref target="RFC0768"/> and <xref target="RFC1112"/> when | rated as | |||
specified in <xref target="RFC0768" format="default"/> and <xref target=" | ||||
RFC1122" format="default"/> when | ||||
Geneve is encapsulated in IPv4. To protect the IP header, Geneve header, | Geneve is encapsulated in IPv4. To protect the IP header, Geneve header, | |||
options and payload from potential data corruption, the UDP checksum MUST | options, and payload from potential data corruption, the UDP checksum <bc | |||
be generated by default as specified in <xref target="RFC0768"/> | p14>MUST</bcp14> | |||
and <xref target="RFC8200"/> when Geneve | be generated by default as specified in <xref target="RFC0768" format="de | |||
is encapsulated in IPv6, except for certain conditions, which are outline | fault"/> | |||
d in the next paragraph. | and <xref target="RFC8200" format="default"/> when Geneve | |||
Upon receiving such packets with non-zero UDP checksum, | is encapsulated in IPv6, except under certain conditions, which are outli | |||
the receiving tunnel endpoints MUST validate the checksum. | ned in the next paragraph. | |||
If the checksum is not correct, the packet MUST be dropped, otherwise | Upon receiving such packets with a non-zero UDP checksum, | |||
the packet MUST be accepted for decapsulation. | the receiving tunnel endpoints <bcp14>MUST</bcp14> validate the checksum. | |||
<vspace blankLines="1"/> | If the checksum is not correct, the packet <bcp14>MUST</bcp14> be dropped | |||
Under certain conditions, the UDP checksum MAY be set to zero on transmit | ; otherwise, | |||
for packets encapsulated in both IPv4 and IPv6 <xref target="RFC8200"/>. | the packet <bcp14>MUST</bcp14> be accepted for decapsulation. | |||
See <xref target="section-4.3"/> for additional requirements that apply w | </t> | |||
hen using zero | <t> | |||
Under certain conditions, the UDP checksum <bcp14>MAY</bcp14> be set to z | ||||
ero on transmit | ||||
for packets encapsulated in both IPv4 and IPv6 <xref target="RFC8200" for | ||||
mat="default"/>. | ||||
See <xref target="sec-4.3" format="default"/> for additional | ||||
requirements that apply when using zero | ||||
UDP checksum with IPv4 and IPv6. Disabling the use of UDP checksums is | UDP checksum with IPv4 and IPv6. Disabling the use of UDP checksums is | |||
an operational consideration that should take into account the risks | an operational consideration that should take into account the risks | |||
and effects of packet corruption. | and effects of packet corruption. | |||
</t> | </t> | |||
</dd> | ||||
</list> | </dl> | |||
</t> | </section> | |||
<section anchor="sec-3.4" numbered="true" toc="default"> | ||||
</section> | <name>Tunnel Header Fields</name> | |||
<dl newline="false" spacing="normal" indent="3"> | ||||
<section title="Tunnel Header Fields" anchor="section-3.4"><t><list style | <dt>Ver (2 bits):</dt> | |||
="hanging" hangIndent="3"><t hangText="Ver (2 bits):"> | <dd> | |||
The current version number is 0. Packets received by | <t> | |||
<vspace blankLines="0"/> | The current version number is 0. Packets received by a tunnel endpoint w | |||
a tunnel endpoint with an unknown version MUST be dropped. Transit | ith an unknown version <bcp14>MUST</bcp14> be dropped. Transit | |||
devices interpreting Geneve packets with an unknown | devices interpreting Geneve packets with an unknown | |||
version number MUST treat them as UDP packets with an unknown | version number <bcp14>MUST</bcp14> treat them as UDP packets with an unknown | |||
payload. | payload. | |||
</t> | </t> | |||
</dd> | ||||
<t hangText="Opt Len (6 bits):"> | <dt>Opt Len (6 bits):</dt> | |||
The length of the options fields, expressed in | <dd> | |||
<vspace blankLines="0"/> | <t> | |||
four byte multiples, not including the eight byte fixed tunnel | The length of the option fields, expressed in 4-byte multiples, not inclu | |||
ding the 8-byte fixed tunnel | ||||
header. This results in a minimum total Geneve header size of 8 | header. This results in a minimum total Geneve header size of 8 | |||
bytes and a maximum of 260 bytes. The start of the payload | bytes and a maximum of 260 bytes. The start of the payload | |||
headers can be found using this offset from the end of the base | headers can be found using this offset from the end of the base | |||
Geneve header. | Geneve header. | |||
<vspace blankLines="1"/> | </t> | |||
Transit devices MUST maintain consistent forwarding behavior | <t> | |||
Transit devices <bcp14>MUST</bcp14> maintain consistent forwarding behavior | ||||
irrespective of the value of 'Opt Len', including ECMP link | irrespective of the value of 'Opt Len', including ECMP link | |||
selection. | selection. | |||
</t> | </t> | |||
</dd> | ||||
<t hangText="O (1 bit):"> | <dt>O (1 bit):</dt> | |||
Control packet. This packet contains a control message. | <dd> | |||
<vspace blankLines="0"/> | <t> | |||
Control messages are sent between tunnel endpoints. | Control packet. This packet contains a control message. Control messages | |||
Tunnel endpoints MUST NOT forward the payload | are sent between tunnel endpoints. | |||
and transit devices MUST NOT attempt to interpret it. | Tunnel endpoints <bcp14>MUST NOT</bcp14> forward the payload, | |||
Since control messages are less frequent, it is RECOMMENDED | and transit devices <bcp14>MUST NOT</bcp14> attempt to interpret it. | |||
that tunnel endpoints direct these packets to a high priority control | Since control messages are less frequent, it is <bcp14>RECOMMENDED</bcp14> | |||
that tunnel endpoints direct these packets to a high-priority control | ||||
queue (for example, to direct the packet to a general purpose CPU | queue (for example, to direct the packet to a general purpose CPU | |||
from a forwarding ASIC or to separate out control traffic on a | from a forwarding Application-Specific Integrated Circuit (ASIC) or to sep | |||
NIC). Transit devices MUST NOT alter forwarding behavior on the | arate out control traffic on a | |||
NIC). Transit devices <bcp14>MUST NOT</bcp14> alter forwarding behavior o | ||||
n the | ||||
basis of this bit, such as ECMP link selection. | basis of this bit, such as ECMP link selection. | |||
</t> | </t> | |||
</dd> | ||||
<t hangText="C (1 bit):"> | <dt>C (1 bit):</dt> | |||
Critical options present. One or more options has the | <dd> | |||
<vspace blankLines="0"/> | <t> | |||
critical bit set (see <xref target="section-3.5"/>). If this bit is set | Critical options present. One or more options has the critical bit set ( | |||
then | see <xref target="sec-3.5" format="default"/>). If this bit is set, then | |||
tunnel endpoints MUST parse the options list to interpret any | tunnel endpoints <bcp14>MUST</bcp14> parse the options list to interpret a | |||
ny | ||||
critical options. On tunnel endpoints where option parsing is not | critical options. On tunnel endpoints where option parsing is not | |||
supported the packet MUST be dropped on the basis of the 'C' bit | supported, the packet <bcp14>MUST</bcp14> be dropped on the basis of the ' | |||
in the base header. If the bit is not set tunnel endpoints MAY | C' bit | |||
in the base header. If the bit is not set, tunnel endpoints <bcp14>MAY</b | ||||
cp14> | ||||
strip all options using 'Opt Len' and forward the decapsulated | strip all options using 'Opt Len' and forward the decapsulated | |||
packet. Transit devices MUST NOT drop packets on the | packet. Transit devices <bcp14>MUST NOT</bcp14> drop packets on the | |||
basis of this bit. | basis of this bit. | |||
</t> | </t> | |||
</dd> | ||||
<t hangText="Rsvd. (6 bits):"> | <dt>Rsvd. (6 bits):</dt> | |||
Reserved field, which MUST be zero on transmission | <dd> | |||
<vspace blankLines="0"/> | <t> | |||
and MUST be ignored on receipt. | Reserved field, which <bcp14>MUST</bcp14> be zero on transmission and <bc | |||
</t> | p14>MUST</bcp14> be ignored on receipt. | |||
</t> | ||||
<t hangText="Protocol Type (16 bits):"> | </dd> | |||
The type of the protocol data unit | <dt>Protocol Type (16 bits):</dt> | |||
<vspace blankLines="0"/> | <dd> | |||
appearing after the Geneve header. This follows the EtherType | <t> | |||
<xref target="ETYPES"/> convention; with Ethernet itself being represented | The type of protocol data unit appearing after the Geneve header. This f | |||
by the | ollows the Ethertype | |||
<xref target="ETYPES" format="default"/> convention, with Ethernet itself | ||||
being represented by the | ||||
value 0x6558. | value 0x6558. | |||
</t> | </t> | |||
</dd> | ||||
<t hangText="Virtual Network Identifier (VNI) (24 bits):"> | <dt>Virtual Network Identifier (VNI) (24 bits):</dt> | |||
An identifier for a | <dd> | |||
<vspace blankLines="0"/> | <t> | |||
unique element of a virtual network. In many situations this may | An identifier for a unique element of a virtual network. In many situati | |||
represent an L2 segment, however, the control plane defines the | ons, this may | |||
forwarding semantics of decapsulated packets. The VNI MAY be used | represent an L2 segment; however, the control plane defines the | |||
as part of ECMP forwarding decisions or MAY be used as a mechanism | forwarding semantics of decapsulated packets. The VNI <bcp14>MAY</bcp14> | |||
be used | ||||
as part of ECMP forwarding decisions or <bcp14>MAY</bcp14> be used as a me | ||||
chanism | ||||
to distinguish between overlapping address spaces contained in the | to distinguish between overlapping address spaces contained in the | |||
encapsulated packet when load balancing across CPUs. | encapsulated packet when load balancing across CPUs. | |||
</t> | </t> | |||
</dd> | ||||
<t hangText="Reserved (8 bits):"> | <dt>Reserved (8 bits):</dt> | |||
Reserved field which MUST be zero on transmission | <dd> | |||
<vspace blankLines="0"/> | <t> | |||
and ignored on receipt. | Reserved field, which <bcp14>MUST</bcp14> be zero on transmission and ign | |||
</t> | ored on receipt. | |||
</t> | ||||
</list> | </dd> | |||
</t> | </dl> | |||
</section> | </section> | |||
<section anchor="sec-3.5" numbered="true" toc="default"> | ||||
<section title="Tunnel Options" anchor="section-3.5"><figure><artwork><![ | <name>Tunnel Options</name> | |||
CDATA[ | <figure anchor="geneve-options"> | |||
<name>Geneve Option</name> | ||||
<artwork name="" type="" align="left" alt=""><![CDATA[ | ||||
0 1 2 3 | 0 1 2 3 | |||
0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 | 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 | |||
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
| Option Class | Type |R|R|R| Length | | | Option Class | Type |R|R|R| Length | | |||
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
| Variable Option Data | | | | | |||
~ ~ | ~ Variable-Length Option Data ~ | |||
| | | | | | |||
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
Geneve Option | ||||
]]></artwork> | ]]></artwork> | |||
</figure> | </figure> | |||
<t> | <t> | |||
The base Geneve header is followed by zero or more options in Type- | The base Geneve header is followed by zero or more options in Type-Length-Val | |||
Length-Value format. Each option consists of a four byte option | ue format. Each option consists of a 4-byte option | |||
header and a variable amount of option data interpreted according to | header and a variable amount of option data interpreted according to | |||
the type.</t> | the type.</t> | |||
<dl newline="false" spacing="normal" indent="3"> | ||||
<t><list style="hanging" hangIndent="3"><t hangText="Option Class (16 bit | <dt>Option Class (16 bits):</dt> | |||
s):"> | <dd> | |||
Namespace for the 'Type' field. IANA will | <t> | |||
<vspace blankLines="0"/> | Namespace for the 'Type' field. IANA has created a "Geneve Option Class" | |||
be requested to create a "Geneve Option Class" registry to | registry to | |||
allocate identifiers for organizations, technologies, and vendors | allocate identifiers for organizations, technologies, and vendors | |||
that have an interest in creating types for options. Each | that have an interest in creating types for options. Each | |||
organization may allocate types independently to allow | organization may allocate types independently to allow | |||
experimentation and rapid innovation. It is expected that over | experimentation and rapid innovation. It is expected that, over | |||
time certain options will become well known and a given | time, certain options will become well known, and a given | |||
implementation may use option types from a variety of sources. In | implementation may use option types from a variety of sources. In | |||
addition, IANA will be requested to reserve specific ranges for | addition, IANA has reserved specific ranges for | |||
allocation by IETF Review and for Experimental Use (see <xref target="sect | allocation by IETF Review and for Experimental Use (see <xref target="sec- | |||
ion-7"/>). | 7" format="default"/>). | |||
</t> | </t> | |||
</dd> | ||||
<t hangText="Type (8 bits):"> | <dt>Type (8 bits):</dt> | |||
Type indicating the format of the data contained in | <dd> | |||
<vspace blankLines="0"/> | <t> | |||
this option. Options are primarily designed to encourage future | Type indicating the format of the data contained in this option. Options | |||
extensibility and innovation and so standardized forms of these | are primarily designed to encourage future | |||
extensibility and innovation, and standardized forms of these | ||||
options will be defined in separate documents. | options will be defined in separate documents. | |||
<vspace blankLines="1"/> | </t> | |||
The high order bit of the option type indicates that this is a | <t> | |||
The high-order bit of the option type indicates that this is a | ||||
critical option. If the receiving tunnel endpoint does not recognize | critical option. If the receiving tunnel endpoint does not recognize | |||
this option and this bit is set then the packet MUST be dropped. | the option and this bit is set, then the packet <bcp14>MUST</bcp14> be dro | |||
If this bit is set in any option then the 'C' bit in the | pped. | |||
Geneve base header MUST also be set. Transit devices MUST NOT | If this bit is set in any option, then the 'C' bit in the | |||
Geneve base header <bcp14>MUST</bcp14> also be set. Transit devices <bcp1 | ||||
4>MUST NOT</bcp14> | ||||
drop packets on the basis of this bit. The following figure shows | drop packets on the basis of this bit. The following figure shows | |||
the location of the 'C' bit in the 'Type' field: | the location of the 'C' bit in the 'Type' field: | |||
</t> | </t> | |||
</dd> | ||||
</list> | </dl> | |||
</t> | <figure><name>'C' Bit in the 'Type' Field</name> | |||
<artwork name="" type="" align="left" alt=""><![CDATA[ | ||||
<figure><artwork><![CDATA[ | ||||
0 1 2 3 4 5 6 7 8 | 0 1 2 3 4 5 6 7 8 | |||
+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+ | |||
|C| Type | | |C| Type | | |||
+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+ | |||
]]></artwork> | ]]></artwork> | |||
</figure> | </figure> | |||
<t><list hangIndent="3" style="hanging"><t> | <dl newline="false" spacing="normal" indent="3"> | |||
<dt/> | ||||
<dd> | ||||
The requirement to drop a packet with an unknown option with the 'C' bit set | The requirement to drop a packet with an unknown option with the 'C' bit set | |||
applies to the entire tunnel endpoint system and not a particular | applies to the entire tunnel endpoint system and not a particular | |||
component of the implementation. For example, in a system | component of the implementation. For example, in a system | |||
comprised of a forwarding ASIC and a general purpose CPU, this | comprised of a forwarding ASIC and a general purpose CPU, this | |||
does not mean that the packet must be dropped in the ASIC. An | does not mean that the packet must be dropped in the ASIC. An | |||
implementation may send the packet to the CPU using a rate-limited | implementation may send the packet to the CPU using a rate-limited | |||
control channel for slow-path exception handling.</t> | control channel for slow-path exception handling.</dd> | |||
</dl> | ||||
</list> | <dl newline="false" spacing="normal" indent="3"> | |||
</t> | <dt>R (3 bits):</dt> | |||
<dd> | ||||
<t><list style="hanging" hangIndent="3"><t hangText="R (3 bits):"> | Option control flags reserved for future use. These bits <bcp14>MUST</bc | |||
Option control flags reserved for future use. These bits MUST be | p14> be | |||
zero on transmission and MUST be ignored on receipt. | zero on transmission and <bcp14>MUST</bcp14> be ignored on receipt. | |||
</t> | </dd> | |||
<dt>Length (5 bits):</dt> | ||||
<t hangText="Length (5 bits):"> | <dd> | |||
Length of the option, expressed in four byte | <t> | |||
<vspace blankLines="0"/> | Length of the option, expressed in 4-byte | |||
multiples excluding the option header. The total length of each | multiples, excluding the option header. The total length of each | |||
option may be between 4 and 128 bytes. A value of 0 in the Length field i | option may be between 4 and 128 bytes. A value of 0 in the 'Length' field | |||
mplies | implies | |||
an option with only an option header and no variable option data. | an option with only an option header and no option data. Packets | |||
Packets in which the total | in which the total | |||
length of all options is not equal to the 'Opt Len' in the base | length of all options is not equal to the 'Opt Len' in the base | |||
header are invalid and MUST be silently dropped if received by a | header are invalid and <bcp14>MUST</bcp14> be silently dropped if received by a | |||
tunnel endpoint that processes the options. | tunnel endpoint that processes the options. | |||
</t> | </t> | |||
</dd> | ||||
<t hangText="Variable Option Data:"> | <dt>Variable-Length Option Data:</dt> | |||
<dd> | ||||
<t> | ||||
Option data interpreted according to 'Type'. | Option data interpreted according to 'Type'. | |||
<vspace blankLines="0"/> | </t> | |||
</t> | ||||
</list> | ||||
</t> | ||||
<section title="Options Processing" anchor="section-3.5.1"><t> | </dd> | |||
</dl> | ||||
<section anchor="sec-3.5.1" numbered="true" toc="default"> | ||||
<name>Options Processing</name> | ||||
<t> | ||||
Geneve options are intended to be originated and processed | Geneve options are intended to be originated and processed | |||
by tunnel endpoints. However, options MAY be interpreted by transit | by tunnel endpoints. However, options <bcp14>MAY</bcp14> be interpreted by transit | |||
devices along the tunnel path. Transit devices not | devices along the tunnel path. Transit devices not | |||
interpreting Geneve headers (which may or may not include options) MUST handle | interpreting Geneve headers (which may or may not include options) <bcp14>MUST</bcp14> handle | |||
Geneve packets as any other UDP packet and maintain consistent forwarding behavior.</t> | Geneve packets as any other UDP packet and maintain consistent forwarding behavior.</t> | |||
<t> | ||||
<t> | ||||
In tunnel endpoints, the generation and interpretation of options is | In tunnel endpoints, the generation and interpretation of options is | |||
determined by the control plane, which is beyond the the scope of this | determined by the control plane, which is beyond the scope of this | |||
document. However, to ensure interoperability between heterogeneous | document. However, to ensure interoperability between heterogeneous | |||
devices some requirements are imposed on options and the devices that | devices, some requirements are imposed on options and the devices that | |||
process them:</t> | process them:</t> | |||
<ul spacing="normal"> | ||||
<t><list style="symbols"><t>Receiving tunnel endpoints MUST drop packets | <li>Receiving tunnel endpoints <bcp14>MUST</bcp14> drop packets cont | |||
containing unknown options | aining unknown options | |||
with the 'C' bit set in the option type. Conversely, transit | with the 'C' bit set in the option type. Conversely, transit | |||
devices MUST NOT drop packets as a result of encountering unknown | devices <bcp14>MUST NOT</bcp14> drop packets as a result of encountering u | |||
options, including those with the 'C' bit set.</t> | nknown | |||
options, including those with the 'C' bit set.</li> | ||||
<t>The contents of the options and their ordering MUST NOT be | <li>The contents of the options and their ordering <bcp14>MUST NOT</ | |||
modified by transit devices.</t> | bcp14> be | |||
modified by transit devices.</li> | ||||
<t>If a tunnel endpoint receives a Geneve packet with 'Opt Len' (total le | <li>If a tunnel endpoint receives a Geneve packet with an 'Opt Len' | |||
ngth of all options) | (the total length of all options) | |||
that exceeds the options processing capability of the tunnel endpoint the | that exceeds the options-processing capability of the tunnel endpoint, th | |||
n | en | |||
the tunnel endpoint MUST drop such packets. An implementation may raise a | the tunnel endpoint <bcp14>MUST</bcp14> drop such packets. An implementat | |||
n | ion may raise an | |||
exception to the control plane of such an event. It is the responsibility | exception to the control plane in such an event. It is the responsibility | |||
of the control plane to ensure the communicating peer tunnel endpoints | of the control plane to ensure the communicating peer tunnel endpoints | |||
have the processing capability to handle the total length of options. | have the processing capability to handle the total length of options. | |||
The definition of the control plane is beyond the scope of this document. | The definition of the control plane is beyond the scope of this document. | |||
</t> | </li> | |||
</list> | </ul> | |||
</t> | <t> | |||
<t> | ||||
When designing a Geneve option, it is important to consider how the | When designing a Geneve option, it is important to consider how the | |||
option will evolve in the future. Once an option is defined it is | option will evolve in the future. Once an option is defined, it is | |||
reasonable to expect that implementations may come to depend on a | reasonable to expect that implementations may come to depend on a | |||
specific behavior. As a result, the scope of any future changes must | specific behavior. As a result, the scope of any future changes must | |||
be carefully described upfront.</t> | be carefully described upfront.</t> | |||
<t> | ||||
<t> | Architecturally, options are intended to be self descriptive and independent. | |||
Architecturally, options are intended to be self-descriptive and independent. | This enables parallelism in options processing and reduces implementation com | |||
This enables parallelism in option processing and reduces implementation comp | plexity. | |||
lexity. | However, the control plane may impose certain ordering restrictions, as | |||
However, the control plane may impose certain ordering restrictions as | described in <xref target="sec-4.5.1" format="default"/>.</t> | |||
described in <xref target="section-4.5.1"/>.</t> | <t> | |||
<t> | ||||
Unexpectedly significant interoperability issues may result from | Unexpectedly significant interoperability issues may result from | |||
changing the length of an option that was defined to be a certain | changing the length of an option that was defined to be a certain | |||
size. A particular option is specified to have either a fixed | size. A particular option is specified to have either a fixed | |||
length, which is constant, or a variable length, which may change | length, which is constant, or a variable length, which may change | |||
over time or for different use cases. This property is part of the | over time or for different use cases. This property is part of the | |||
definition of the option and conveyed by the 'Type'. For fixed | definition of the option and is conveyed by the 'Type'. For fixed-length opt | |||
length options, some implementations may choose to ignore the length | ions, some implementations may choose to ignore the 'Length' | |||
field in the option header and instead parse based on the well known | field in the option header and instead parse based on the well-known | |||
length associated with the type. In this case, redefining the length | length associated with the type. In this case, redefining the length | |||
will impact not only parsing of the option in question but also any | will impact not only the parsing of the option in question but also any | |||
options that follow. Therefore, options that are defined to be fixed | options that follow. Therefore, options that are defined to be a fixed | |||
length in size MUST NOT be redefined to a different length. Instead, | length in size <bcp14>MUST NOT</bcp14> be redefined to a different length. I | |||
nstead, | ||||
a new 'Type' should be allocated. Actual definition of the option type is beyond | a new 'Type' should be allocated. Actual definition of the option type is beyond | |||
the scope of this document. The option type and its interpretation should be | the scope of this document. The option type and its interpretation should be | |||
defined by the entity that owns the option class.</t> | defined by the entity that owns the option class.</t> | |||
<t> | ||||
<t> | Options may be processed by NIC hardware utilizing offloads (e.g., LSO and LR | |||
Options may be processed by NIC hardware utilizing offloads (e.g. LSO and LRO | O) | |||
) | as described in <xref target="sec-4.6" format="default"/>. Careful considerat | |||
as described in <xref target="section-4.6"/>. Careful consideration should be | ion should be | |||
given to how the offload capabilities outlined in <xref target="section-4.6"/ | given to how the offload capabilities outlined in <xref target="sec-4.6" form | |||
> | at="default"/> | |||
impact an option's design. | impact an option's design. | |||
</t> | </t> | |||
</section> | ||||
</section> | </section> | |||
</section> | ||||
</section> | <section anchor="sec-4" numbered="true" toc="default"> | |||
<name>Implementation and Deployment Considerations</name> | ||||
</section> | <section anchor="sec-4.1" numbered="true" toc="default"> | |||
<name>Applicability Statement</name> | ||||
<section title="Implementation and Deployment Considerations" anchor="sec | <t> | |||
tion-4"> | Geneve is a UDP-based network virtualization overlay encapsulation protoc | |||
<section title="Applicability Statement" anchor="section-4.1"><t> | ol | |||
Geneve is a network virtualization overlay encapsulation protocol | ||||
designed to establish tunnels between NVEs over an existing IP network. | designed to establish tunnels between NVEs over an existing IP network. | |||
It is intended for use in public or private data center environments, | It is intended for use in public or private data center environments, | |||
for deploying multi-tenant overlay networks over an existing IP underlay network.</t> | for deploying multi-tenant overlay networks over an existing IP underlay network.</t> | |||
<t> | ||||
<t> | As a UDP-based protocol, Geneve adheres | |||
Geneve is a UDP based encapsulation protocol transported over existing | to the UDP usage guidelines as specified in <xref target="RFC8085" format | |||
IPv4 and IPv6 networks. Hence, as a UDP based protocol, Geneve adheres | ="default"/>. | |||
to the UDP usage guidelines as specified in <xref target="RFC8085"/>. | The applicability of these guidelines is dependent on the underlay | |||
The applicability of these guidelines are dependent on the underlay | IP network and the nature of the Geneve payload protocol | |||
IP network and the nature of Geneve payload protocol | (for example, TCP/IP, IP/Ethernet).</t> | |||
(example TCP/IP, IP/Ethernet).</t> | <t> | |||
<t> | ||||
Geneve is intended to be deployed in a data center network environment | Geneve is intended to be deployed in a data center network environment | |||
operated by a single operator or adjacent set of cooperating network | operated by a single operator or an adjacent set of cooperating network | |||
operators that fits with the definition of controlled environments | operators that fits with the definition of controlled environments | |||
in <xref target="RFC8085"/>. A network in a controlled environmen | in <xref target="RFC8085" format="default"/>. A network in a cont | |||
t can be | rolled environment can be | |||
managed to operate under certain conditions whereas in the general | managed to operate under certain conditions, whereas in the general | |||
Internet this cannot be done. Hence requirements for a tunnel | Internet, this cannot be done. Hence, requirements for a tunneling | |||
protocol operating under a controlled environment can be less | protocol operating under a controlled environment can be less | |||
restrictive than the requirements of the general Internet. | restrictive than the requirements of the general Internet. | |||
</t> | </t> | |||
<t> | ||||
<t> | ||||
For the purpose of this document, a traffic-managed controlled environment | For the purpose of this document, a traffic-managed controlled environment | |||
(TMCE) is defined as an IP network that is traffic-engineered and/or otherwise | (TMCE) is defined as an IP network that is traffic engineered and/or otherwise | |||
managed (e.g., via use of traffic rate limiters) to avoid congestion. The concept | managed (e.g., via use of traffic rate limiters) to avoid congestion. The concept | |||
of TMCE is outlined in <xref target="RFC8086"/>. Significant portions of | of a TMCE is outlined in <xref target="RFC8086" format="default"/>. Signi | |||
the text | ficant portions of the text | |||
in <xref target="section-4.1"/> through <xref target="section-4.3"/> are | in <xref target="sec-4.1" format="default"/> through <xref target="sec-4. | |||
based | 3" format="default"/> are based | |||
on <xref target="RFC8086"/> as applicable to Geneve.</t> | on <xref target="RFC8086" format="default"/> as applicable to Geneve.</t> | |||
<t> | ||||
<t> | ||||
It is the responsibility of the operator to ensure that the guidelines/requirements | It is the responsibility of the operator to ensure that the guidelines/requirements | |||
in this section are followed as applicable to their Geneve deployment(s). </t> | in this section are followed as applicable to their Geneve deployment(s). </t> | |||
</section> | </section> | |||
<section anchor="sec-4.2" numbered="true" toc="default"> | ||||
<section title="Congestion Control Functionality" anchor="section-4.2"><t | <name>Congestion-Control Functionality</name> | |||
> | <t> | |||
Geneve does not natively provide congestion control functionality and rel | Geneve does not natively provide congestion-control functionality and rel | |||
ies | ies | |||
on the payload protocol traffic for congestion control. As such Geneve MU | on the payload protocol traffic for congestion control. As such, Geneve < | |||
ST | bcp14>MUST</bcp14> | |||
be used with congestion controlled traffic or within a network that is | be used with congestion-controlled traffic or within a TMCE to avoid cong | |||
traffic managed to avoid congestion (TMCE). An operator of a traffic | estion. An operator of a TMCE may avoid congestion through careful provisioning | |||
managed network (TMCE) may avoid congestion by careful provisioning | of their networks, rate-limiting user data traffic, and managing traffic | |||
of their networks, rate-limiting of user data traffic and traffic | ||||
engineering according to path capacity.</t> | engineering according to path capacity.</t> | |||
</section> | </section> | |||
<section anchor="sec-4.3" numbered="true" toc="default"> | ||||
<section title="UDP Checksum" anchor="section-4.3"><t> | <name>UDP Checksum</name> | |||
In order to provide integrity of Geneve headers, options and payload, | <t> | |||
(for example to avoid misdelivery of payload to different tenant systems) | The outer UDP checksum <bcp14>SHOULD</bcp14> be used with Geneve when tra | |||
in case of data corruption, the outer UDP checksum SHOULD be used with Ge | nsported | |||
neve | over IPv4; this is to provide integrity for the Geneve headers, | |||
when transported over IPv4. The UDP checksum provides a statistical guara | options, and payload in case of data corruption (for example, to | |||
ntee | avoid misdelivery of the payload to different tenant systems). The UDP check | |||
sum provides a statistical guarantee | ||||
that a payload was not corrupted in transit. These integrity checks are not | that a payload was not corrupted in transit. These integrity checks are not | |||
strong from a coding or cryptographic perspective and are not designed to | strong from a coding or cryptographic perspective and are not designed to | |||
detect physical-layer errors or malicious modification of the datagram | detect physical-layer errors or malicious modification of the datagram | |||
(see Section 3.4 of <xref target="RFC8085"/>). In deployments where such | (see <xref target="RFC8085" sectionFormat="of" section="3.4"/>). In deplo | |||
a risk exists, | yments where such a risk exists, | |||
an operator SHOULD use additional data integrity mechanisms such as offer | an operator <bcp14>SHOULD</bcp14> use additional data integrity | |||
ed | mechanisms such as those offered | |||
by IPsec (see <xref target="section-6.2"/>).</t> | by IPsec (see <xref target="sec-6.2" format="default"/>).</t> | |||
<t> | ||||
An operator MAY choose to disable UDP checksums | ||||
and use zero checksums if Geneve packet integrity is provided by other da | ||||
ta | ||||
integrity mechanisms such as IPsec or additional checksums or if one of | ||||
the conditions in <xref target="section-4.3.1"/> a, b, c are met.</t> | ||||
<t> | <t> | |||
By default, UDP checksums MUST be used when Geneve is transported over IP | An operator <bcp14>MAY</bcp14> choose to disable UDP checksums | |||
v6. | and use zero UDP checksum if Geneve packet integrity is provided by other | |||
A tunnel endpoint MAY be configured for use with zero UDP checksum if | data | |||
additional requirements in <xref target="section-4.3.1"/> are met.</t> | integrity mechanisms, such as IPsec or additional checksums, or if one of | |||
the conditions (a, b, or c) in <xref target="sec-4.3.1" format="default"/ | ||||
<section title="UDP Zero Checksum Handling with IPv6" anchor="section-4.3 | > is met.</t> | |||
.1"><t> | <t> | |||
By default, UDP checksums <bcp14>MUST</bcp14> be used when Geneve is tran | ||||
sported over IPv6. | ||||
A tunnel endpoint <bcp14>MAY</bcp14> be configured for use with zero UDP | ||||
checksum if | ||||
additional requirements in <xref target="sec-4.3.1" format="default"/> ar | ||||
e met.</t> | ||||
<section anchor="sec-4.3.1" numbered="true" toc="default"> | ||||
<name>Zero UDP Checksum Handling with IPv6</name> | ||||
<t> | ||||
When Geneve is used over IPv6, the UDP checksum is used to protect IPv6 headers, | When Geneve is used over IPv6, the UDP checksum is used to protect IPv6 headers, | |||
UDP headers and Geneve headers, options and payload from potential data c | UDP headers, and Geneve headers, options, and payload from potential data | |||
orruption. | corruption. | |||
As such by default Geneve MUST use UDP checksums when transported over IP | As such, by default, Geneve <bcp14>MUST</bcp14> use UDP checksums when tr | |||
v6. | ansported over IPv6. | |||
An operator MAY choose to configure to operate with zero UDP checksum if | An operator <bcp14>MAY</bcp14> choose to configure zero UDP checksum if | |||
operating in a traffic managed controlled environment as stated in | operating in a TMCE as stated in | |||
<xref target="section-4.1"/> if one of the following conditions are met.< | <xref target="sec-4.1" format="default"/> if one of the following conditi | |||
/t> | ons is met.</t> | |||
<ol spacing="normal" type="a"> | ||||
<t><list style="letters"><t>It is known that the packet corruption is exc | <li>It is known that packet corruption is exceptionally | |||
eptionally | ||||
unlikely (perhaps based on knowledge of equipment types in their underlay | unlikely (perhaps based on knowledge of equipment types in their underlay | |||
network) and the operator is willing to take a risk of undetected packet | network) and the operator is willing to risk undetected packet | |||
corruption</t> | corruption.</li> | |||
<li>It is judged through observational measurements (perhaps through | ||||
<t>It is judged through observational measurements (perhaps through histo | historic | |||
ric | or current traffic flows that use non-zero checksum) that the level of pa | |||
or current traffic flows that use non zero checksum) that the level of pa | cket | |||
cket | corruption is tolerably low and is where the operator is willing to risk | |||
corruption is tolerably low and where the operator is willing to take | undetected corruption.</li> | |||
the risk of undetected corruption.</t> | <li>The Geneve payload is carrying applications that are tolerant of | |||
misdelivered | ||||
<t>Geneve payload is carrying applications that are tolerant of misdelive | or corrupted packets (perhaps through higher-layer checksum validation | |||
red | and/or reliability through retransmission). </li> | |||
or corrupted packets (perhaps through higher layer checksum validation | </ol> | |||
and/or reliability through retransmission) </t> | <t> In addition, Geneve tunnel implementations using zero UDP checksum | |||
</list> | <bcp14>MUST</bcp14> meet | |||
</t> | ||||
<t> In addition Geneve tunnel implementations using zero UDP checksum MUS | ||||
T meet | ||||
the following requirements:</t> | the following requirements:</t> | |||
<ol spacing="normal" type="1"> | ||||
<t><list style="numbers"><t>Use of UDP checksum over IPv6 MUST be the def | <li>Use of UDP checksum over IPv6 <bcp14>MUST</bcp14> be the default | |||
ault | configuration for all Geneve tunnels.</li> | |||
configuration for all Geneve tunnels.</t> | <li>If Geneve is used with zero UDP checksum over IPv6, then such | |||
a tunnel | ||||
<t>If Geneve is used with zero UDP checksum over IPv6 then such tunnel | endpoint implementation <bcp14>MUST</bcp14> meet all the requirements spe | |||
endpoint implementation MUST meet all the requirements specified | cified | |||
in Section 4 of <xref target="RFC6936"/> and requirement 1 as specified i | in <xref target="RFC6936" sectionFormat="of" section="4"/> and requiremen | |||
n | t 1 as specified in <xref target="RFC6936" sectionFormat="of" section="5"/> sinc | |||
Section 5 of <xref target="RFC6936"/> as that is relevant to Geneve.</t> | e it is relevant to Geneve.</li> | |||
<li>The Geneve tunnel endpoint that decapsulates the tunnel | ||||
<t>The Geneve tunnel endpoint that decapsulates the tunnel SHOULD check t | <bcp14>SHOULD</bcp14> check that the | |||
he | source and destination IPv6 addresses are valid for the Geneve tunnel tha | |||
source and destination IPv6 addresses are valid for the Geneve tunnel th | t | |||
at | ||||
is configured to receive zero UDP checksum and discard other packets | is configured to receive zero UDP checksum and discard other packets | |||
for which such check fails.</t> | for which such a check fails.</li> | |||
<li> | ||||
<t>The Geneve tunnel endpoint that encapsulates the tunnel MAY use differ | <t>The Geneve tunnel endpoint that encapsulates the tunnel <bcp14> | |||
ent | MAY</bcp14> use different | |||
IPv6 source addresses for each Geneve tunnel that uses zero UDP checksum mode | IPv6 source addresses for each Geneve tunnel that uses zero UDP checksum mode | |||
in order to strengthen the decapsulator's check of the IPv6 source address | in order to strengthen the decapsulator's check of the IPv6 source address | |||
(i.e the same IPv6 source address is not to be used with more than one IPv6 | (i.e., the same IPv6 source address is not to be used with more than one IPv6 | |||
destination address, irrespective of whether that destination address is | destination address, irrespective of whether that destination address is | |||
a unicast or multicast address). When this is not possible, it is RECOMMENDED | a unicast or multicast address). When this is not possible, it is <bcp14>RECOMMENDED</bcp14> | |||
to use each source address for as few Geneve tunnels that use zero UDP | to use each source address for as few Geneve tunnels that use zero UDP | |||
checksum as is feasible. | checksum as is feasible. | |||
<vspace blankLines="1"/> | </t> | |||
Note that (for requirements 3 and 4) the receiving tunnel endpoint can ap | <t> | |||
ply | Note that for requirements 3 and 4, the receiving tunnel endpoint can app | |||
ly | ||||
these checks only if it has out-of-band knowledge that the encapsulating tunnel | these checks only if it has out-of-band knowledge that the encapsulating tunnel | |||
endpoint is applying the indicated behavior. One possibility to obtain this out-of-band | endpoint is applying the indicated behavior. One possibility to obtain this out-of-band | |||
knowledge is through signaling by the control plane. The definition of | knowledge is through signaling by the control plane. The definition of | |||
the control plane is beyond the scope of this document.</t> | the control plane is beyond the scope of this document.</t> | |||
</li> | ||||
<t>Measures SHOULD be taken to prevent Geneve traffic over IPv6 with zero | <li>Measures <bcp14>SHOULD</bcp14> be taken to prevent Geneve traffi | |||
UDP | c over IPv6 with zero UDP | |||
checksum from escaping into the general Internet. Examples of such measures include | checksum from escaping into the general Internet. Examples of such measures include | |||
employing packet filters at the gateways or edge of Geneve network and/or | employing packet filters at the gateways or edge of the Geneve network and/or | |||
keeping logical or physical separation of the Geneve network from networks | keeping logical or physical separation of the Geneve network from networks | |||
carrying the general Internet traffic.</t> | carrying general Internet traffic.</li> | |||
</list> | </ol> | |||
</t> | <t> The above requirements do not change the requirements | |||
specified in either <xref target="RFC8200" format="default"/> or | ||||
<t> The above requirements do not change either the requirements | <xref target="RFC6936" format="default"/>. | |||
specified in <xref target="RFC8200"/> or | </t> | |||
the requirements specified in <xref target="RFC6936"/>. | <t>The use of the source IPv6 address in addition to the | |||
</t> | ||||
<t>The use of the source IPv6 address in addition to the | ||||
destination IPv6 address, plus the recommendation against | destination IPv6 address, plus the recommendation against | |||
reuse of source IPv6 addresses among Geneve tunnels collectively | reuse of source IPv6 addresses among Geneve tunnels, collectively | |||
provide some mitigation for the absence of UDP checksum coverage of | provide some mitigation for the absence of UDP checksum coverage of | |||
the IPv6 header. A traffic-managed controlled environment that satisfies | the IPv6 header. A traffic-managed controlled environment that satisfies | |||
at least one of three conditions listed at the beginning of | at least one of the three conditions listed at the beginning of | |||
this section provides additional assurance. | this section provides additional assurance. | |||
</t> | </t> | |||
</section> | ||||
<t> Editorial Note (The following paragraph to be removed by the | </section> | |||
RFC Editor before publication) </t> | <section anchor="sec-4.4" numbered="true" toc="default"> | |||
<t> It was discussed during TSVART early review if the level of requireme | <name>Encapsulation of Geneve in IP</name> | |||
nt for using | <t> | |||
different IPv6 source addresses for different tunnel destinations | As an IP-based tunneling protocol, Geneve shares many properties and | |||
would need to be "MAY" | ||||
or "SHOULD". The discussion concluded that it was appropriate to | ||||
keep this | ||||
as "MAY", since it was considered not realistic for control plane | ||||
s having to | ||||
maintain a high level of state on a per tunnel destination basis. | ||||
In addition, the | ||||
text above provides sufficient guidance to operators and implemen | ||||
tors on possible mitigations.</t> | ||||
</section> | ||||
</section> | ||||
<section title="Encapsulation of Geneve in IP" anchor="section-4.4"><t> | ||||
As an IP-based tunnel protocol, Geneve shares many properties and | ||||
techniques with existing protocols. The application of some of these | techniques with existing protocols. The application of some of these | |||
are described in further detail, although in general most concepts | are described in further detail, although, in general, most concepts | |||
applicable to the IP layer or to IP tunnels generally also function | applicable to the IP layer or to IP tunnels generally also function | |||
in the context of Geneve.</t> | in the context of Geneve.</t> | |||
<section anchor="sec-4.4.1" numbered="true" toc="default"> | ||||
<section title="IP Fragmentation" anchor="section-4.4.1"> | <name>IP Fragmentation</name> | |||
<t> | ||||
<t> | It is <bcp14>RECOMMENDED</bcp14> that Path MTU Discovery (see <xref | |||
It is strongly RECOMMENDED that Path MTU Discovery (<xref target="RFC1191"/>, | target="RFC1191" format="default"/> and <xref target="RFC8201" format="defaul | |||
<xref target="RFC8201"/>) be used to prevent or minimize fragmentation. | t"/>) be used to prevent or minimize fragmentation. | |||
The use of Path MTU Discovery on the transit network provides the | The use of Path MTU Discovery on the transit network provides the | |||
encapsulating tunnel endpoint with soft-state about the link that it may use | encapsulating tunnel endpoint with soft-state information about the link that it may use | |||
to prevent or minimize fragmentation depending on its role in the | to prevent or minimize fragmentation depending on its role in the | |||
virtualized network. The NVE can maintain this state (the MTU size of | virtualized network. The NVE can maintain this state (the MTU size of | |||
the tunnel link(s) associated with the tunnel endpoint), so if a | the tunnel link(s) associated with the tunnel endpoint), so if a | |||
tenant system sends large packets that when encapsulated exceed the | tenant system sends large packets that, when encapsulated, exceed the | |||
MTU size of the tunnel link, the tunnel endpoint can discard such | MTU size of the tunnel link, the tunnel endpoint can discard such | |||
packets and send exception messages to the tenant system(s). If the | packets and send exception messages to the tenant system(s). If the | |||
tunnel endpoint is associated with a routing or forwarding function and/or has the capability | tunnel endpoint is associated with a routing or forwarding function and/or has the capability | |||
to send ICMP messages, the encapsulating tunnel endpoint MAY send ICMP fragme | to send ICMP messages, the encapsulating tunnel endpoint <bcp14>MAY</bcp14> s | |||
ntation | end ICMP fragmentation | |||
needed <xref target="RFC0792"/> or Packet Too Big <xref target="RFC4443"/> me | needed <xref target="RFC0792" format="default"/> or Packet Too Big <xref targ | |||
ssages to the tenant system(s). | et="RFC4443" format="default"/> messages to the tenant system(s). | |||
When determining the MTU size of a tunnel link, maximum length of options MUS | When determining the MTU size of a tunnel link, the maximum length of options | |||
T be assumed as options may vary | <bcp14>MUST</bcp14> be assumed as options may vary | |||
on a per-packet basis. For example, recommendations/guidance for handling fra | on a per-packet basis. Recommendations and guidance for handling fragmentatio | |||
gmentation in | n in | |||
similar overlay encapsulation services like PWE3 are provided in | similar overlay encapsulation services like Pseudowire Emulation | |||
Section 5.3 of <xref target="RFC3985"/>.</t> | Edge-to-Edge (PWE3) are provided in <xref target="RFC3985" | |||
sectionFormat="of" section="5.3"/>.</t> | ||||
<t> | <t> | |||
Note that some implementations may not be capable of supporting | Note that some implementations may not be capable of supporting | |||
fragmentation or other less common features of the IP header, such as | fragmentation or other less common features of the IP header, such as | |||
options and extension headers. For example, some of the issues associated | options and extension headers. Some of the issues associated | |||
with MTU size and fragmentation in IP tunneling and use of ICMP messages is | with MTU size and fragmentation in IP tunneling and use of ICMP messages are | |||
outlined in Section 4.2 of <xref target="I-D.ietf-intarea-tunnels"/>.</t> | outlined in <xref target="I-D.ietf-intarea-tunnels" | |||
sectionFormat="of" section="4.2"/>.</t> | ||||
</section> | </section> | |||
<section anchor="sec-4.4.2" numbered="true" toc="default"> | ||||
<section title="DSCP, ECN and TTL" anchor="section-4.4.2"><t> | <name>DSCP, ECN, and TTL</name> | |||
<t> | ||||
When encapsulating IP (including over Ethernet) packets in Geneve, | When encapsulating IP (including over Ethernet) packets in Geneve, | |||
there are several considerations for propagating DSCP and ECN bits | there are several considerations for propagating Differentiated Services | |||
Code Point (DSCP) and Explicit Congestion Notification (ECN) bits | ||||
from the inner header to the tunnel on transmission and the reverse | from the inner header to the tunnel on transmission and the reverse | |||
on reception.</t> | on reception.</t> | |||
<t> | <t> | |||
<xref target="RFC2983"/> provides guidance for mapping DSCP between inner and | <xref target="RFC2983" format="default"/> provides guidance for mapping DSCP | |||
outer | between inner and outer | |||
IP headers. Network virtualization is typically more closely aligned | IP headers. Network virtualization is typically more closely aligned | |||
with the Pipe model described, where the DSCP value on the tunnel | with the Pipe model described, where the DSCP value on the tunnel | |||
header is set based on a policy (which may be a fixed value, one | header is set based on a policy (which may be a fixed value, one | |||
based on the inner traffic class, or some other mechanism for | based on the inner traffic class or some other mechanism for | |||
grouping traffic). Aspects of the Uniform model (which treats the | grouping traffic). Aspects of the Uniform model (which treats the | |||
inner and outer DSCP value as a single field by copying on ingress | inner and outer DSCP values as a single field by copying on ingress | |||
and egress) may also apply, such as the ability to remark the inner | and egress) may also apply, such as the ability to re-mark the inner | |||
header on tunnel egress based on transit marking. However, the | header on tunnel egress based on transit marking. However, the | |||
Uniform model is not conceptually consistent with network | Uniform model is not conceptually consistent with network | |||
virtualization, which seeks to provide strong isolation between | virtualization, which seeks to provide strong isolation between | |||
encapsulated traffic and the physical network.</t> | encapsulated traffic and the physical network.</t> | |||
<t> | ||||
<t> | <xref target="RFC6040" format="default"/> describes the mechanism for exposin | |||
<xref target="RFC6040"/> describes the mechanism for exposing ECN capabilitie | g ECN capabilities on IP | |||
s on IP | ||||
tunnels and propagating congestion markers to the inner packets. | tunnels and propagating congestion markers to the inner packets. | |||
This behavior MUST be followed for IP packets encapsulated in Geneve.</t> | This behavior <bcp14>MUST</bcp14> be followed for IP packets encapsulated in | |||
<t> | Geneve.</t> | |||
Though Uniform or Pipe models could be used for TTL (or Hop Limit in case of | <t> | |||
IPv6) | Though either the Uniform or Pipe models could be used for handling TTL (or H | |||
handling when tunneling IP packets, the Pipe model is more aligned with netwo | op Limit in case of IPv6) when tunneling IP packets, the Pipe model is more cons | |||
rk virtualization. | istent with network virtualization. | |||
<xref target="RFC2003"/> provides guidance on handling TTL between inner IP h | <xref target="RFC2003" format="default"/> provides guidance on handling TTL b | |||
eader and outer IP tunnels; | etween inner IP header and outer IP tunnels; | |||
this model is more aligned with the Pipe model and is RECOMMENDED for | this model is similar to the Pipe model and is <bcp14>RECOMMENDED</bcp14> for | |||
use with Geneve for network virtualization applications.</t> | use with Geneve for network virtualization applications.</t> | |||
</section> | ||||
</section> | <section anchor="sec-4.4.3" numbered="true" toc="default"> | |||
<name>Broadcast and Multicast</name> | ||||
<section title="Broadcast and Multicast" anchor="section-4.4.3"><t> | <t> | |||
Geneve tunnels may either be point-to-point unicast between two | Geneve tunnels may either be point-to-point unicast between two | |||
tunnel endpoints or may utilize broadcast or multicast addressing. It is | tunnel endpoints or utilize broadcast or multicast addressing. It is | |||
not required that inner and outer addressing match in this respect. | not required that inner and outer addressing match in this respect. | |||
For example, in physical networks that do not support multicast, | For example, in physical networks that do not support multicast, | |||
encapsulated multicast traffic may be replicated into multiple | encapsulated multicast traffic may be replicated into multiple | |||
unicast tunnels or forwarded by policy to a unicast location | unicast tunnels or forwarded by policy to a unicast location | |||
(possibly to be replicated there).</t> | (possibly to be replicated there).</t> | |||
<t> | ||||
<t> | With physical networks that do support multicast, it may be desirable | |||
With physical networks that do support multicast it may be desirable | ||||
to use this capability to take advantage of hardware replication for | to use this capability to take advantage of hardware replication for | |||
encapsulated packets. In this case, multicast addresses may be | encapsulated packets. In this case, multicast addresses may be | |||
allocated in the physical network corresponding to tenants, | allocated in the physical network corresponding to tenants, | |||
encapsulated multicast groups, or some other factor. The allocation | encapsulated multicast groups, or some other factor. The allocation | |||
of these groups is a component of the control plane and therefore | of these groups is a component of the control plane and, therefore, | |||
is beyond the scope of this document.</t> | is beyond the scope of this document.</t> | |||
<t> | ||||
<t> | ||||
When physical multicast is in | When physical multicast is in | |||
use, devices with heterogeneous capabilities may be present in the same group. | use, devices with heterogeneous capabilities may be present in the same group. | |||
Some options may only be interpretable by a subset of the devices in the group. | Some options may only be interpretable by a subset of the devices in the group. | |||
Other devices can safely ignore such options unless the 'C' bit is set to | Other devices can safely ignore such options unless the 'C' bit is set to | |||
mark the unknown option as critical. Requirements outlined in <xref target="section-3.4"/> | mark the unknown option as critical. The requirements outlined in <xref target="sec-3.4" format="default"/> | |||
apply for critical options.</t> | apply for critical options.</t> | |||
<t> | ||||
<t> | In addition, <xref target="RFC8293" format="default"/> provides examples of v | |||
In addition, <xref target="RFC8293"/> provides examples of various mechanisms | arious mechanisms that can | |||
that can | ||||
be used for multicast handling in network virtualization overlay networks.</t> | be used for multicast handling in network virtualization overlay networks.</t> | |||
</section> | ||||
</section> | <section anchor="sec-4.4.4" numbered="true" toc="default"> | |||
<name>Unidirectional Tunnels</name> | ||||
<section title="Unidirectional Tunnels" anchor="section-4.4.4"><t> | <t> | |||
Generally speaking, a Geneve tunnel is a unidirectional concept. IP | Generally speaking, a Geneve tunnel is a unidirectional concept. IP | |||
is not a connection oriented protocol and it is possible for two | is not a connection-oriented protocol, and it is possible for two | |||
tunnel endpoints to communicate with each other using different paths or to | tunnel endpoints to communicate with each other using different paths or to | |||
have one side not transmit anything at all. As Geneve is an IP-based | have one side not transmit anything at all. As Geneve is an IP-based | |||
protocol, the tunnel layer inherits these same characteristics.</t> | protocol, the tunnel layer inherits these same characteristics.</t> | |||
<t> | ||||
<t> | ||||
It is possible for a tunnel to encapsulate a protocol, such as TCP, | It is possible for a tunnel to encapsulate a protocol, such as TCP, | |||
which is connection oriented and maintains session state at that | that is connection oriented and maintains session state at that | |||
layer. In addition, implementations MAY model Geneve tunnels as | layer. In addition, implementations <bcp14>MAY</bcp14> model Geneve tunnels | |||
connected, bidirectional links, such as to provide the abstraction of | as | |||
connected, bidirectional links, for example, to provide the abstraction of | ||||
a virtual port. In both of these cases, bidirectionality of the | a virtual port. In both of these cases, bidirectionality of the | |||
tunnel is handled at a higher layer and does not affect the operation | tunnel is handled at a higher layer and does not affect the operation | |||
of Geneve itself.</t> | of Geneve itself.</t> | |||
</section> | ||||
</section> | </section> | |||
<section anchor="sec-4.5" numbered="true" toc="default"> | ||||
</section> | <name>Constraints on Protocol Features</name> | |||
<t> | ||||
<section title="Constraints on Protocol Features" anchor="section-4.5"><t | Geneve is intended to be flexible for use with a wide range of current and | |||
> | ||||
Geneve is intended to be flexible to a wide range of current and | ||||
future applications. As a result, certain constraints may be placed | future applications. As a result, certain constraints may be placed | |||
on the use of metadata or other aspects of the protocol in order to | on the use of metadata or other aspects of the protocol in order to | |||
optimize for a particular use case. For example, some applications | optimize for a particular use case. For example, some applications | |||
may limit the types of options which are supported or enforce a | may limit the types of options that are supported or enforce a | |||
maximum number or length of options. Other applications may only | maximum number or length of options. Other applications may only | |||
handle certain encapsulated payload types, such as Ethernet or IP. | handle certain encapsulated payload types, such as Ethernet or IP. | |||
This could be either globally throughout the system or, for example, | These optimizations can be implemented either globally (throughout | |||
restricted to certain classes of devices or network paths.</t> | the system) or locally (for example, restricted to certain classes | |||
of devices or network paths).</t> | ||||
<t> | <t> | |||
These constraints may be communicated to tunnel endpoints either | These constraints may be communicated to tunnel endpoints either | |||
explicitly through a control plane or implicitly by the nature of the | explicitly through a control plane or implicitly by the nature of the | |||
application. As Geneve is defined as a data plane protocol that is | application. As Geneve is defined as a data plane protocol that is | |||
control plane agnostic, definition of such mechanisms are beyond the scope of this | control plane agnostic, definition of such mechanisms is beyond the scope of this | |||
document.</t> | document.</t> | |||
<section anchor="sec-4.5.1" numbered="true" toc="default"> | ||||
<section title="Constraints on Options" anchor="section-4.5.1"><t> | <name>Constraints on Options</name> | |||
<t> | ||||
While Geneve options are flexible, a control plane may restrict | While Geneve options are flexible, a control plane may restrict | |||
the number of option TLVs as well as the order and size of the TLVs | the number of option TLVs as well as the order and size of the TLVs | |||
between tunnel endpoints to make it simpler for a data plane | between tunnel endpoints to make it simpler for a data plane | |||
implementation in software or hardware to handle <xref target="I-D.ietf-nvo3- | implementation in software or hardware to handle (see <xref target="I-D.ietf- | |||
encap"/>. | nvo3-encap" format="default"/>). | |||
For example, there may be some critical information such as a secure | For example, there may be some critical information, such as a secure | |||
hash that must be processed in a certain order to provide lowest | hash, that must be processed in a certain order to provide the lowest | |||
latency or there may be other scenarios where the options must be processed | latency, or there may be other scenarios where the options must be | |||
in a certain order due to protocol semantics.</t> | processed in a given order due to protocol semantics.</t> | |||
<t> | ||||
<t> | ||||
A control plane may negotiate a subset of option TLVs and certain TLV | A control plane may negotiate a subset of option TLVs and certain TLV | |||
ordering, as well may limit the total number of option TLVs present | ordering; it may also limit the total number of option TLVs present | |||
in the packet, for example, to accommodate hardware capable of | in the packet, for example, to accommodate hardware capable of | |||
processing fewer options <xref target="I-D.ietf-nvo3-encap"/>. Hence, a cont | processing fewer options. Hence, a control plane | |||
rol plane | needs to have the ability to describe the supported TLV subset and | |||
needs to have the ability to describe the supported TLVs subset and | its ordering to the tunnel endpoints. In the absence of a control | |||
their order to the tunnel endpoints. In the absence of a control | ||||
plane, alternative configuration mechanisms may be used for this | plane, alternative configuration mechanisms may be used for this | |||
purpose. Such mechanisms are beyond the scope of this document.</t> | purpose. Such mechanisms are beyond the scope of this document.</t> | |||
</section> | ||||
</section> | ||||
<section anchor="sec-4.6" numbered="true" toc="default"> | ||||
<name>NIC Offloads</name> | ||||
</section> | <t> | |||
</section> | ||||
<section title="NIC Offloads" anchor="section-4.6"><t> | ||||
Modern NICs currently provide a variety of offloads to enable the | Modern NICs currently provide a variety of offloads to enable the | |||
efficient processing of packets. The implementation of many of these | efficient processing of packets. The implementation of many of these | |||
offloads requires only that the encapsulated packet be easily parsed | offloads requires only that the encapsulated packet be easily parsed | |||
(for example, checksum offload). However, optimizations such as LSO | (for example, checksum offload). However, optimizations such as LSO | |||
and LRO involve some processing of the options themselves since they | and LRO involve some processing of the options themselves since they | |||
must be replicated/merged across multiple packets. In these | must be replicated/merged across multiple packets. In these | |||
situations, it is desirable to not require changes to the offload | situations, it is desirable not to require changes to the offload | |||
logic to handle the introduction of new options. To enable this, | logic to handle the introduction of new options. To enable this, | |||
some constraints are placed on the definitions of options to allow | some constraints are placed on the definitions of options to allow | |||
for simple processing rules:</t> | for simple processing rules:</t> | |||
<ul spacing="normal"> | ||||
<t><list style="symbols"><t>When performing LSO, a NIC MUST replicate the | <li>When performing LSO, a NIC <bcp14>MUST</bcp14> replicate the entir | |||
entire Geneve header | e Geneve header | |||
and all options, including those unknown to the device, onto each | and all options, including those unknown to the device, onto each | |||
resulting segment unless an option allows an exception. | resulting segment unless an option allows an exception. | |||
Conversely, when performing LRO, a NIC may assume that a | Conversely, when performing LRO, a NIC may assume that a | |||
binary comparison of the options (including unknown options) is | binary comparison of the options (including unknown options) is | |||
sufficient to ensure equality and MAY merge packets with equal | sufficient to ensure equality and <bcp14>MAY</bcp14> merge packets with eq | |||
Geneve headers.</t> | ual | |||
Geneve headers.</li> | ||||
<t>Options MUST NOT be reordered during the course of offload | <li>Options <bcp14>MUST NOT</bcp14> be reordered during the course of | |||
processing, including when merging packets for the purpose of LRO.</t> | offload | |||
processing, including when merging packets for the purpose of LRO.</li> | ||||
<t>NICs performing offloads MUST NOT drop packets with unknown | <li>NICs performing offloads <bcp14>MUST NOT</bcp14> drop packets with | |||
options, including those marked as critical, unless explicitly configured. | unknown | |||
</t> | options, including those marked as critical, unless explicitly configured | |||
to do so.</li> | ||||
</list> | </ul> | |||
</t> | <t> | |||
<t> | ||||
There is no requirement that a given implementation of Geneve employ | There is no requirement that a given implementation of Geneve employ | |||
the offloads listed as examples above. However, as these offloads | the offloads listed as examples above. However, as these offloads | |||
are currently widely deployed in commercially available NICs, the | are currently widely deployed in commercially available NICs, the | |||
rules described here are intended to enable efficient handling of | rules described here are intended to enable efficient handling of | |||
current and future options across a variety of devices.</t> | current and future options across a variety of devices.</t> | |||
</section> | ||||
</section> | <section anchor="sec-4.7" numbered="true" toc="default"> | |||
<name>Inner VLAN Handling</name> | ||||
<section title="Inner VLAN Handling" anchor="section-4.7"><t> | <t> | |||
Geneve is capable of encapsulating a wide range of protocols and | Geneve is capable of encapsulating a wide range of protocols; therefore, a gi | |||
therefore a given implementation is likely to support only a small | ven implementation is likely to support only a small | |||
subset of the possibilities. However, as Ethernet is expected to be | subset of the possibilities. However, as Ethernet is expected to be | |||
widely deployed, it is useful to describe the behavior of VLANs | widely deployed, it is useful to describe the behavior of VLANs | |||
inside encapsulated Ethernet frames.</t> | inside encapsulated Ethernet frames.</t> | |||
<t> | ||||
<t> | As with any protocol, support for inner VLAN headers is <bcp14>OPTIONAL</bcp1 | |||
As with any protocol, support for inner VLAN headers is OPTIONAL. In | 4>. In | |||
many cases, the use of encapsulated VLANs may be disallowed due to | many cases, the use of encapsulated VLANs may be disallowed due to | |||
security or implementation considerations. However, in other cases | security or implementation considerations. However, in other cases, the trun | |||
trunking of VLAN frames across a Geneve tunnel can prove useful. As | king of VLAN frames across a Geneve tunnel can prove useful. As | |||
a result, the processing of inner VLAN tags upon ingress or egress | a result, the processing of inner VLAN tags upon ingress or egress | |||
from a tunnel endpoint is based upon the configuration of the tunnel | from a tunnel endpoint is based upon the configuration of the tunnel | |||
endpoint and/or control plane and not explicitly defined as part of | endpoint and/or control plane and is not explicitly defined as part of | |||
the data format.</t> | the data format.</t> | |||
</section> | ||||
</section> | </section> | |||
<section anchor="sec-5" numbered="true" toc="default"> | ||||
</section> | <name>Transition Considerations</name> | |||
<t> | ||||
<section title="Transition Considerations" anchor="section-5"><t> | ||||
Viewed exclusively from the data plane, Geneve is compatible with existing IP networks | Viewed exclusively from the data plane, Geneve is compatible with existing IP networks | |||
as it appears to most devices as UDP packets. | as it appears to most devices as UDP packets. | |||
However, as there are already a number of tunnel protocols deployed | However, as there are already a number of tunneling protocols deployed | |||
in network virtualization environments, there is a practical question | in network virtualization environments, there is a practical question | |||
of transition and coexistence.</t> | of transition and coexistence.</t> | |||
<t> | ||||
<t> | ||||
Since Geneve builds on the base data plane functionality provided by the most | Since Geneve builds on the base data plane functionality provided by the most | |||
common protocols used for network virtualization (VXLAN, NVGRE) | common protocols used for network virtualization (VXLAN and NVGRE), | |||
it should be straightforward to port an existing control plane | it should be straightforward to port an existing control plane | |||
to run on top of it with minimal effort. With both the old and new | to run on top of it with minimal effort. With both the old and new | |||
packet formats supporting the same set of capabilities, there is no | packet formats supporting the same set of capabilities, there is no | |||
need for a hard transition - tunnel endpoints directly communicating with | need for a hard transition; tunnel endpoints directly communicating with | |||
each other can use any common protocol, which may be different even | each other can use any common protocol, which may be different even | |||
within a single overall system. As transit devices are primarily | within a single overall system. | |||
As transit devices are primarily | ||||
forwarding packets on the basis of the IP header, all protocols | forwarding packets on the basis of the IP header, all protocols | |||
appear similar and these devices do not introduce additional | appear to be similar, and these devices do not introduce additional | |||
interoperability concerns.</t> | interoperability concerns.</t> | |||
<t> | ||||
<t> | ||||
To assist with this transition, it is strongly suggested that | To assist with this transition, it is strongly suggested that | |||
implementations support simultaneous operation of both Geneve and | implementations support simultaneous operation of both Geneve and | |||
existing tunnel protocols as it is expected to be common for a single | existing tunneling protocols, as it is expected to be common for a single | |||
node to communicate with a mixture of other nodes. Eventually, older | node to communicate with a mixture of other nodes. Eventually, older | |||
protocols may be phased out as they are no longer in use.</t> | protocols may be phased out as they are no longer in use.</t> | |||
</section> | ||||
<section anchor="sec-6" numbered="true" toc="default"> | ||||
<name>Security Considerations</name> | ||||
</section> | <t> | |||
As it is encapsulated within a UDP/IP packet, Geneve does not have any inhere | ||||
<section title="Security Considerations" anchor="section-6"><t> | nt security | |||
As encapsulated within a UDP/IP packet, Geneve does not have any inherent sec | mechanisms. | |||
urity | As a result, an attacker with access to the underlay | |||
mechanisms. As a result, an attacker with access to the underlay | network transporting the IP packets has the ability to snoop on, alter, or | |||
network transporting the IP packets has the ability to snoop, alter or | ||||
inject packets. Compromised tunnel endpoints or transit devices may also | inject packets. Compromised tunnel endpoints or transit devices may also | |||
spoof identifiers in the tunnel header to gain access to networks | spoof identifiers in the tunnel header to gain access to networks | |||
owned by other tenants.</t> | owned by other tenants.</t> | |||
<t> | ||||
<t> | ||||
Within a particular security domain, such as a data center operated | Within a particular security domain, such as a data center operated | |||
by a single service provider, the most common and highest performing security | by a single service provider, the most common and highest-performing security | |||
mechanism is isolation of trusted components. Tunnel traffic can be | mechanism is isolation of trusted components. Tunnel traffic can be | |||
carried over a separate VLAN and filtered at any untrusted | carried over a separate VLAN and filtered at any untrusted | |||
boundaries.</t> | boundaries.</t> | |||
<t> | ||||
<t> | ||||
When crossing an untrusted link, such as the general Internet, VPN technologies such as IPsec | When crossing an untrusted link, such as the general Internet, VPN technologies such as IPsec | |||
<xref target="RFC4301"/> should be used to provide authentication and/or encr | <xref target="RFC4301" format="default"/> should be used to provide authentic | |||
yption of | ation and/or encryption of | |||
the IP packets formed as part of Geneve encapsulation (See <xref target="sect | the IP packets formed as part of Geneve encapsulation (see <xref target="sec- | |||
ion-6.1.1"/>).</t> | 6.1.1" format="default"/>).</t> | |||
<t> | ||||
<t> | ||||
Geneve does not otherwise affect the security of the encapsulated | Geneve does not otherwise affect the security of the encapsulated | |||
packets. As per the guidelines of BCP 72 <xref target="RFC3552"/>, the following sections | packets. As per the guidelines of BCP 72 <xref target="RFC3552" format="default"/>, the following sections | |||
describe potential security risks that may be applicable to Geneve deployments | describe potential security risks that may be applicable to Geneve deployments | |||
and approaches to mitigate such risks. It is also noted that not all such risks are applicable | and approaches to mitigate such risks. It is also noted that not all such risks are applicable | |||
to all Geneve deployment scenarios, i.e., only a subset may be applicable to certain deployments. | to all Geneve deployment scenarios, i.e., only a subset may be applicable to certain deployments. | |||
So an operator has to make an assessment based on their network environment a | An operator has to make an assessment based on their network | |||
nd determine the risks | environment, determine the risks that are applicable to their specific enviro | |||
that are applicable to their specific environment and use appropriate mitigat | nment, and use appropriate mitigation approaches as applicable. </t> | |||
ion approaches as applicable. </t> | <section anchor="sec-6.1" numbered="true" toc="default"> | |||
<name>Data Confidentiality</name> | ||||
<section title="Data Confidentiality" anchor="section-6.1"><t> | <t> | |||
Geneve is a network virtualization overlay encapsulation protocol | Geneve is a network virtualization overlay encapsulation protocol | |||
designed to establish tunnels between NVEs | designed to establish tunnels between NVEs | |||
over an existing IP network. It can be used to deploy multi-tenant overlay networks | over an existing IP network. It can be used to deploy multi-tenant overlay networks | |||
over an existing IP underlay network in a public or private data center. | over an existing IP underlay network in a public or private data center. | |||
The overlay service is typically provided by a service provider, for exam | ||||
ple a | The overlay service is typically provided by a service provider, such as | |||
cloud services provider or a private data center operator, this may or ma | a
y not be | cloud service provider or a private data center operator. This may or may
not be | ||||
the same provider as an underlay service provider. Due to the nature of multi-tenancy in such environments, | the same provider as an underlay service provider. Due to the nature of multi-tenancy in such environments, | |||
a tenant system may expect data confidentiality to ensure its packet data is not tampered with | a tenant system may expect data confidentiality to ensure its packet data is not tampered with | |||
(active attack) in transit or a target of unauthorized monitoring (passiv | (i.e., active attack) in transit or is a target of unauthorized | |||
e attack) | monitoring (i.e., passive attack), for example, by other tenant systems o | |||
for example by other tenant systems or underlay service provider. | r underlay service provider. | |||
A compromised network node or a transit device within a | A compromised network node or a transit device within a | |||
data center may passively monitor Geneve packet data between NVEs; or route | data center may passively monitor Geneve packet data between NVEs or route | |||
traffic for further inspection. A tenant may | traffic for further inspection. A tenant may | |||
expect the overlay service provider to provide data confidentiality as part of the service or | expect the overlay service provider to provide data confidentiality as part of the service, or | |||
a tenant may bring its own data confidentiality mechanisms like IPsec or TLS to protect the data | a tenant may bring its own data confidentiality mechanisms like IPsec or TLS to protect the data | |||
end to end between its tenant systems. The overlay provider is expected to provide | end to end between its tenant systems. The overlay provider is expected to provide | |||
cryptographic protection in cases where the underlay provider is not the | cryptographic protection in cases where the underlay provider is not the | |||
same as the overlay provider to ensure the payload is not exposed to the underlay.</t> | same as the overlay provider to ensure the payload is not exposed to the underlay.</t> | |||
<t> | <t> | |||
If an operator determines data confidentiality is necessary in their environment | If an operator determines data confidentiality is necessary in their environment | |||
based on their risk analysis, for example as in multi-tenant environments | based on their risk analysis -- for example, in multi-tenant | |||
, | environments -- then an encryption mechanism <bcp14>SHOULD</bcp14> be use | |||
then an encryption mechanism SHOULD be used to encrypt the tenant | d to encrypt the tenant | |||
data end to end between the NVEs. The NVEs may use existing well establis | data end to end between the NVEs. The NVEs may use existing well-establis | |||
hed | hed | |||
encryption mechanisms such as IPsec, DTLS, etc.</t> | encryption mechanisms, such as IPsec, DTLS, etc.</t> | |||
<section anchor="sec-6.1.1" numbered="true" toc="default"> | ||||
<section title="Inter-Data Center Traffic" anchor="section-6.1.1"><t> | <name>Inter-Data Center Traffic</name> | |||
<t> | ||||
A tenant system in a customer premises (private data center) may want to connect | A tenant system in a customer premises (private data center) may want to connect | |||
to tenant systems on their tenant overlay network in a public cloud data | to tenant systems on their tenant overlay network in a public cloud data | |||
center | center, or a tenant may want to have its tenant systems located in multiple geog | |||
or a tenant may want to have its tenant systems located in multiple geogr | raphically | |||
aphically | ||||
separated data centers for high availability. Geneve data traffic between tenant systems | separated data centers for high availability. Geneve data traffic between tenant systems | |||
across such separated networks should be protected from threats when traversing public networks. | across such separated networks should be protected from threats when traversing public networks. | |||
Any Geneve overlay data leaving the data center network beyond the operator's security domain | Any Geneve overlay data leaving the data center network beyond the operator's security domain | |||
SHOULD be secured by encryption mechanisms such as | <bcp14>SHOULD</bcp14> be secured by encryption mechanisms, such as | |||
IPsec or other VPN technologies to protect the communications between the | IPsec or other VPN technologies, to protect the communications between th | |||
NVEs | e NVEs | |||
when they are geographically separated over untrusted network links. Specification of | when they are geographically separated over untrusted network links. Specification of | |||
data protection mechanisms employed between data centers is beyond the scope of this document.</t> | data protection mechanisms employed between data centers is beyond the scope of this document.</t> | |||
<t> | <t> | |||
The principles described in <xref target="section-4"/> regarding controll | The principles described in <xref target="sec-4" format="default"/> regar | |||
ed environments still apply to | ding controlled environments still apply to | |||
the geographically separated data center usage outlined in this section.</t> | the geographically separated data center usage outlined in this section.</t> | |||
</section> | </section> | |||
</section> | ||||
</section> | <section anchor="sec-6.2" numbered="true" toc="default"> | |||
<name>Data Integrity</name> | ||||
<section title="Data Integrity" anchor="section-6.2"><t> | <t> | |||
Geneve encapsulation is used between NVEs to establish overlay tunnels over an existing | Geneve encapsulation is used between NVEs to establish overlay tunnels over an existing | |||
IP underlay network. In a multi-tenant data center, a rogue or compromised tenant system | IP underlay network. In a multi-tenant data center, a rogue or compromised tenant system | |||
may try to launch a passive attack such as monitoring the traffic of othe | may try to launch a passive attack, such as monitoring the traffic of oth | |||
r tenants, or an | er tenants, or an | |||
active attack such as trying to inject unauthorized Geneve encapsulated t | active attack, such as trying to inject unauthorized Geneve encapsulated | |||
raffic such | traffic such | |||
as spoofing, replay, etc., into the network. To prevent such attacks, an | as spoofing, replay, etc., into the network. To prevent such attacks, an | |||
NVE MUST NOT | NVE <bcp14>MUST NOT</bcp14> | |||
propagate Geneve packets beyond the NVE to tenant systems and SHOULD empl | propagate Geneve packets beyond the NVE to tenant systems and <bcp14>SHOU | |||
oy packet filtering | LD</bcp14> employ packet-filtering | |||
mechanisms so as not to forward unauthorized traffic between tenant systems in different tenant networks. | mechanisms so as not to forward unauthorized traffic between tenant systems in different tenant networks. | |||
An NVE MUST NOT interpret Geneve packets from tenant systems other than a | An NVE <bcp14>MUST NOT</bcp14> interpret Geneve packets from tenant syste | |||
s frames to be encapsulated.</t> | ms other than as frames to be encapsulated.</t> | |||
<t> | ||||
<t> | ||||
A compromised network node or a transit device within a data center may launch an active | A compromised network node or a transit device within a data center may launch an active | |||
attack trying to tamper with the Geneve packet data between NVEs. Malicious tampering of | attack trying to tamper with the Geneve packet data between NVEs. Malicious tampering of | |||
Geneve header fields may cause the packet from one tenant to be forwarded to a different | Geneve header fields may cause the packet from one tenant to be forwarded to a different | |||
tenant network. If an operator determines the possibility of such threat in their environment, | tenant network. If an operator determines there is a possibility of such a threat in their environment, | |||
the operator may choose to employ data integrity mechanisms between NVEs. In order to prevent | the operator may choose to employ data integrity mechanisms between NVEs. In order to prevent | |||
such risks, a data integrity mechanism SHOULD be used in such environment | such risks, a data integrity mechanism <bcp14>SHOULD</bcp14> be used in s | |||
s to protect the | uch environments to protect the | |||
integrity of Geneve packets including packet headers, options and payload | integrity of Geneve packets, including packet headers, options, and paylo | |||
on communications | ad on communications | |||
between NVE pairs. A cryptographic data protection mechanism such as IPse | between NVE pairs. A cryptographic data protection mechanism, such as IPs | |||
c may be used to | ec, may be used to | |||
provide data integrity protection. A data center operator may choose to deploy any other | provide data integrity protection. A data center operator may choose to deploy any other | |||
data integrity mechanisms as applicable and supported in their underlay networks, | data integrity mechanisms as applicable and supported in their underlay networks, | |||
although non-cryptographic mechanisms may not protect the Geneve portion of the packet from tampering. </t> | although non-cryptographic mechanisms may not protect the Geneve portion of the packet from tampering. </t> | |||
</section> | </section> | |||
<section anchor="sec-6.3" numbered="true" toc="default"> | ||||
<section title="Authentication of NVE peers" anchor="section-6.3"><t> | <name>Authentication of NVE Peers</name> | |||
<t> | ||||
A rogue network device or a compromised NVE in a data center environment might be able to | A rogue network device or a compromised NVE in a data center environment might be able to | |||
spoof Geneve packets as if it came from a legitimate NVE. In order to mitigate such a risk, | spoof Geneve packets as if it came from a legitimate NVE. In order to mitigate such a risk, | |||
an operator SHOULD use an authentication mechanism, such as IPsec to ensu | an operator <bcp14>SHOULD</bcp14> use an authentication mechanism, such a | |||
re that the | s IPsec, to ensure that the | |||
Geneve packet originated from the intended NVE peer, in environments wher | Geneve packet originated from the intended NVE peer in environments where | |||
e the operator | the operator | |||
determines spoofing or rogue devices is a potential threat. Other simpler | determines spoofing or rogue devices are potential threats. Other simpler | |||
source checks | source checks, | |||
such as ingress filtering for VLAN/MAC/IP address, reverse path forwardin | such as ingress filtering for VLAN/MAC/IP addresses, reverse path forward | |||
g checks, etc., | ing checks, etc., | |||
may be used in certain trusted environments to ensure Geneve packets originated | may be used in certain trusted environments to ensure Geneve packets originated | |||
from the intended NVE peer.</t> | from the intended NVE peer.</t> | |||
</section> | </section> | |||
<section anchor="sec-6.4" numbered="true" toc="default"> | ||||
<section title="Options Interpretation by Transit Devices" anchor="sectio | <name>Options Interpretation by Transit Devices</name> | |||
n-6.4"><t> | <t> | |||
Options, if present in the packet, are generated and terminated by tunnel endpoints. As indicated | Options, if present in the packet, are generated and terminated by tunnel endpoints. As indicated | |||
in <xref target="section-2.2.1"/>, transit devices may interpret the opti | in <xref target="sec-2.2.1" format="default"/>, transit devices may inter | |||
ons. However, | pret the options. However, | |||
if the packet is protected by tunnel endpoint to tunnel endpoint encrypti | if the packet is protected by encryption from tunnel endpoint | |||
on, for example | to tunnel endpoint (for example, through IPsec), transit devices will not | |||
through IPsec, transit devices will not have visibility into the Geneve h | have visibility into the Geneve header or options | |||
eader or options | in the packet. In such cases, transit devices <bcp14>MUST</bcp14> handle | |||
in the packet. In such cases transit devices MUST handle Geneve packets | Geneve packets as any other IP packet | |||
as any other IP packet | ||||
and maintain consistent forwarding behavior. In cases where options are interpreted by transit devices, the operator | and maintain consistent forwarding behavior. In cases where options are interpreted by transit devices, the operator | |||
MUST ensure that transit devices are trusted and not compromised. The definition of | <bcp14>MUST</bcp14> ensure that transit devices are trusted and not compromised. The definition of | |||
a mechanism to ensure this trust is beyond the scope of this document.</t> | a mechanism to ensure this trust is beyond the scope of this document.</t> | |||
</section> | </section> | |||
<section anchor="sec-6.5" numbered="true" toc="default"> | ||||
<section title="Multicast/Broadcast" anchor="section-6.5"><t> | <name>Multicast/Broadcast</name> | |||
<t> | ||||
In typical data center networks where IP multicasting is not supported in the underlay | In typical data center networks where IP multicasting is not supported in the underlay | |||
network, multicasting may be supported using multiple unicast tunnels. The same security | network, multicasting may be supported using multiple unicast tunnels. The same security | |||
requirements as described in the above sections can be used to protect Geneve communications | requirements as described in the above sections can be used to protect Geneve communications | |||
between NVE peers. If IP multicasting is supported in the underlay network and the operator | between NVE peers. If IP multicasting is supported in the underlay network and the operator | |||
chooses to use it for multicast traffic among tunnel endpoints, then the operator in such | chooses to use it for multicast traffic among tunnel endpoints, then the operator in such | |||
environments may use data protection mechanisms such as IPsec with multic | environments may use data protection mechanisms, such as IPsec with multi | |||
ast | cast | |||
extensions <xref target="RFC5374"/> to protect multicast traffic among Ge | extensions <xref target="RFC5374" format="default"/>, to protect multicas | |||
neve NVE groups.</t> | t traffic among Geneve NVE groups.</t> | |||
</section> | </section> | |||
<section anchor="sec-6.6" numbered="true" toc="default"> | ||||
<section title="Control Plane Communications" anchor="section-6.6"><t> | <name>Control Plane Communications</name> | |||
A Network Virtualization Authority (NVA) as outlined in <xref target="RFC | <t> | |||
8014"/> may | A Network Virtualization Authority (NVA) as outlined in <xref target="RFC | |||
8014" format="default"/> may | ||||
be used as a control plane for configuring and managing the Geneve NVEs. The data center | be used as a control plane for configuring and managing the Geneve NVEs. The data center | |||
operator is expected to use security mechanisms to protect the communications between | operator is expected to use security mechanisms to protect the communications between | |||
the NVA to NVEs and use authentication mechanisms to detect any rogue or compromised | the NVA and NVEs and to use authentication mechanisms to detect any rogue or compromised | |||
NVEs within their administrative domain. Data protection mechanisms for control plane | NVEs within their administrative domain. Data protection mechanisms for control plane | |||
communication or authentication mechanisms between the NVA and the NVEs are beyond | communication or authentication mechanisms between the NVA and NVEs are beyond | |||
the scope of this document.</t> | the scope of this document.</t> | |||
</section> | </section> | |||
</section> | ||||
</section> | <section anchor="sec-7" numbered="true" toc="default"> | |||
<name>IANA Considerations</name> | ||||
<section title="IANA Considerations" anchor="section-7"><t> | <t> | |||
IANA has allocated UDP port 6081 in the Service Name and Transport Protoc | IANA has allocated UDP port 6081 in the "Service Name and Transport Proto | |||
ol | col | |||
Port Number Registry <xref target="IANA-SN"/> as the well-known destinati | Port Number Registry" <xref target="IANA-SN" format="default"/> as the we | |||
on port | ll-known destination port | |||
for Geneve based on early registration.</t> | for Geneve:</t> | |||
<dl newline="false" spacing="compact"> | ||||
<t>Upon publication of this document, this registration will have its ref | <dt>Service Name:</dt><dd>geneve</dd> | |||
erence changed to cite | <dt>Transport Protocol(s):</dt><dd>UDP</dd> | |||
this document [RFC-to-be] and inline with <xref target="RFC6335"/> the as | <dt>Assignee:</dt><dd>IESG <iesg@ietf.org></dd> | |||
signee and contact of the port entry should be | <dt>Contact:</dt><dd>IETF Chair <chair@ietf.org></dd> | |||
changed to IESG <iesg@ietf.org> and IETF Chair <chair@ietf.org&g | <dt>Description:</dt><dd>Generic Network Virtualization Encapsulation (Geneve)</ | |||
t; respectively:</t> | dd> | |||
<dt>Reference:</dt><dd>[RFC8926]</dd> | ||||
<figure><artwork><![CDATA[ | <dt>Port Number:</dt><dd>6081</dd> | |||
Service Name: geneve | </dl> | |||
Transport Protocol(s): UDP | <t> | |||
Assignee: IESG <iesg@ietf.org> | In addition, IANA has created a new subregistry titled "Geneve Option Class" | |||
Contact: IETF Chair <chair@ietf.org> | for option classes. This registry has been placed under | |||
Description: Generic Network Virtualization Encapsulation (Geneve) | a new "Network Virtualization Overlay (NVO3)" heading in the IANA protocol re | |||
Reference: [RFC-to-be] | gistries <xref target="IANA-PR" format="default"/>. | |||
Port Number: 6081 | The "Geneve Option Class" registry consists of | |||
]]></artwork> | 16-bit hexadecimal values along with descriptive strings, assignee/contact in | |||
</figure> | formation, and references. | |||
The registration rules for the new registry are (as defined by <xref target=" | ||||
<t> | RFC8126" format="default"/>):</t> | |||
In addition, IANA is requested to create a new "Geneve Option Class" | <table align="center"> <name>Geneve Option Class Registry Ranges</name> | |||
registry to allocate Option Classes. This registry is to be placed under | <thead> | |||
a new Network Virtualization Overlay (NVO3) protocols page (to be created) in | <tr> | |||
IANA protocol registries <xref target="IANA-PR"/>. | <th align="left"> Range</th> | |||
The Geneve Option Class registry shall consist of | <th align="left"> Registration Procedures</th> | |||
16-bit hexadecimal values along with descriptive strings, assignee/contact in | </tr> | |||
formation and references. | </thead> | |||
The registration rules for the new registry are (as defined by <xref target=" | <tbody> | |||
RFC8126"/>):</t> | <tr> | |||
<td align="left">0x0000-0x00FF</td> | ||||
<texttable style="full"><ttcol> Range</ttcol> | <td align="left">IETF Review</td> | |||
<ttcol> Registration Procedures</ttcol> | </tr> | |||
<c>0x0000..0x00FF</c> | <tr> | |||
<c>IETF Review</c> | <td align="left">0x0100-0xFEFF</td> | |||
<c>0x0100..0xFEFF</c> | <td align="left">First Come First Served</td> | |||
<c>First Come First Served</c> | </tr> | |||
<c>0xFF00..0xFFFF</c> | <tr> | |||
<c>Experimental Use</c> | <td align="left">0xFF00-0xFFFF</td> | |||
</texttable> | <td align="left">Experimental Use</td> | |||
</tr> | ||||
<t> | </tbody> | |||
Initial registrations in the new registry are as follows:</t> | </table> | |||
</section> | ||||
<texttable style="full"><ttcol> Option Class</ttcol> | ||||
<ttcol> Description</ttcol> <ttcol> Assignee/Contact </ttcol> <tt | ||||
col> References</ttcol> | ||||
<c>0x0100</c> | ||||
<c>Linux</c> | ||||
<c></c> | ||||
<c></c> | ||||
<c>0x0101</c> | ||||
<c>Open vSwitch (OVS)</c> | ||||
<c></c> | ||||
<c></c> | ||||
<c>0x0102</c> | ||||
<c>Open Virtual Networking (OVN)</c> | ||||
<c></c> | ||||
<c></c> | ||||
<c>0x0103</c> | ||||
<c>In-band Network Telemetry (INT)</c> | ||||
<c></c> | ||||
<c></c> | ||||
<c>0x0104</c> | ||||
<c>VMware, Inc.</c> | ||||
<c></c> | ||||
<c></c> | ||||
<c>0x0105</c> | ||||
<c>Amazon.com, Inc.</c> | ||||
<c></c> | ||||
<c></c> | ||||
<c>0x0106</c> | ||||
<c>Cisco Systems, Inc.</c> | ||||
<c></c> | ||||
<c></c> | ||||
<c>0x0107</c> | ||||
<c>Oracle Corporation</c> | ||||
<c></c> | ||||
<c></c> | ||||
<c>0x0108..0x0110</c> | ||||
<c>Amazon.com, Inc.</c> | ||||
<c></c> | ||||
<c></c> | ||||
</texttable> | ||||
</section> | ||||
<section title="Contributors" anchor="section-8"><t> | ||||
The following individuals were authors of an earlier version of this | ||||
document and made significant contributions:</t> | ||||
<figure><artwork><![CDATA[ | ||||
Pankaj Garg | ||||
Microsoft Corporation | ||||
1 Microsoft Way | ||||
Redmond, WA 98052 | ||||
USA | ||||
Email: pankajg@microsoft.com | ||||
Chris Wright | ||||
Red Hat Inc. | ||||
1801 Varsity Drive | ||||
Raleigh, NC 27606 | ||||
USA | ||||
Email: chrisw@redhat.com | ||||
Kenneth Duda | ||||
Arista Networks | ||||
5453 Great America Parkway | ||||
Santa Clara, CA 95054 | ||||
USA | ||||
Email: kduda@arista.com | ||||
Dinesh G. Dutt | ||||
Independent | ||||
Email: didutt@gmail.com | ||||
Jon Hudson | ||||
Independent | ||||
Email: jon.hudson@gmail.com | ||||
Ariel Hendel | ||||
Facebook, Inc. | ||||
1 Hacker Way | ||||
Menlo Park, CA 94025 | ||||
USA | ||||
Email: ahendel@fb.com | ||||
]]></artwork> | ||||
</figure> | ||||
</section> | ||||
<section title="Acknowledgements" anchor="section-9"> | </middle> | |||
<t> | <back> | |||
The authors wish to acknowledge Puneet Agarwal, David Black, Sami Boutros | ||||
, Scott Bradner, | ||||
Martin Casado, Alissa Cooper, Roman Danyliw, Bruce Davie, Anoop Ghanwani, | ||||
Benjamin Kaduk, | ||||
Suresh Krishnan, Mirja Kuhlewind, Barry Leiba, Daniel Migault, Greg Mirks | ||||
y, Tal Mizrahi, | ||||
Kathleen Moriarty, Magnus Nystrom, Adam Roach, Sabrina Tanamal, Dave Thal | ||||
er, Eric Vyncke, | ||||
Magnus Westerlund and many other members of the NVO3 WG for their reviews | ||||
, comments and suggestions.</t> | ||||
<t> | <displayreference target="I-D.ietf-nvo3-encap" to="NVO3-ENCAP"/> | |||
The authors would like to thank Sam Aldrin, Alia Atlas, Matthew Bocci, Be | <displayreference target="I-D.ietf-nvo3-dataplane-requirements" to="NVO3-DATAPLA | |||
nson Schliesser, and Martin Vigoureux | NE"/> | |||
for their guidance throughout the process.</t> | <displayreference target="I-D.ietf-intarea-tunnels" to="INTAREA-TUNNELS"/> | |||
</section> | <references> | |||
<name>References</name> | ||||
<references> | ||||
<name>Normative References</name> | ||||
<xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R | ||||
FC.0768.xml"/> | ||||
<xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R | ||||
FC.0792.xml"/> | ||||
<xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R | ||||
FC.1122.xml"/> | ||||
<xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R | ||||
FC.1191.xml"/> | ||||
<xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R | ||||
FC.2003.xml"/> | ||||
<xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R | ||||
FC.2119.xml"/> | ||||
<xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R | ||||
FC.4443.xml"/> | ||||
<xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R | ||||
FC.6040.xml"/> | ||||
<xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R | ||||
FC.6936.xml"/> | ||||
<xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R | ||||
FC.7365.xml"/> | ||||
<xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R | ||||
FC.8085.xml"/> | ||||
<xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R | ||||
FC.8126.xml"/> | ||||
<xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R | ||||
FC.8174.xml"/> | ||||
<xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R | ||||
FC.8200.xml"/> | ||||
<xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R | ||||
FC.8201.xml"/> | ||||
</references> | ||||
<references> | ||||
<name>Informative References</name> | ||||
</middle> | <reference anchor="ETYPES" target="https://www.iana.org/assignments/ieee | |||
-802-numbers"> | ||||
<front> | ||||
<title>IEEE 802 Numbers</title> | ||||
<author> | ||||
<organization>IANA</organization> | ||||
</author> | ||||
</front> | ||||
</reference> | ||||
<back> | <xi:include href="https://datatracker.ietf.org/public/rfc/bibxml3/refere | |||
<references title="Normative References"> | nce.I-D.ietf-nvo3-encap.xml"/> | |||
&RFC0768; | ||||
&RFC0792; | ||||
&RFC1112; | ||||
&RFC1191; | ||||
&RFC2003; | ||||
&RFC2119; | ||||
&RFC4443; | ||||
&RFC6040; | ||||
&RFC6936; | ||||
&RFC7365; | ||||
&RFC8085; | ||||
&RFC8126; | ||||
&RFC8174; | ||||
&RFC8200; | ||||
&RFC8201; | ||||
</references> | ||||
<references title="Informative References"> | ||||
<reference anchor="ETYPES" | <xi:include href="https://datatracker.ietf.org/public/rfc/bibxml3/refere | |||
target="https://www.iana.org/assignments/ieee-802-numbers"> | nce.I-D.ietf-nvo3-dataplane-requirements.xml"/> | |||
<front> | ||||
<title>IEEE 802 Numbers</title> | ||||
<author> | ||||
<organization>The IEEE Registration Authority</organization> | ||||
</author> | ||||
<date/> | ||||
</front> | ||||
</reference> | ||||
&I-D.ietf-nvo3-encap; | <xi:include href="https://datatracker.ietf.org/public/rfc/bibxml3/refere | |||
&I-D.ietf-nvo3-dataplane-requirements; | nce.I-D.ietf-intarea-tunnels.xml"/> | |||
&I-D.ietf-intarea-tunnels; | ||||
<reference anchor="IANA-PR" | <reference anchor="IANA-PR" target="https://www.iana.org/protocols"> | |||
target="https://www.iana.org/protocols"> | <front> | |||
<front> | ||||
<title>Protocol Registries</title> | <title>Protocol Registries</title> | |||
<author> | <author> | |||
<organization>IANA</organization> | <organization>IANA</organization> | |||
</author> | </author> | |||
<date/> | </front> | |||
</front> | </reference> | |||
</reference> | ||||
<reference anchor="IANA-SN" | <reference anchor="IANA-SN" target="https://www.iana.org/assignments/ser | |||
target="https://www.iana.org/assignments/service-names-port-numbers"> | vice-names-port-numbers"> | |||
<front> | <front> | |||
<title>Service Name and Transport Protocol Port Number Registry</title> | <title>Service Name and Transport Protocol Port Number Registry</title> | |||
<author> | <author> | |||
<organization>IANA</organization> | <organization>IANA</organization> | |||
</author> | </author> | |||
<date/> | </front> | |||
</front> | </reference> | |||
</reference> | ||||
<!--&IEEE.802.1Q_2014;--> | ||||
<reference anchor='IEEE.802.1Q_2018' target='http://ieeexplore.ieee.org/servlet/ | ||||
opac?punumber=8403925'> | ||||
<front> | ||||
<title>IEEE Standard for Local and Metropolitan Area Networks--Bridges and Bri | ||||
dged Networks</title> | ||||
<author> | ||||
<organization>IEEE</organization> | ||||
</author> | ||||
<date day='06' month='July' year='2018' /> | ||||
<abstract><t>This standard specifies how the Media Access Control (MAC) Servic | ||||
e is supported by Bridged Networks, | ||||
the principles of operation of those networks, and the operation of MAC B | ||||
ridges and VLAN Bridges, | ||||
including management, protocols, and algorithms</t> | ||||
</abstract> | ||||
</front> | ||||
<seriesInfo name='IEEE' value='802.1Q-2018' /> | ||||
<seriesInfo name='DOI' value='10.1109/ieeestd.2018.8403927' /> | ||||
</reference> | ||||
&RFC2983; | <reference anchor="IEEE.802.1Q_2018" target="http://ieeexplore.ieee.org/servlet/ | |||
&RFC3031; | opac?punumber=8403925"> | |||
&RFC3552; | <front> | |||
&RFC3985; | <title>IEEE Standard for Local and Metropolitan Area Networks--Bridg | |||
&RFC4301; | es and Bridged Networks</title> | |||
&RFC5374; | <seriesInfo name="DOI" value="10.1109/IEEESTD.2018.8403927"/> | |||
&RFC6335; | <seriesInfo name="IEEE" value="802.1Q-2018"/> | |||
&RFC6438; | <author> | |||
&RFC7348; | <organization>IEEE</organization> | |||
&RFC7637; | </author> | |||
&RFC8014; | <date month="July" year="2018"/> | |||
&RFC8086; | </front> | |||
&RFC8293; | </reference> | |||
<xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R | ||||
FC.2983.xml"/> | ||||
<xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R | ||||
FC.3031.xml"/> | ||||
<xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R | ||||
FC.3552.xml"/> | ||||
<xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R | ||||
FC.3985.xml"/> | ||||
<xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R | ||||
FC.4301.xml"/> | ||||
<xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R | ||||
FC.5374.xml"/> | ||||
<xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R | ||||
FC.6438.xml"/> | ||||
<xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R | ||||
FC.7348.xml"/> | ||||
<xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R | ||||
FC.7637.xml"/> | ||||
<xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R | ||||
FC.8014.xml"/> | ||||
<xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R | ||||
FC.8086.xml"/> | ||||
<xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R | ||||
FC.8293.xml"/> | ||||
<reference anchor="VL2" | <reference anchor="VL2" target="https://dl.acm.org/doi/10.1145/1594977.1 | |||
target="https://www.sigcomm.org/sites/default/files/ccr/papers/2009/Octo | 592576"> | |||
ber/1594977-1592576.pdf"> | <front> | |||
<front> | ||||
<title>VL2: A Scalable and Flexible Data Center Network</title> | <title>VL2: A Scalable and Flexible Data Center Network</title> | |||
<seriesInfo name="DOI" value="10.1145/1594977.1592576"/> | ||||
<author surname="Greenberg, A., et al."> | <author surname="Greenberg, A., et al."> | |||
<organization></organization> | <organization/> | |||
</author> | </author> | |||
<date month="August" year="2009"/> | ||||
</front> | ||||
<refcontent>ACM SIGCOMM Computer Communication Review</refcontent> | ||||
</reference> | ||||
</references> | ||||
</references> | ||||
<section anchor="sec-9" numbered="false" toc="default"> | ||||
<name>Acknowledgements</name> | ||||
<t> | ||||
The authors wish to acknowledge <contact fullname="Puneet Agarwal"/>, | ||||
<contact fullname="David Black"/>, <contact fullname="Sami Boutros"/>, | ||||
<contact fullname="Scott Bradner"/>, | ||||
<contact fullname="Martín Casado"/>, <contact fullname="Alissa Cooper"/>, | ||||
<contact fullname="Roman Danyliw"/>, <contact fullname="Bruce Davie"/>, | ||||
<contact fullname="Anoop Ghanwani"/>, <contact fullname="Benjamin | ||||
Kaduk"/>, <contact fullname="Suresh Krishnan"/>, <contact | ||||
fullname="Mirja Kühlewind"/>, <contact fullname="Barry Leiba"/>, | ||||
<contact fullname="Daniel Migault"/>, <contact fullname="Greg | ||||
Mirksy"/>, <contact fullname="Tal Mizrahi"/>, | ||||
<contact fullname="Kathleen Moriarty"/>, <contact fullname="Magnus | ||||
Nyström"/>, <contact fullname="Adam Roach"/>, <contact fullname="Sabrin | ||||
a | ||||
Tanamal"/>, <contact fullname="Dave Thaler"/>, <contact fullname="Éric | ||||
Vyncke"/>, | ||||
<contact fullname="Magnus Westerlund"/>, and many other members of the NV | ||||
O3 Working Group for their reviews, comments, and suggestions.</t> | ||||
<t> | ||||
The authors would like to thank <contact fullname="Sam Aldrin"/>, | ||||
<contact fullname="Alia Atlas"/>, <contact fullname="Matthew Bocci"/>, | ||||
<contact fullname="Benson Schliesser"/>, and <contact fullname="Martin | ||||
Vigoureux"/> | ||||
for their guidance throughout the process.</t> | ||||
</section> | ||||
<date year="2009" /> | <section anchor="sec-8" numbered="false" toc="default"> | |||
</front> | <name>Contributors</name> | |||
<seriesInfo name="ACM SIGCOMM" value="Computer Communication | <t> | |||
Review"/> | The following individuals were authors of an earlier version of this | |||
<seriesInfo name="DOI" value="10.1145/1594977.1592576"/> | document and made significant contributions:</t> | |||
</reference> | ||||
</references> | <contact fullname="Pankaj Garg" > | |||
</back> | <organization>Microsoft Corporation</organization> | |||
<address> | ||||
<postal> | ||||
<street>1 Microsoft Way</street> | ||||
<city>Redmond</city> | ||||
<region>WA</region><code>98052</code> | ||||
<country>United States of America</country> | ||||
</postal> | ||||
<email>pankajg@microsoft.com</email> | ||||
</address> | ||||
</contact> | ||||
</rfc> | <contact fullname="Chris Wright" > | |||
<organization>Red Hat Inc.</organization> | ||||
<address> | ||||
<postal> | ||||
<street>1801 Varsity Drive</street> | ||||
<city>Raleigh</city> | ||||
<region>NC</region><code>27606</code> | ||||
<country>United States of America</country> | ||||
</postal> | ||||
<email>chrisw@redhat.com</email> | ||||
</address> | ||||
</contact> | ||||
<contact fullname="Kenneth Duda" > | ||||
<organization>Arista Networks</organization> | ||||
<address> | ||||
<postal> | ||||
<street>5453 Great America Parkway</street> | ||||
<city>Santa Clara</city> | ||||
<region>CA</region><code>95054</code> | ||||
<country>United States of America</country> | ||||
</postal> | ||||
<email>kduda@arista.com</email> | ||||
</address> | ||||
</contact> | ||||
<contact fullname="Dinesh G. Dutt" > | ||||
<organization>Independent</organization> | ||||
<address> | ||||
<postal> | ||||
<street></street> | ||||
<city></city> | ||||
<region></region><code></code> | ||||
<country></country> | ||||
</postal> | ||||
<email>didutt@gmail.com</email> | ||||
</address> | ||||
</contact> | ||||
<contact fullname="Jon Hudson" > | ||||
<organization>Independent</organization> | ||||
<address> | ||||
<postal> | ||||
<street></street> | ||||
<city></city> | ||||
<region></region><code></code> | ||||
<country></country> | ||||
</postal> | ||||
<email>jon.hudson@gmail.com</email> | ||||
</address> | ||||
</contact> | ||||
<contact fullname="Ariel Hendel" > | ||||
<organization>Facebook, Inc.</organization> | ||||
<address> | ||||
<postal> | ||||
<street>1 Hacker Way</street> | ||||
<city>Menlo Park</city> | ||||
<region>CA</region><code>94025</code> | ||||
<country>United States of America</country> | ||||
</postal> | ||||
<email>ahendel@fb.com</email> | ||||
</address> | ||||
</contact> | ||||
</section> | ||||
</back> | ||||
</rfc> | ||||
End of changes. 253 change blocks. | ||||
1306 lines changed or deleted | 1195 lines changed or added | |||
This html diff was produced by rfcdiff 1.48. The latest version is available from http://tools.ietf.org/tools/rfcdiff/ |