<?xml version='1.0' encoding='utf-8'?>
<!DOCTYPE rfc SYSTEM "rfc2629-xhtml.ent" [
<!ENTITY RFC0768 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.0768.xml">
<!ENTITY RFC0792 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.0792.xml">
<!ENTITY RFC1112 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.1112.xml">
<!ENTITY RFC2119 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.2119.xml">
<!ENTITY RFC4443 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.4443.xml">
<!ENTITY RFC6936 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.6936.xml">
<!ENTITY RFC8126 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.8126.xml">
<!ENTITY RFC8174 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.8174.xml">
<!ENTITY I-D.ietf-nvo3-encap SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml3/reference.I-D.draft-ietf-nvo3-encap-05.xml">
<!ENTITY I-D.ietf-nvo3-dataplane-requirements SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml3/reference.I-D.draft-ietf-nvo3-dataplane-requirements-03.xml">
<!ENTITY I-D.ietf-intarea-tunnels SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml3/reference.I-D.draft-ietf-intarea-tunnels-10.xml">
<!--ENTITY IEEE.802.1Q_2014 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml6/reference.IEEE.802.1Q_2014.xml"-->
<!ENTITY RFC1191 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.1191.xml">
<!ENTITY RFC2003 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.2003.xml">
<!ENTITY RFC8200 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.8200.xml">
<!ENTITY RFC2983 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.2983.xml">
<!ENTITY RFC3031 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.3031.xml">
<!ENTITY RFC3552 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.3552.xml">
<!ENTITY RFC3985 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.3985.xml">
<!ENTITY RFC4301 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.4301.xml">
<!ENTITY RFC5374 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.5374.xml">
<!ENTITY RFC6040 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.6040.xml">
<!ENTITY RFC6335 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.6335.xml">
<!ENTITY RFC6438 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.6438.xml">
<!ENTITY RFC7348 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.7348.xml">
<!ENTITY RFC7365 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.7365.xml">
<!ENTITY RFC7637 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.7637.xml">
<!ENTITY RFC8014 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.8014.xml">
<!ENTITY RFC8085 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.8085.xml">
<!ENTITY RFC8086 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.8086.xml">
<!ENTITY RFC8201 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.8201.xml">
<!ENTITY RFC8293 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.8293.xml">
]>
<rfc xmlns:xi="http://www.w3.org/2001/XInclude" ipr="trust200902" docName="draft-ietf-nvo3-geneve-16" number="8926" submissionType="IETF" category="std" consensus="true" obsoletes="" updates="" xml:lang="en" sortRefs="true" symRefs="true" tocInclude="true" version="3">
<front>
<title abbrev="Geneve Protocol">Geneve: Generic Network Virtualization Encapsulation</title>
<seriesInfo name="RFC" value="8926"/>
<author fullname="Jesse Gross" initials="J." 
role="editor" surname="Gross"> <organization/> <address> <email>jesse@kernel.org</email> </address> </author> <author fullname="Ilango Ganga" initials="I." role="editor" surname="Ganga"> <organization abbrev="Intel">Intel Corporation</organization> <address> <postal> <street>2200 Mission College Blvd.</street> <city>Santa Clara</city><region>CA</region><code>95054</code> <country>United States of America</country> </postal> <email>ilango.s.ganga@intel.com</email> </address> </author> <author fullname="T. Sridhar" initials="T." role="editor" surname="Sridhar"> <organization abbrev="VMware">VMware, Inc.</organization> <address> <postal> <street>3401 Hillview Ave.</street> <city>Palo Alto</city><region>CA</region><code>94304</code> <country>United States of America</country> </postal> <email>tsridhar@utexas.edu</email> </address> </author> <date month="November" year="2020"/> <keyword>overlay</keyword> <keyword>tunnel</keyword> <keyword>extensible</keyword> <keyword>variable</keyword> <keyword>metadata</keyword> <keyword>options</keyword> <keyword>endpoint</keyword> <keyword>transit</keyword> <abstract> <t> Network virtualization involves the cooperation of devices with a wide variety of capabilities such as software and hardware tunnel endpoints, transit fabrics, and centralized control clusters. As a result of their role in tying together different elements of the system, the requirements on tunnels are influenced by all of these components. Therefore, flexibility is the most important aspect of a tunneling protocol if it is to keep pace with the evolution of technology. 
This document describes Geneve, an encapsulation protocol designed to recognize and accommodate these changing capabilities and needs.</t> </abstract> </front> <middle> <section anchor="sec-1" numbered="true" toc="default"> <name>Introduction</name> <t> Networking has long featured a variety of tunneling, tagging, and other encapsulation mechanisms. However, the advent of network virtualization has caused a surge of renewed interest and a corresponding increase in the introduction of new protocols. The large number of protocols in this space -- for example, ranging all the way from VLANs <xref target="IEEE.802.1Q_2018" format="default"/> and MPLS <xref target="RFC3031" format="default"/> through the more recent VXLAN (Virtual eXtensible Local Area Network) <xref target="RFC7348" format="default"/> and NVGRE (Network Virtualization Using Generic Routing Encapsulation) <xref target="RFC7637" format="default"/> -- often leads to questions about the need for new encapsulation formats and what it is about network virtualization in particular that leads to their proliferation. Note that the list of protocols presented above is non-exhaustive.</t> <t> While many encapsulation protocols seek to simply partition the underlay network or bridge two domains, network virtualization views the transit network as providing connectivity between multiple components of a distributed system. In many ways, this system is similar to a chassis switch with the IP underlay network playing the role of the backplane and tunnel endpoints on the edge as line cards. 
When viewed in this light, the requirements placed on the tunneling protocol are significantly different in terms of the quantity of metadata necessary and the role of transit nodes.</t> <t> Work such as "VL2: A Scalable and Flexible Data Center Network" <xref target="VL2" format="default"/> and "NVO3 Data Plane Requirements" <xref target="I-D.ietf-nvo3-dataplane-requirements" format="default"/> have described some of the properties that the data plane must have to support network virtualization. However, one additional defining requirement is the need to carry metadata (e.g., system state) along with the packet data; example use cases of metadata are noted below. The use of some metadata is certainly not a foreign concept -- nearly all protocols used for network virtualization have at least 24 bits of identifier space as a way to partition between tenants. This is often described as overcoming the limits of 12-bit VLANs; when seen in that context or any context where it is a true tenant identifier, 16 million possible entries is a large number. However, the reality is that the metadata is not exclusively used to identify tenants, and encoding other information quickly starts to crowd the space. In fact, when compared to the tags used to exchange metadata between line cards on a chassis switch, 24-bit identifiers start to look quite small. There are nearly endless uses for this metadata, ranging from storing input port identifiers for simple security policies to sending service-based context for advanced middlebox applications that terminate and re-encapsulate Geneve traffic.</t> <t> Existing tunneling protocols have each attempted to solve different aspects of these new requirements only to be quickly rendered out of date by changing control plane implementations and advancements. 
Furthermore, software and hardware components and controllers all have different advantages and rates of evolution -- a fact that should be viewed as a benefit, not a liability or limitation. This document describes Geneve, a protocol that seeks to avoid these problems by providing a framework for tunneling for network virtualization rather than being prescriptive about the entire system.</t> <section anchor="sec-1.1" numbered="true" toc="default"> <name>Requirements Language</name> <t> The key words "<bcp14>MUST</bcp14>", "<bcp14>MUST NOT</bcp14>", "<bcp14>REQUIRED</bcp14>", "<bcp14>SHALL</bcp14>", "<bcp14>SHALL NOT</bcp14>", "<bcp14>SHOULD</bcp14>", "<bcp14>SHOULD NOT</bcp14>", "<bcp14>RECOMMENDED</bcp14>", "<bcp14>NOT RECOMMENDED</bcp14>", "<bcp14>MAY</bcp14>", and "<bcp14>OPTIONAL</bcp14>" in this document are to be interpreted as described in BCP 14 <xref target="RFC2119" format="default"/> <xref target="RFC8174" format="default"/> when, and only when, they appear in all capitals, as shown here.</t> </section> <section anchor="sec-1.2" numbered="true" toc="default"> <name>Terminology</name> <t> The Network Virtualization over Layer 3 (NVO3) Framework <xref target="RFC7365" format="default"/> defines many of the concepts commonly used in network virtualization. In addition, the following terms are specifically meaningful in this document:</t> <dl newline="false" spacing="normal"> <dt>Checksum offload:</dt> <dd>An optimization implemented by many NICs (Network Interface Controllers) that enables computation and verification of upper-layer protocol checksums in hardware on transmit and receive, respectively. 
This typically includes IP and TCP/UDP checksums that would otherwise be computed by the protocol stack in software.</dd> <dt>Clos network:</dt> <dd>A technique for composing network fabrics larger than a single switch while maintaining non-blocking bandwidth across connection points. ECMP is used to divide traffic across the multiple links and switches that constitute the fabric. Sometimes termed "leaf and spine" or "fat tree" topologies.</dd> <dt>ECMP:</dt> <dd>Equal Cost Multipath. A routing mechanism for selecting from among multiple best next-hop paths by hashing packet headers in order to better utilize network bandwidth while avoiding reordering of packets within a flow.</dd> <dt>Geneve:</dt><dd>Generic Network Virtualization Encapsulation. The tunneling protocol described in this document.</dd> <dt>LRO:</dt><dd>Large Receive Offload. The receiver-side equivalent function of LSO, in which multiple protocol segments (primarily TCP) are coalesced into larger data units.</dd> <dt>LSO:</dt><dd> Large Segmentation Offload. A function provided by many commercial NICs that allows data units larger than the MTU to be passed to the NIC to improve performance, the NIC being responsible for creating smaller segments of a size less than or equal to the MTU with correct protocol headers. When referring specifically to TCP/IP, this feature is often known as TSO (TCP Segmentation Offload).</dd> <dt> Middlebox:</dt><dd> In the context of this document, the term "middlebox" refers to network service functions or service interposition appliances that typically implement tunnel endpoint functionality, terminating and re-encapsulating Geneve traffic.</dd> 
<dt>NIC:</dt><dd>Network Interface Controller. Also called "Network Interface Card" or "Network Adapter". A NIC could be part of a tunnel endpoint or transit device and can either process or aid in the processing of Geneve packets.</dd> <dt> Transit device:</dt> <dd> A forwarding element (e.g., router or switch) along the path of the tunnel making up part of the underlay network. A transit device may be capable of understanding the Geneve packet format but does not originate or terminate Geneve packets.</dd> <dt> Tunnel endpoint:</dt><dd> A component performing encapsulation and decapsulation of packets, such as Ethernet frames or IP datagrams, in Geneve headers. As the ultimate consumer of any tunnel metadata, tunnel endpoints have the highest level of requirements for parsing and interpreting tunnel headers. Tunnel endpoints may consist of either software or hardware implementations or a combination of the two. Tunnel endpoints are frequently a component of a Network Virtualization Edge (NVE) but may also be found in middleboxes or other elements making up an NVO3 network.</dd> <dt>VM:</dt><dd>Virtual Machine.</dd> </dl> </section> </section> <section anchor="sec-2" numbered="true" toc="default"> <name>Design Requirements</name> <t> Geneve is designed to support network virtualization use cases for data center environments. In these situations, tunnels are typically established to act as a backplane between the virtual switches residing in hypervisors, physical switches, or middleboxes or other appliances. An arbitrary IP network can be used as an underlay, although Clos networks composed using ECMP links are a common choice to provide consistent bisectional bandwidth across all connection points. 
Many of the concepts of network virtualization overlays over IP networks are described in the NVO3 Framework <xref target="RFC7365" format="default"/>. <xref target="ref-sample-geneve-deployment"/> shows an example of a hypervisor, a top-of-rack switch for connectivity to physical servers, and a WAN uplink connected using Geneve tunnels over a simplified Clos network. These tunnels are used to encapsulate and forward frames from the attached components, such as VMs or physical links.</t> <figure anchor="ref-sample-geneve-deployment"> <name>Sample Geneve Deployment</name> <artwork name="" type="" align="left" alt=""><![CDATA[ +---------------------+ +-------+ +------+ | +--+ +-------+---+ | |Transit|--|Top of|==Physical | |VM|--| | | | +------+ /|Router | | Rack |==Servers | +--+ |Virtual|NIC|---|Top of|/ +-------+\/+------+ | +--+ |Switch | | | | Rack |\ +-------+/\+------+ | |VM|--| | | | +------+ \|Transit| |Uplink| WAN | +--+ +-------+---+ | |Router |--| |=========> +---------------------+ +-------+ +------+ Hypervisor ()===================================() Switch-Switch Geneve Tunnels ]]></artwork> </figure> <t> To support the needs of network virtualization, the tunneling protocol should be able to take advantage of the differing (and evolving) capabilities of each type of device in both the underlay and overlay networks. 
This results in the following requirements being placed on the data plane tunneling protocol:</t> <ul spacing="normal"> <li>The data plane is generic and extensible enough to support current and future control planes.</li> <li>Tunnel components are efficiently implementable in both hardware and software without restricting capabilities to the lowest common denominator.</li> <li>High performance over existing IP fabrics is maintained.</li> </ul> <t> These requirements are described further in the following subsections.</t> <section anchor="sec-2.1" numbered="true" toc="default"> <name>Control Plane Independence</name> <t> Although some protocols for network virtualization have included a control plane as part of the tunnel format specification (most notably, VXLAN <xref target="RFC7348" format="default"/> prescribed a multicast-learning-based control plane), these specifications have largely been treated as describing only the data format. The VXLAN packet format has actually seen a wide variety of control planes built on top of it.</t> <t> There is a clear advantage in settling on a data format: most of the protocols are only superficially different and there is little advantage in duplicating effort. However, the same cannot be said of control planes, which are diverse in very fundamental ways. The case for standardization is also less clear given the wide variety in requirements, goals, and deployment scenarios.</t> <t> As a result of this reality, Geneve is a pure tunnel format specification that is capable of fulfilling the needs of many control planes by explicitly not selecting any one of them. 
This simultaneously promotes a shared data format and reduces the chance of obsolescence by future control plane enhancements.</t> </section> <section anchor="sec-2.2" numbered="true" toc="default"> <name>Data Plane Extensibility</name> <t> Achieving the level of flexibility needed to support current and future control planes effectively requires an options infrastructure to allow new metadata types to be defined, deployed, and either finalized or retired. Options also allow for differentiation of products by encouraging independent development in each vendor's core specialty, leading to an overall faster pace of advancement. By far, the most common mechanism for implementing options is the Type-Length-Value (TLV) format.</t> <t> It should be noted that, while options can be used to support non-wirespeed control packets, they are equally important in data packets as well for segregating and directing forwarding. (For instance, the examples given before regarding input-port-based security policies and terminating/re-encapsulating service interposition both require tags to be placed on data packets.) Therefore, while it would be desirable to limit the extensibility to only control packets for the purposes of simplifying the datapath, that would not satisfy the design requirements.</t> <section anchor="sec-2.2.1" numbered="true" toc="default"> <name>Efficient Implementation</name> <t> There is often a conflict between software flexibility and hardware performance that is difficult to resolve. For a given set of functionality, it is obviously desirable to maximize performance. However, that does not mean new features that cannot be run at a desired speed today should be disallowed. 
Therefore, for a protocol to be considered efficiently implementable, it is expected to have a set of common capabilities that can be reasonably handled across platforms as well as a graceful mechanism to handle more advanced features in the appropriate situations.</t> <t> The use of a variable-length header and options in a protocol often raises questions about whether the protocol is truly efficiently implementable in hardware. To answer this question in the context of Geneve, it is important to first divide "hardware" into two categories: tunnel endpoints and transit devices.</t> <t> Tunnel endpoints must be able to parse the variable-length header, including any options, and take action. Since these devices are actively participating in the protocol, they are the most affected by Geneve. However, as tunnel endpoints are the ultimate consumers of the data, transmitters can tailor their output to the capabilities of the recipient.</t> <t> Transit devices may be able to interpret the options; however, as non-terminating devices, transit devices do not originate or terminate the Geneve packet. Hence, they <bcp14>MUST NOT</bcp14> modify Geneve headers and <bcp14>MUST NOT</bcp14> insert or delete options, as that is the responsibility of tunnel endpoints. Options, if present in the packet, <bcp14>MUST</bcp14> only be generated and terminated by tunnel endpoints. The participation of transit devices in interpreting options is <bcp14>OPTIONAL</bcp14>.</t> <t> Further, either tunnel endpoints or transit devices <bcp14>MAY</bcp14> use offload capabilities of NICs, such as checksum offload, to improve the performance of Geneve packet processing. 
The presence of a Geneve variable-length header should not prevent the tunnel endpoints and transit devices from using such offload capabilities.</t> </section> </section> <section anchor="sec-2.3" numbered="true" toc="default"> <name>Use of Standard IP Fabrics</name> <t> IP has clearly cemented its place as the dominant transport mechanism, and many techniques have evolved over time to make it robust, efficient, and inexpensive. As a result, it is natural to use IP fabrics as a transit network for Geneve. Fortunately, the use of IP encapsulation and addressing is enough to achieve the primary goal of delivering packets to the correct point in the network through standard switching and routing.</t> <t> In addition, nearly all underlay fabrics are designed to exploit parallelism in traffic to spread load across multiple links without introducing reordering in individual flows. These ECMP techniques typically involve parsing and hashing the addresses and port numbers from the packet to select an outgoing link. However, the use of tunnels often results in poor ECMP performance, as without additional knowledge of the protocol, the encapsulated traffic is hidden from the fabric by design, and only tunnel endpoint addresses are available for hashing.</t> <t> Since it is desirable for Geneve to perform well on these existing fabrics, it is necessary for entropy from encapsulated packets to be exposed in the tunnel header. The most common technique for this is to use the UDP source port, which is discussed further in <xref target="sec-3.3" format="default"/>.</t> </section> </section> <section anchor="sec-3" numbered="true" toc="default"> <name>Geneve Encapsulation Details</name> <t> The Geneve packet format consists of a compact tunnel header encapsulated in UDP over either IPv4 or IPv6. 
A small fixed tunnel header provides control information plus a base level of functionality and interoperability with a focus on simplicity. This header is then followed by a set ofvariablevariable-length options to allow for future innovation. Finally, the payload consists of a protocol data unit of the indicated type, such as an Ethernet frame. Sections <xreftarget="section-3.1"/>target="sec-3.1" format="counter"/> and <xreftarget="section-3.2"/>target="sec-3.2" format="counter"/> illustrate the Geneve packet format transported (for example) over Ethernet along with an Ethernet payload.</t> <sectiontitle="Geneveanchor="sec-3.1" numbered="true" toc="default"> <name>Geneve Packet FormatOver IPv4" anchor="section-3.1"> <figure><artwork><![CDATA[over IPv4</name> <figure> <name>Geneve Packet Format over IPv4</name> <artwork name="" type="" align="left" alt=""><![CDATA[ 0 1 2 3 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 Outer Ethernet Header: +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | Outer Destination MAC Address | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | Outer Destination MAC Address | Outer Source MAC Address | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | Outer Source MAC Address | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |Optional Ethertype=C-Tag 802.1Q| Outer VLAN Tag Information | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |Ethertype=0x0800Ethertype = 0x0800 IPv4 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ Outer IPv4 Header: +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |Version| IHL |Type of Service| Total Length | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | Identification |Flags| Fragment Offset | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | Time to Live |Protocol=17 UDP| Header Checksum | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | Outer 
Source IPv4 Address | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | Outer Destination IPv4 Address | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ Outer UDP Header: +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | Source Port = xxxx | Dest Port = 6081 Geneve | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | UDP Length | UDP Checksum | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ Geneve Header: +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |Ver| Opt Len |O|C| Rsvd. | Protocol Type | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | Virtual Network Identifier (VNI) | Reserved | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |Variable Length Options| ~ Variable-Length Options ~ | | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ Inner Ethernet Header (example payload): +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | Inner Destination MAC Address | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | Inner Destination MAC Address | Inner Source MAC Address | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | Inner Source MAC Address | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |Optional Ethertype=C-Tag 802.1Q| Inner VLAN Tag Information | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ Payload: +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | Ethertype of Original Payload | | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | | Original Ethernet Payload | | ||~ (Note that the original EthernetFrame's Preamble, Start Frame|frame's preamble, start ~ |Delimiter(SFD) & Frame Check Sequence(FCS)frame delimiter (SFD), and frame check sequence (FCS) are notincluded| | included, and the EthernetPayloadpayload need not be 4-bytealigned) |aligned)| 
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ Frame Check Sequence: +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | New Frame Check Sequence (FCS) for Outer Ethernet Frame | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ ]]></artwork> </figure> </section> <sectiontitle="Geneveanchor="sec-3.2" numbered="true" toc="default"> <name>Geneve Packet FormatOver IPv6" anchor="section-3.2"> <figure><artwork><![CDATA[over IPv6</name> <figure><name>Geneve Packet Format over IPv6</name> <artwork name="" type="" align="left" alt=""><![CDATA[ 0 1 2 3 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 Outer Ethernet Header: +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | Outer Destination MAC Address | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | Outer Destination MAC Address | Outer Source MAC Address | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | Outer Source MAC Address | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |Optional Ethertype=C-Tag 802.1Q| Outer VLAN Tag Information | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |Ethertype=0x86DDEthertype = 0x86DD IPv6 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ Outer IPv6 Header: +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |Version| Traffic Class | Flow Label | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | Payload Length | NxtHdr=17 UDP | Hop Limit | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | | + + | | + Outer Source IPv6 Address + | | + + | | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | | + + | | + Outer Destination IPv6 Address + | | + + | | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ Outer UDP Header: +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | Source Port = xxxx | Dest Port = 6081 Geneve | 
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | UDP Length | UDP Checksum | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ Geneve Header: +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |Ver| Opt Len |O|C| Rsvd. | Protocol Type | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | Virtual Network Identifier (VNI) | Reserved | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |Variable Length Options| ~ Variable-Length Options ~ | | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ Inner Ethernet Header (example payload): +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | Inner Destination MAC Address | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | Inner Destination MAC Address | Inner Source MAC Address | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | Inner Source MAC Address | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |Optional Ethertype=C-Tag 802.1Q| Inner VLAN Tag Information | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ Payload: +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | Ethertype of Original Payload | | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | | Original Ethernet Payload | | ||~ (Note that the original EthernetFrame's Preamble, Start Frame|frame's preamble, start ~ |Delimiter(SFD) & Frame Check Sequence(FCS)frame delimiter (SFD), and frame check sequence (FCS) are notincluded| | included, and the EthernetPayloadpayload need not be 4-bytealigned) |aligned)| +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ Frame Check Sequence: +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | New Frame Check Sequence (FCS) for Outer Ethernet Frame | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ ]]></artwork> </figure> </section> <sectiontitle="UDP Header" anchor="section-3.3"><t>anchor="sec-3.3" 
numbered="true" toc="default"> <name>UDP Header</name> <t> The use of an encapsulating UDP <xref target="RFC0768" format="default"/> header follows the connectionless semantics of Ethernet and IP in addition to providing entropy to routers performing ECMP. Therefore, header fields are interpreted as follows:</t> <dl newline="false" spacing="normal" indent="3"> <dt>Source Port:</dt> <dd> <t> A source port selected by the originating tunnel endpoint. This source port <bcp14>SHOULD</bcp14> be the same for all packets belonging to a single encapsulated flow to prevent reordering due to the use of different paths. To encourage an even distribution of flows across multiple links, the source port <bcp14>SHOULD</bcp14> be calculated using a hash of the encapsulated packet headers using, for example, a traditional 5-tuple. Since the port represents a flow identifier rather than a true UDP connection, the entire 16-bit range <bcp14>MAY</bcp14> be used to maximize entropy. In addition to setting the source port, for IPv6, the flow label <bcp14>MAY</bcp14> also be used for providing entropy. For an example of using the IPv6 flow label for tunnel use cases, see <xref target="RFC6438" format="default"/>. </t> <t> If Geneve traffic is shared with other UDP listeners on the same IP address, tunnel endpoints <bcp14>SHOULD</bcp14> implement a mechanism to ensure ICMP return traffic arising from network errors is directed to the correct listener. The definition of such a mechanism is beyond the scope of this document. </t> </dd> <dt>Dest Port:</dt> <dd> <t> IANA has assigned port 6081 as the fixed well-known destination port for Geneve. 
Although the well-known value should be used by default, it is <bcp14>RECOMMENDED</bcp14> that implementations make this configurable. The chosen port is used for identification of Geneve packets and <bcp14>MUST NOT</bcp14> be reversed for different ends of a connection as is done with TCP. It is the responsibility of the control plane to manage any reconfiguration of the assigned port and its interpretation by respective devices. The definition of the control plane is beyond the scope of this document. </t> </dd> <dt>UDP Length:</dt> <dd> <t> The length of the UDP packet including the UDP header.</t> </dd> <dt>UDP Checksum:</dt> <dd> <t> In order to protect the Geneve header, options, and payload from potential data corruption, the UDP checksum <bcp14>SHOULD</bcp14> be generated as specified in <xref target="RFC0768" format="default"/> and <xref target="RFC1122" format="default"/> when Geneve is encapsulated in IPv4. To protect the IP header, Geneve header, options, and payload from potential data corruption, the UDP checksum <bcp14>MUST</bcp14> be generated by default as specified in <xref target="RFC0768" format="default"/> and <xref target="RFC8200" format="default"/> when Geneve is encapsulated in IPv6, except under certain conditions, which are outlined in the next paragraph. Upon receiving such packets with a non-zero UDP checksum, the receiving tunnel endpoints <bcp14>MUST</bcp14> validate the checksum. 
If the checksum is not correct, the packet <bcp14>MUST</bcp14> be dropped; otherwise, the packet <bcp14>MUST</bcp14> be accepted for decapsulation.</t> <t> Under certain conditions, the UDP checksum <bcp14>MAY</bcp14> be set to zero on transmit for packets encapsulated in both IPv4 and IPv6 <xref target="RFC8200" format="default"/>. See <xref target="sec-4.3" format="default"/> for additional requirements that apply when using zero UDP checksum with IPv4 and IPv6. Disabling the use of UDP checksums is an operational consideration that should take into account the risks and effects of packet corruption. </t> </dd> </dl> </section> <section anchor="sec-3.4" numbered="true" toc="default"> <name>Tunnel Header Fields</name> <dl newline="false" spacing="normal" indent="3"> <dt>Ver (2 bits):</dt> <dd> <t> The current version number is 0. Packets received by a tunnel endpoint with an unknown version <bcp14>MUST</bcp14> be dropped. Transit devices interpreting Geneve packets with an unknown version number <bcp14>MUST</bcp14> treat them as UDP packets with an unknown payload. </t> </dd> <dt>Opt Len (6 bits):</dt> <dd> <t> The length of the option fields, expressed in 4-byte multiples, not including the 8-byte fixed tunnel header. This results in a minimum total Geneve header size of 8 bytes and a maximum of 260 bytes. The start of the payload headers can be found using this offset from the end of the base Geneve header.</t> <t> Transit devices <bcp14>MUST</bcp14> maintain consistent forwarding behavior irrespective of the value of 'Opt Len', including ECMP link selection. </t> </dd> <dt>O (1 bit):</dt> <dd> <t> Control packet. 
This packet contains a control message. Control messages are sent between tunnel endpoints. Tunnel endpoints <bcp14>MUST NOT</bcp14> forward the payload, and transit devices <bcp14>MUST NOT</bcp14> attempt to interpret it. Since control messages are less frequent, it is <bcp14>RECOMMENDED</bcp14> that tunnel endpoints direct these packets to a high-priority control queue (for example, to direct the packet to a general purpose CPU from a forwarding Application-Specific Integrated Circuit (ASIC) or to separate out control traffic on a NIC). Transit devices <bcp14>MUST NOT</bcp14> alter forwarding behavior on the basis of this bit, such as ECMP link selection. </t> </dd> <dt>C (1 bit):</dt> <dd> <t> Critical options present. One or more options has the critical bit set (see <xref target="sec-3.5" format="default"/>). If this bit is set, then tunnel endpoints <bcp14>MUST</bcp14> parse the options list to interpret any critical options. On tunnel endpoints where option parsing is not supported, the packet <bcp14>MUST</bcp14> be dropped on the basis of the 'C' bit in the base header. If the bit is not set, tunnel endpoints <bcp14>MAY</bcp14> strip all options using 'Opt Len' and forward the decapsulated packet. Transit devices <bcp14>MUST NOT</bcp14> drop packets on the basis of this bit. </t> </dd> <dt>Rsvd. (6 bits):</dt> <dd> <t> Reserved field, which <bcp14>MUST</bcp14> be zero on transmission and <bcp14>MUST</bcp14> be ignored on receipt. </t> </dd> <dt>Protocol Type (16 bits):</dt> <dd> <t> The type of protocol data unit appearing after the Geneve header. 
This follows the Ethertype <xref target="ETYPES" format="default"/> convention, with Ethernet itself being represented by the value 0x6558. </t> </dd> <dt>Virtual Network Identifier (VNI) (24 bits):</dt> <dd> <t> An identifier for a unique element of a virtual network. In many situations, this may represent an L2 segment; however, the control plane defines the forwarding semantics of decapsulated packets. The VNI <bcp14>MAY</bcp14> be used as part of ECMP forwarding decisions or <bcp14>MAY</bcp14> be used as a mechanism to distinguish between overlapping address spaces contained in the encapsulated packet when load balancing across CPUs. </t> </dd> <dt>Reserved (8 bits):</dt> <dd> <t> Reserved field, which <bcp14>MUST</bcp14> be zero on transmission and ignored on receipt. </t> </dd> </dl> </section> <section anchor="sec-3.5" numbered="true" toc="default"> <name>Tunnel Options</name> <figure anchor="geneve-options"> <name>Geneve Option</name> <artwork name="" type="" align="left" alt=""><![CDATA[ 0 1 2 3 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | Option Class | Type |R|R|R| Length | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | | ~ Variable-Length Option Data ~ | | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ ]]></artwork> </figure> <t> The base Geneve header is followed by zero or more options in Type-Length-Value format. 
Each option consists of a 4-byte option header and a variable amount of option data interpreted according to the type.</t> <dl newline="false" spacing="normal" indent="3"> <dt>Option Class (16 bits):</dt> <dd> <t> Namespace for the 'Type' field. IANA has created a "Geneve Option Class" registry to allocate identifiers for organizations, technologies, and vendors that have an interest in creating types for options. Each organization may allocate types independently to allow experimentation and rapid innovation. It is expected that, over time, certain options will become well known, and a given implementation may use option types from a variety of sources. In addition, IANA has reserved specific ranges for allocation by IETF Review and for Experimental Use (see <xref target="sec-7" format="default"/>). </t> </dd> <dt>Type (8 bits):</dt> <dd> <t> Type indicating the format of the data contained in this option. Options are primarily designed to encourage future extensibility and innovation, and standardized forms of these options will be defined in separate documents.</t> <t> The high-order bit of the option type indicates that this is a critical option. If the receiving tunnel endpoint does not recognize the option and this bit is set, then the packet <bcp14>MUST</bcp14> be dropped. If this bit is set in any option, then the 'C' bit in the Geneve base header <bcp14>MUST</bcp14> also be set. Transit devices <bcp14>MUST NOT</bcp14> drop packets on the basis of this bit. 
The following figure shows the location of the 'C' bit in the 'Type' field: </t> </dd> </dl> <figure><name>'C' Bit in the 'Type' Field</name> <artwork name="" type="" align="left" alt=""><![CDATA[ 0 1 2 3 4 5 6 7 8 +-+-+-+-+-+-+-+-+ |C| Type | +-+-+-+-+-+-+-+-+ ]]></artwork> </figure> <dl newline="false" spacing="normal" indent="3"> <dt/> <dd> The requirement to drop a packet with an unknown option with the 'C' bit set applies to the entire tunnel endpoint system and not a particular component of the implementation. For example, in a system comprised of a forwarding ASIC and a general purpose CPU, this does not mean that the packet must be dropped in the ASIC. An implementation may send the packet to the CPU using a rate-limited control channel for slow-path exception handling.</dd> </dl> <dl newline="false" spacing="normal" indent="3"> <dt>R (3 bits):</dt> <dd> Option control flags reserved for future use. These bits <bcp14>MUST</bcp14> be zero on transmission and <bcp14>MUST</bcp14> be ignored on receipt.</dd> <dt>Length (5 bits):</dt> <dd> <t> Length of the option, expressed in 4-byte multiples, excluding the option header. The total length of each option may be between 4 and 128 bytes. A value of 0 in the 'Length' field implies an option with only an option header and no option data. Packets in which the total length of all options is not equal to the 'Opt Len' in the base header are invalid and <bcp14>MUST</bcp14> be silently dropped if received by a tunnel endpoint that processes the options. 
</t> </dd> <dt>Variable-Length Option Data:</dt> <dd> <t> Option data interpreted according to 'Type'.</t> </dd> </dl> <section anchor="sec-3.5.1" numbered="true" toc="default"> <name>Options Processing</name> <t> Geneve options are intended to be originated and processed by tunnel endpoints. However, options <bcp14>MAY</bcp14> be interpreted by transit devices along the tunnel path. Transit devices not interpreting Geneve headers (which may or may not include options) <bcp14>MUST</bcp14> handle Geneve packets as any other UDP packet and maintain consistent forwarding behavior.</t> <t> In tunnel endpoints, the generation and interpretation of options is determined by the control plane, which is beyond the scope of this document. However, to ensure interoperability between heterogeneous devices, some requirements are imposed on options and the devices that process them:</t> <ul spacing="normal"> <li>Receiving tunnel endpoints <bcp14>MUST</bcp14> drop packets containing unknown options with the 'C' bit set in the option type. Conversely, transit devices <bcp14>MUST NOT</bcp14> drop packets as a result of encountering unknown options, including those with the 'C' bit set.</li> <li>The contents of the options and their ordering <bcp14>MUST NOT</bcp14> be modified by transit devices.</li> <li>If a tunnel endpoint receives a Geneve packet with an 'Opt Len' (the total length of all options) that exceeds the options-processing capability of the tunnel endpoint, then the tunnel endpoint <bcp14>MUST</bcp14> drop such packets. An implementation may raise an exception to the control plane in such an event. 
It is the responsibility of the control plane to ensure the communicating peer tunnel endpoints have the processing capability to handle the total length of options. The definition of the control plane is beyond the scope of this document.</li> </ul> <t> When designing a Geneve option, it is important to consider how the option will evolve in the future. Once an option is defined, it is reasonable to expect that implementations may come to depend on a specific behavior. As a result, the scope of any future changes must be carefully described upfront.</t> <t> Architecturally, options are intended to be self descriptive and independent. This enables parallelism in options processing and reduces implementation complexity. However, the control plane may impose certain ordering restrictions, as described in <xref target="sec-4.5.1" format="default"/>.</t> <t> Unexpectedly significant interoperability issues may result from changing the length of an option that was defined to be a certain size. A particular option is specified to have either a fixed length, which is constant, or a variable length, which may change over time or for different use cases. This property is part of the definition of the option and is conveyed by the 'Type'. For fixed-length options, some implementations may choose to ignore the 'Length' field in the option header and instead parse based on the well-known length associated with the type. In this case, redefining the length will impact not only the parsing of the option in question but also any options that follow. Therefore, options that are defined to be a fixed length in size <bcp14>MUST NOT</bcp14> be redefined to a different length. Instead, a new 'Type' should be allocated. Actual definition of the option type is beyond the scope of this document. 
The option type and its interpretation should be defined by the entity that owns the option class.</t> <t> Options may be processed by NIC hardware utilizing offloads (e.g., LSO and LRO) as described in <xref target="sec-4.6" format="default"/>. Careful consideration should be given to how the offload capabilities outlined in <xref target="sec-4.6" format="default"/> impact an option's design. </t> </section> </section> </section> <section anchor="sec-4" numbered="true" toc="default"> <name>Implementation and Deployment Considerations</name> <section anchor="sec-4.1" numbered="true" toc="default"> <name>Applicability Statement</name> <t> Geneve is a UDP-based network virtualization overlay encapsulation protocol designed to establish tunnels between NVEs over an existing IP network. It is intended for use in public or private data center environments, for deploying multi-tenant overlay networks over an existing IP underlay network.</t> <t> Geneve is a UDP-based encapsulation protocol transported over existing IPv4 and IPv6 networks. As a UDP-based protocol, Geneve adheres to the UDP usage guidelines as specified in <xref target="RFC8085" format="default"/>. The applicability of these guidelines is dependent on the underlay IP network and the nature of the Geneve payload protocol (for example, TCP/IP, IP/Ethernet).</t> <t> Geneve is intended to be deployed in a data center network environment operated by a single operator or an adjacent set of cooperating network operators that fits with the definition of controlled environments in <xref target="RFC8085" format="default"/>. 
A network in a controlled environment can be managed to operate under certain conditions, whereas in the general Internet, this cannot be done. Hence, requirements for a tunneling protocol operating under a controlled environment can be less restrictive than the requirements of the general Internet. </t> <t> For the purpose of this document, a traffic-managed controlled environment (TMCE) is defined as an IP network that is traffic engineered and/or otherwise managed (e.g., via use of traffic rate limiters) to avoid congestion. The concept of a TMCE is outlined in <xref target="RFC8086" format="default"/>. Significant portions of the text in <xref target="sec-4.1" format="default"/> through <xref target="sec-4.3" format="default"/> are based on <xref target="RFC8086" format="default"/> as applicable to Geneve.</t> <t> It is the responsibility of the operator to ensure that the guidelines/requirements in this section are followed as applicable to their Geneve deployment(s).</t> </section> <section anchor="sec-4.2" numbered="true" toc="default"> <name>Congestion-Control Functionality</name> <t> Geneve does not natively provide congestion-control functionality and relies on the payload protocol traffic for congestion control. As such, Geneve <bcp14>MUST</bcp14> be used with congestion-controlled traffic or within a TMCE to avoid congestion. 
An operator of a TMCE may avoid congestion through careful provisioning of their networks, rate-limiting user data traffic, and managing traffic engineering according to path capacity.</t> </section> <section anchor="sec-4.3" numbered="true" toc="default"> <name>UDP Checksum</name> <t> The outer UDP checksum <bcp14>SHOULD</bcp14> be used with Geneve when transported over IPv4; this is to provide integrity for the Geneve headers, options, and payload in case of data corruption (for example, to avoid misdelivery of the payload to different tenant systems). The UDP checksum provides a statistical guarantee that a payload was not corrupted in transit. These integrity checks are not strong from a coding or cryptographic perspective and are not designed to detect physical-layer errors or malicious modification of the datagram (see <xref target="RFC8085" sectionFormat="of" section="3.4"/>). In deployments where such a risk exists, an operator <bcp14>SHOULD</bcp14> use additional data integrity mechanisms such as those offered by IPsec (see <xref target="sec-6.2" format="default"/>).</t> <t> An operator <bcp14>MAY</bcp14> choose to disable UDP checksums and use zero UDP checksum if Geneve packet integrity is provided by other data integrity mechanisms, such as IPsec or additional checksums, or if one of the conditions (a, b, or c) in <xref target="sec-4.3.1" format="default"/> is met.</t> <t> By default, UDP checksums <bcp14>MUST</bcp14> be used when Geneve is transported over IPv6. 
A tunnel endpoint <bcp14>MAY</bcp14> be configured for use with zero UDP checksum if additional requirements in <xref target="sec-4.3.1" format="default"/> are met.</t> <section anchor="sec-4.3.1" numbered="true" toc="default"> <name>Zero UDP Checksum Handling with IPv6</name> <t> When Geneve is used over IPv6, the UDP checksum is used to protect IPv6 headers, UDP headers, and Geneve headers, options, and payload from potential data corruption. As such, by default, Geneve <bcp14>MUST</bcp14> use UDP checksums when transported over IPv6. An operator <bcp14>MAY</bcp14> choose to configure zero UDP checksum if operating in a TMCE as stated in <xref target="sec-4.1" format="default"/> if one of the following conditions is met.</t> <ol spacing="normal" type="a"> <li>It is known that packet corruption is exceptionally unlikely (perhaps based on knowledge of equipment types in their underlay network) and the operator is willing to risk undetected packet corruption.</li> <li>It is judged through observational measurements (perhaps through historic or current traffic flows that use non-zero checksum) that the level of packet corruption is tolerably low and is where the operator is willing to risk undetected corruption.</li> <li>The Geneve payload is carrying applications that are tolerant of misdelivered or corrupted packets (perhaps through higher-layer checksum validation and/or reliability through retransmission). 
</li> </ol> <t> In addition, Geneve tunnel implementations using zero UDP checksum <bcp14>MUST</bcp14> meet the following requirements:</t> <ol spacing="normal" type="1"> <li>Use of UDP checksum over IPv6 <bcp14>MUST</bcp14> be the default configuration for all Geneve tunnels.</li> <li>If Geneve is used with zero UDP checksum over IPv6, then such a tunnel endpoint implementation <bcp14>MUST</bcp14> meet all the requirements specified in <xref target="RFC6936" sectionFormat="of" section="4"/> and requirement 1 as specified in <xref target="RFC6936" sectionFormat="of" section="5"/> since it is relevant to Geneve.</li> <li>The Geneve tunnel endpoint that decapsulates the tunnel <bcp14>SHOULD</bcp14> check that the source and destination IPv6 addresses are valid for the Geneve tunnel that is configured to receive zero UDP checksum and discard other packets for which such a check fails.</li> <li> <t>The Geneve tunnel endpoint that encapsulates the tunnel <bcp14>MAY</bcp14> use different IPv6 source addresses for each Geneve tunnel that uses zero UDP checksum mode in order to strengthen the decapsulator's check of the IPv6 source address (i.e., the same IPv6 source address is not to be used with more than one IPv6 destination address, irrespective of whether that destination address is a unicast or multicast address). When this is not possible, it is <bcp14>RECOMMENDED</bcp14> to use each source address for as few Geneve tunnels that use zero UDP checksum as is feasible.</t> <t> Note that for requirements 3 and 4, the receiving tunnel endpoint can apply these checks only if it has out-of-band knowledge that the encapsulating tunnel endpoint is applying the indicated behavior. One possibility to obtain this out-of-band knowledge is through signaling by the control plane. 
The definition of the control plane is beyond the scope of this document.</t> </li> <li>Measures <bcp14>SHOULD</bcp14> be taken to prevent Geneve traffic over IPv6 with zero UDP checksum from escaping into the general Internet. Examples of such measures include employing packet filters at the gateways or edge of the Geneve network and/or keeping logical or physical separation of the Geneve network from networks carrying general Internet traffic.</li> </ol> <t> The above requirements do not change the requirements specified in either <xref target="RFC8200" format="default"/> or <xref target="RFC6936" format="default"/>. </t> <t> The use of the source IPv6 address in addition to the destination IPv6 address, plus the recommendation against reuse of source IPv6 addresses among Geneve tunnels, collectively provide some mitigation for the absence of UDP checksum coverage of the IPv6 header. A traffic-managed controlled environment that satisfies at least one of the three conditions listed at the beginning of this section provides additional assurance. </t> <t> Editorial Note (The following paragraph to be removed by the RFC Editor before publication) </t> <t> It was discussed during TSVART early review if the level of requirement for using different IPv6 source addresses for different tunnel destinations would need to be "MAY" or "SHOULD". The discussion concluded that it was appropriate to keep this as "MAY", since it was considered not realistic for control planes having to maintain a high level of state on a per tunnel destination basis. 
In addition, the text above provides sufficient guidance to operators and implementors on possible mitigations.</t> </section> </section> <section anchor="sec-4.4" numbered="true" toc="default"> <name>Encapsulation of Geneve in IP</name> <t> As an IP-based tunneling protocol, Geneve shares many properties and techniques with existing protocols. The application of some of these are described in further detail, although, in general, most concepts applicable to the IP layer or to IP tunnels generally also function in the context of Geneve.</t> <section anchor="sec-4.4.1" numbered="true" toc="default"> <name>IP Fragmentation</name> <t> It is <bcp14>RECOMMENDED</bcp14> that Path MTU Discovery (see <xref target="RFC1191" format="default"/> and <xref target="RFC8201" format="default"/>) be used to prevent or minimize fragmentation. The use of Path MTU Discovery on the transit network provides the encapsulating tunnel endpoint with soft-state information about the link that it may use to prevent or minimize fragmentation depending on its role in the virtualized network. The NVE can maintain this state (the MTU size of the tunnel link(s) associated with the tunnel endpoint), so if a tenant system sends large packets that, when encapsulated, exceed the MTU size of the tunnel link, the tunnel endpoint can discard such packets and send exception messages to the tenant system(s). If the tunnel endpoint is associated with a routing or forwarding function and/or has the capability to send ICMP messages, the encapsulating tunnel endpoint <bcp14>MAY</bcp14> send ICMP fragmentation needed <xref target="RFC0792" format="default"/> or Packet Too Big <xref target="RFC4443" format="default"/> messages to the tenant system(s). 
When determining the MTU size of a tunnel link, the maximum length of options <bcp14>MUST</bcp14> be assumed as options may vary on a per-packet basis. Recommendations and guidance for handling fragmentation in similar overlay encapsulation services like Pseudowire Emulation Edge-to-Edge (PWE3) are provided in <xref target="RFC3985" sectionFormat="of" section="5.3"/>.</t> <t> Note that some implementations may not be capable of supporting fragmentation or other less common features of the IP header, such as options and extension headers. Some of the issues associated with MTU size and fragmentation in IP tunneling and use of ICMP messages are outlined in <xref target="I-D.ietf-intarea-tunnels" sectionFormat="of" section="4.2"/>.</t> </section> <section anchor="sec-4.4.2" numbered="true" toc="default"> <name>DSCP, ECN, and TTL</name> <t> When encapsulating IP (including over Ethernet) packets in Geneve, there are several considerations for propagating Differentiated Services Code Point (DSCP) and Explicit Congestion Notification (ECN) bits from the inner header to the tunnel on transmission and the reverse on reception.</t> <t> <xref target="RFC2983" format="default"/> provides guidance for mapping DSCP between inner and outer IP headers. Network virtualization is typically more closely aligned with the Pipe model described, where the DSCP value on the tunnel header is set based on a policy (which may be a fixed value, one based on the inner traffic class or some other mechanism for grouping traffic). Aspects of the Uniform model (which treats the inner and outer DSCP values as a single field by copying on ingress and egress) may also apply, such as the ability to re-mark the inner header on tunnel egress based on transit marking. 
However, the Uniform model is not conceptually consistent with network virtualization, which seeks to provide strong isolation between encapsulated traffic and the physical network.</t> <t> <xref target="RFC6040" format="default"/> describes the mechanism for exposing ECN capabilities on IP tunnels and propagating congestion markers to the inner packets. This behavior <bcp14>MUST</bcp14> be followed for IP packets encapsulated in Geneve.</t> <t> Though either the Uniform or Pipe models could be used for handling TTL (or Hop Limit in case of IPv6) when tunneling IP packets, the Pipe model is more consistent with network virtualization. <xref target="RFC2003" format="default"/> provides guidance on handling TTL between inner IP header and outer IP tunnels; this model is similar to the Pipe model and is <bcp14>RECOMMENDED</bcp14> for use with Geneve for network virtualization applications.</t> </section> <section anchor="sec-4.4.3" numbered="true" toc="default"> <name>Broadcast and Multicast</name> <t> Geneve tunnels may either be point-to-point unicast between two tunnel endpoints or utilize broadcast or multicast addressing. It is not required that inner and outer addressing match in this respect. For example, in physical networks that do not support multicast, encapsulated multicast traffic may be replicated into multiple unicast tunnels or forwarded by policy to a unicast location (possibly to be replicated there).</t> <t> With physical networks that do support multicast, it may be desirable to use this capability to take advantage of hardware replication for encapsulated packets. In this case, multicast addresses may be allocated in the physical network corresponding to tenants, encapsulated multicast groups, or some other factor. 
The allocation of these groups is a component of the control plane and, therefore, is beyond the scope of this document.</t> <t> When physical multicast is in use, devices with heterogeneous capabilities may be present in the same group. Some options may only be interpretable by a subset of the devices in the group. Other devices can safely ignore such options unless the 'C' bit is set to mark the unknown option as critical. The requirements outlined in <xref target="sec-3.4" format="default"/> apply for critical options.</t> <t> In addition, <xref target="RFC8293" format="default"/> provides examples of various mechanisms that can be used for multicast handling in network virtualization overlay networks.</t> </section> <section anchor="sec-4.4.4" numbered="true" toc="default"> <name>Unidirectional Tunnels</name> <t> Generally speaking, a Geneve tunnel is a unidirectional concept. IP is not a connection-oriented protocol, and it is possible for two tunnel endpoints to communicate with each other using different paths or to have one side not transmit anything at all. As Geneve is an IP-based protocol, the tunnel layer inherits these same characteristics.</t> <t> It is possible for a tunnel to encapsulate a protocol, such as TCP, that is connection oriented and maintains session state at that layer. In addition, implementations <bcp14>MAY</bcp14> model Geneve tunnels as connected, bidirectional links, for example, to provide the abstraction of a virtual port.
In both of these cases, bidirectionality of the tunnel is handled at a higher layer and does not affect the operation of Geneve itself.</t> </section> </section> <section anchor="sec-4.5" numbered="true" toc="default"> <name>Constraints on Protocol Features</name> <t> Geneve is intended to be flexible for use with a wide range of current and future applications. As a result, certain constraints may be placed on the use of metadata or other aspects of the protocol in order to optimize for a particular use case. For example, some applications may limit the types of options that are supported or enforce a maximum number or length of options. Other applications may only handle certain encapsulated payload types, such as Ethernet or IP. These optimizations can be implemented either globally (throughout the system) or locally (for example, restricted to certain classes of devices or network paths).</t> <t> These constraints may be communicated to tunnel endpoints either explicitly through a control plane or implicitly by the nature of the application. As Geneve is defined as a data plane protocol that is control plane agnostic, definition of such mechanisms is beyond the scope of this document.</t> <section anchor="sec-4.5.1" numbered="true" toc="default"> <name>Constraints on Options</name> <t> While Geneve options are flexible, a control plane may restrict the number of option TLVs as well as the order and size of the TLVs between tunnel endpoints to make it simpler for a data plane implementation in software or hardware to handle (see <xref target="I-D.ietf-nvo3-encap" format="default"/>).
For example, there may be some critical information, such as a secure hash, that must be processed in a certain order to provide the lowest latency, or there may be other scenarios where the options must be processed in a given order due to protocol semantics.</t> <t> A control plane may negotiate a subset of option TLVs and certain TLV ordering; it may also limit the total number of option TLVs present in the packet, for example, to accommodate hardware capable of processing fewer options. Hence, a control plane needs to have the ability to describe the supported TLV subset and its ordering to the tunnel endpoints. In the absence of a control plane, alternative configuration mechanisms may be used for this purpose. Such mechanisms are beyond the scope of this document.</t> </section> </section> <section anchor="sec-4.6" numbered="true" toc="default"> <name>NIC Offloads</name> <t> Modern NICs currently provide a variety of offloads to enable the efficient processing of packets. The implementation of many of these offloads requires only that the encapsulated packet be easily parsed (for example, checksum offload). However, optimizations such as LSO and LRO involve some processing of the options themselves since they must be replicated/merged across multiple packets. In these situations, it is desirable not to require changes to the offload logic to handle the introduction of new options. To enable this, some constraints are placed on the definitions of options to allow for simple processing rules:</t> <ul spacing="normal"> <li>When performing LSO, a NIC <bcp14>MUST</bcp14> replicate the entire Geneve header and all options, including those unknown to the device, onto each resulting segment unless an option allows an exception.
Conversely, when performing LRO, a NIC may assume that a binary comparison of the options (including unknown options) is sufficient to ensure equality and <bcp14>MAY</bcp14> merge packets with equal Geneve headers.</li> <li>Options <bcp14>MUST NOT</bcp14> be reordered during the course of offload processing, including when merging packets for the purpose of LRO.</li> <li>NICs performing offloads <bcp14>MUST NOT</bcp14> drop packets with unknown options, including those marked as critical, unless explicitly configured to do so.</li> </ul> <t> There is no requirement that a given implementation of Geneve employ the offloads listed as examples above. However, as these offloads are currently widely deployed in commercially available NICs, the rules described here are intended to enable efficient handling of current and future options across a variety of devices.</t> </section> <section anchor="sec-4.7" numbered="true" toc="default"> <name>Inner VLAN Handling</name> <t> Geneve is capable of encapsulating a wide range of protocols; therefore, a given implementation is likely to support only a small subset of the possibilities. However, as Ethernet is expected to be widely deployed, it is useful to describe the behavior of VLANs inside encapsulated Ethernet frames.</t> <t> As with any protocol, support for inner VLAN headers is <bcp14>OPTIONAL</bcp14>. In many cases, the use of encapsulated VLANs may be disallowed due to security or implementation considerations. However, in other cases, the trunking of VLAN frames across a Geneve tunnel can prove useful.
As a result, the processing of inner VLAN tags upon ingress or egress from a tunnel endpoint is based upon the configuration of the tunnel endpoint and/or control plane and is not explicitly defined as part of the data format.</t> </section> </section> <section anchor="sec-5" numbered="true" toc="default"> <name>Transition Considerations</name> <t> Viewed exclusively from the data plane, Geneve is compatible with existing IP networks as it appears to most devices as UDP packets. However, as there are already a number of tunneling protocols deployed in network virtualization environments, there is a practical question of transition and coexistence.</t> <t> Since Geneve builds on the base data plane functionality provided by the most common protocols used for network virtualization (VXLAN and NVGRE), it should be straightforward to port an existing control plane to run on top of it with minimal effort. With both the old and new packet formats supporting the same set of capabilities, there is no need for a hard transition; tunnel endpoints directly communicating with each other can use any common protocol, which may be different even within a single overall system. As transit devices are primarily forwarding packets on the basis of the IP header, all protocols appear to be similar, and these devices do not introduce additional interoperability concerns.</t> <t> To assist with this transition, it is strongly suggested that implementations support simultaneous operation of both Geneve and existing tunneling protocols, as it is expected to be common for a single node to communicate with a mixture of other nodes.
Eventually, older protocols may be phased out as they are no longer in use.</t> </section> <section anchor="sec-6" numbered="true" toc="default"> <name>Security Considerations</name> <t> As it is encapsulated within a UDP/IP packet, Geneve does not have any inherent security mechanisms. As a result, an attacker with access to the underlay network transporting the IP packets has the ability to snoop on, alter, or inject packets. Compromised tunnel endpoints or transit devices may also spoof identifiers in the tunnel header to gain access to networks owned by other tenants.</t> <t> Within a particular security domain, such as a data center operated by a single service provider, the most common and highest-performing security mechanism is isolation of trusted components. Tunnel traffic can be carried over a separate VLAN and filtered at any untrusted boundaries.</t> <t> When crossing an untrusted link, such as the general Internet, VPN technologies such as IPsec <xref target="RFC4301" format="default"/> should be used to provide authentication and/or encryption of the IP packets formed as part of Geneve encapsulation (see <xref target="sec-6.1.1" format="default"/>).</t> <t> Geneve does not otherwise affect the security of the encapsulated packets. As per the guidelines of BCP 72 <xref target="RFC3552" format="default"/>, the following sections describe potential security risks that may be applicable to Geneve deployments and approaches to mitigate such risks.
It is also noted that not all such risks are applicable to all Geneve deployment scenarios, i.e., only a subset may be applicable to certain deployments. An operator has to make an assessment based on their network environment, determine the risks that are applicable to their specific environment, and use appropriate mitigation approaches as applicable. </t> <section anchor="sec-6.1" numbered="true" toc="default"> <name>Data Confidentiality</name> <t> Geneve is a network virtualization overlay encapsulation protocol designed to establish tunnels between NVEs over an existing IP network. It can be used to deploy multi-tenant overlay networks over an existing IP underlay network in a public or private data center. The overlay service is typically provided by a service provider, such as a cloud service provider or a private data center operator. This may or may not be the same provider as an underlay service provider. Due to the nature of multi-tenancy in such environments, a tenant system may expect data confidentiality to ensure its packet data is not tampered with (i.e., active attack) in transit or is a target of unauthorized monitoring (i.e., passive attack), for example, by other tenant systems or underlay service provider. A compromised network node or a transit device within a data center may passively monitor Geneve packet data between NVEs or route traffic for further inspection. A tenant may expect the overlay service provider to provide data confidentiality as part of the service, or a tenant may bring its own data confidentiality mechanisms like IPsec or TLS to protect the data end to end between its tenant systems.
The overlay provider is expected to provide cryptographic protection in cases where the underlay provider is not the same as the overlay provider to ensure the payload is not exposed to the underlay.</t> <t> If an operator determines data confidentiality is necessary in their environment based on their risk analysis -- for example, in multi-tenant environments -- then an encryption mechanism <bcp14>SHOULD</bcp14> be used to encrypt the tenant data end to end between the NVEs. The NVEs may use existing well-established encryption mechanisms, such as IPsec, DTLS, etc.</t> <section anchor="sec-6.1.1" numbered="true" toc="default"> <name>Inter-Data Center Traffic</name> <t> A tenant system in a customer premises (private data center) may want to connect to tenant systems on their tenant overlay network in a public cloud data center, or a tenant may want to have its tenant systems located in multiple geographically separated data centers for high availability. Geneve data traffic between tenant systems across such separated networks should be protected from threats when traversing public networks. Any Geneve overlay data leaving the data center network beyond the operator's security domain <bcp14>SHOULD</bcp14> be secured by encryption mechanisms, such as IPsec or other VPN technologies, to protect the communications between the NVEs when they are geographically separated over untrusted network links.
Specification of data protection mechanisms employed between data centers is beyond the scope of this document.</t> <t> The principles described in <xref target="sec-4" format="default"/> regarding controlled environments still apply to the geographically separated data center usage outlined in this section.</t> </section> </section> <section anchor="sec-6.2" numbered="true" toc="default"> <name>Data Integrity</name> <t> Geneve encapsulation is used between NVEs to establish overlay tunnels over an existing IP underlay network. In a multi-tenant data center, a rogue or compromised tenant system may try to launch a passive attack, such as monitoring the traffic of other tenants, or an active attack, such as trying to inject unauthorized Geneve encapsulated traffic such as spoofing, replay, etc., into the network. To prevent such attacks, an NVE <bcp14>MUST NOT</bcp14> propagate Geneve packets beyond the NVE to tenant systems and <bcp14>SHOULD</bcp14> employ packet-filtering mechanisms so as not to forward unauthorized traffic between tenant systems in different tenant networks. An NVE <bcp14>MUST NOT</bcp14> interpret Geneve packets from tenant systems other than as frames to be encapsulated.</t> <t> A compromised network node or a transit device within a data center may launch an active attack trying to tamper with the Geneve packet data between NVEs. Malicious tampering of Geneve header fields may cause the packet from one tenant to be forwarded to a different tenant network. If an operator determines there is a possibility of such a threat in their environment, the operator may choose to employ data integrity mechanisms between NVEs.
In order to prevent such risks, a data integrity mechanism <bcp14>SHOULD</bcp14> be used in such environments to protect the integrity of Geneve packets, including packet headers, options, and payload on communications between NVE pairs. A cryptographic data protection mechanism, such as IPsec, may be used to provide data integrity protection. A data center operator may choose to deploy any other data integrity mechanisms as applicable and supported in their underlay networks, although non-cryptographic mechanisms may not protect the Geneve portion of the packet from tampering. </t> </section> <section anchor="sec-6.3" numbered="true" toc="default"> <name>Authentication of NVE Peers</name> <t> A rogue network device or a compromised NVE in a data center environment might be able to spoof Geneve packets as if it came from a legitimate NVE. In order to mitigate such a risk, an operator <bcp14>SHOULD</bcp14> use an authentication mechanism, such as IPsec, to ensure that the Geneve packet originated from the intended NVE peer in environments where the operator determines spoofing or rogue devices are potential threats. Other simpler source checks, such as ingress filtering for VLAN/MAC/IP addresses, reverse path forwarding checks, etc., may be used in certain trusted environments to ensure Geneve packets originated from the intended NVE peer.</t> </section> <section anchor="sec-6.4" numbered="true" toc="default"> <name>Options Interpretation by Transit Devices</name> <t> Options, if present in the packet, are generated and terminated by tunnel endpoints. As indicated in <xref target="sec-2.2.1" format="default"/>, transit devices may interpret the options.
However, if the packet is protected by encryption from tunnel endpoint to tunnel endpoint (for example, through IPsec), transit devices will not have visibility into the Geneve header or options in the packet. In such cases, transit devices <bcp14>MUST</bcp14> handle Geneve packets as any other IP packet and maintain consistent forwarding behavior. In cases where options are interpreted by transit devices, the operator <bcp14>MUST</bcp14> ensure that transit devices are trusted and not compromised. The definition of a mechanism to ensure this trust is beyond the scope of this document.</t> </section> <section anchor="sec-6.5" numbered="true" toc="default"> <name>Multicast/Broadcast</name> <t> In typical data center networks where IP multicasting is not supported in the underlay network, multicasting may be supported using multiple unicast tunnels. The same security requirements as described in the above sections can be used to protect Geneve communications between NVE peers. If IP multicasting is supported in the underlay network and the operator chooses to use it for multicast traffic among tunnel endpoints, then the operator in such environments may use data protection mechanisms, such as IPsec with multicast extensions <xref target="RFC5374" format="default"/>, to protect multicast traffic among Geneve NVE groups.</t> </section> <section anchor="sec-6.6" numbered="true" toc="default"> <name>Control Plane Communications</name> <t> A Network Virtualization Authority (NVA) as outlined in <xref target="RFC8014" format="default"/> may be used as a control plane for configuring and managing the Geneve NVEs.
The data center operator is expected to use security mechanisms to protect the communications between the NVAtoand NVEs and to use authentication mechanisms to detect any rogue or compromised NVEs within their administrative domain. Data protection mechanisms for control plane communication or authentication mechanisms between the NVA andtheNVEs are beyond the scope of this document.</t> </section> </section> <sectiontitle="IANA Considerations" anchor="section-7"><t>anchor="sec-7" numbered="true" toc="default"> <name>IANA Considerations</name> <t> IANA has allocated UDP port 6081 in theService"Service Name and Transport Protocol Port NumberRegistryRegistry" <xreftarget="IANA-SN"/>target="IANA-SN" format="default"/> as the well-known destination port forGeneve based on early registration.</t> <t>Upon publication of this document, this registration will have its reference changed to cite this document [RFC-to-be] and inline with <xref target="RFC6335"/> the assignee and contact of the port entry should be changed to IESG <iesg@ietf.org> and IETFGeneve:</t> <dl newline="false" spacing="compact"> <dt>Service Name:</dt><dd>geneve</dd> <dt>Transport Protocol(s):</dt><dd>UDP</dd> <dt>Assignee:</dt><dd>IESG <iesg@ietf.org></dd> <dt>Contact:</dt><dd>IETF Chair<chair@ietf.org> respectively:</t> <figure><artwork><![CDATA[ Service Name: geneve Transport Protocol(s): UDP Assignee: IESG <iesg@ietf.org> Contact: IETF Chair <chair@ietf.org> Description: Generic<chair@ietf.org></dd> <dt>Description:</dt><dd>Generic Network Virtualization Encapsulation(Geneve) Reference: [RFC-to-be] Port Number: 6081 ]]></artwork> </figure>(Geneve)</dd> <dt>Reference:</dt><dd>[RFC8926]</dd> <dt>Port Number:</dt><dd>6081</dd> </dl> <t> In addition, IANAis requested to createhas created a new subregistry titled "Geneve Option Class"registry to allocate Option Classes.for option classes. 
This registryis to behas been placed under a newNetwork"Network Virtualization Overlay(NVO3) protocols page (to be created)(NVO3)" heading in the IANA protocol registries <xreftarget="IANA-PR"/>.target="IANA-PR" format="default"/>. TheGeneve"Geneve OptionClassClass" registryshall consistconsists of 16-bit hexadecimal values along with descriptive strings, assignee/contactinformationinformation, and references. The registration rules for the new registry are (as defined by <xreftarget="RFC8126"/>):</t> <texttable style="full"><ttcol> Range</ttcol> <ttcol>target="RFC8126" format="default"/>):</t> <table align="center"> <name>Geneve Option Class Registry Ranges</name> <thead> <tr> <th align="left"> Range</th> <th align="left"> RegistrationProcedures</ttcol> <c>0x0000..0x00FF</c> <c>IETF Review</c> <c>0x0100..0xFEFF</c> <c>FirstProcedures</th> </tr> </thead> <tbody> <tr> <td align="left">0x0000-0x00FF</td> <td align="left">IETF Review</td> </tr> <tr> <td align="left">0x0100-0xFEFF</td> <td align="left">First Come FirstServed</c> <c>0xFF00..0xFFFF</c> <c>Experimental Use</c> </texttable> <t> Initial registrations in the new registry are as follows:</t> <texttable style="full"><ttcol> Option Class</ttcol> <ttcol> Description</ttcol> <ttcol> Assignee/Contact </ttcol> <ttcol> References</ttcol> <c>0x0100</c> <c>Linux</c> <c></c> <c></c> <c>0x0101</c> <c>Open vSwitch (OVS)</c> <c></c> <c></c> <c>0x0102</c> <c>Open Virtual Networking (OVN)</c> <c></c> <c></c> <c>0x0103</c> <c>In-band Network Telemetry (INT)</c> <c></c> <c></c> <c>0x0104</c> <c>VMware, Inc.</c> <c></c> <c></c> <c>0x0105</c> <c>Amazon.com, Inc.</c> <c></c> <c></c> <c>0x0106</c> <c>Cisco Systems, Inc.</c> <c></c> <c></c> <c>0x0107</c> <c>Oracle Corporation</c> <c></c> <c></c> <c>0x0108..0x0110</c> <c>Amazon.com, Inc.</c> <c></c> <c></c> </texttable> </section> <section title="Contributors" anchor="section-8"><t> The following individuals were authors of an earlier version of this document and made significant 
contributions:</t> <figure><artwork><![CDATA[ Pankaj Garg Microsoft Corporation 1 Microsoft Way Redmond, WA 98052 USA Email: pankajg@microsoft.com Chris Wright Red Hat Inc. 1801 Varsity Drive Raleigh, NC 27606 USA Email: chrisw@redhat.com Kenneth Duda Arista Networks 5453 Great America Parkway Santa Clara, CA 95054 USA Email: kduda@arista.com Dinesh G. Dutt Independent Email: didutt@gmail.com Jon Hudson Independent Email: jon.hudson@gmail.com Ariel Hendel Facebook, Inc. 1 Hacker Way Menlo Park, CA 94025 USA Email: ahendel@fb.com ]]></artwork> </figure> </section> <section title="Acknowledgements" anchor="section-9"> <t> The authors wish to acknowledge Puneet Agarwal, David Black, Sami Boutros, Scott Bradner, Martin Casado, Alissa Cooper, Roman Danyliw, Bruce Davie, Anoop Ghanwani, Benjamin Kaduk, Suresh Krishnan, Mirja Kuhlewind, Barry Leiba, Daniel Migault, Greg Mirksy, Tal Mizrahi, Kathleen Moriarty, Magnus Nystrom, Adam Roach, Sabrina Tanamal, Dave Thaler, Eric Vyncke, Magnus Westerlund and many other members of the NVO3 WG for their reviews, comments and suggestions.</t> <t> The authors would like to thank Sam Aldrin, Alia Atlas, Matthew Bocci, Benson Schliesser, and Martin Vigoureux for their guidance throughout the process.</t>Served</td> </tr> <tr> <td align="left">0xFF00-0xFFFF</td> <td align="left">Experimental Use</td> </tr> </tbody> </table> </section> </middle> <back><references title="Normative References"> &RFC0768; &RFC0792; &RFC1112; &RFC1191; &RFC2003; &RFC2119; &RFC4443; &RFC6040; &RFC6936; &RFC7365; &RFC8085; &RFC8126; &RFC8174; &RFC8200; &RFC8201;<displayreference target="I-D.ietf-nvo3-encap" to="NVO3-ENCAP"/> <displayreference target="I-D.ietf-nvo3-dataplane-requirements" to="NVO3-DATAPLANE"/> <displayreference target="I-D.ietf-intarea-tunnels" to="INTAREA-TUNNELS"/> <references> <name>References</name> <references> <name>Normative References</name> <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.0768.xml"/> 
<xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.0792.xml"/> <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.1122.xml"/> <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.1191.xml"/> <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.2003.xml"/> <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.2119.xml"/> <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.4443.xml"/> <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.6040.xml"/> <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.6936.xml"/> <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.7365.xml"/> <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.8085.xml"/> <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.8126.xml"/> <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.8174.xml"/> <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.8200.xml"/> <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.8201.xml"/> </references><references title="Informative References"><references> <name>Informative References</name> <reference anchor="ETYPES" target="https://www.iana.org/assignments/ieee-802-numbers"> <front> <title>IEEE 802 Numbers</title> <author><organization>The IEEE Registration Authority</organization><organization>IANA</organization> </author><date/></front> </reference>&I-D.ietf-nvo3-encap; &I-D.ietf-nvo3-dataplane-requirements; &I-D.ietf-intarea-tunnels;<xi:include href="https://datatracker.ietf.org/public/rfc/bibxml3/reference.I-D.ietf-nvo3-encap.xml"/> <xi:include href="https://datatracker.ietf.org/public/rfc/bibxml3/reference.I-D.ietf-nvo3-dataplane-requirements.xml"/> <xi:include 
href="https://datatracker.ietf.org/public/rfc/bibxml3/reference.I-D.ietf-intarea-tunnels.xml"/> <reference anchor="IANA-PR" target="https://www.iana.org/protocols"> <front> <title>Protocol Registries</title> <author> <organization>IANA</organization> </author><date/></front> </reference> <reference anchor="IANA-SN" target="https://www.iana.org/assignments/service-names-port-numbers"> <front> <title>Service Name and Transport Protocol Port Number Registry</title> <author> <organization>IANA</organization> </author><date/></front> </reference><!--&IEEE.802.1Q_2014;--><referenceanchor='IEEE.802.1Q_2018' target='http://ieeexplore.ieee.org/servlet/opac?punumber=8403925'>anchor="IEEE.802.1Q_2018" target="http://ieeexplore.ieee.org/servlet/opac?punumber=8403925"> <front> <title>IEEE Standard for Local and Metropolitan Area Networks--Bridges and Bridged Networks</title> <seriesInfo name="DOI" value="10.1109/IEEESTD.2018.8403927"/> <seriesInfo name="IEEE" value="802.1Q-2018"/> <author> <organization>IEEE</organization> </author> <dateday='06' month='July' year='2018' /> <abstract><t>This standard specifies how the Media Access Control (MAC) Service is supported by Bridged Networks, the principles of operation of those networks, and the operation of MAC Bridges and VLAN Bridges, including management, protocols, and algorithms</t> </abstract>month="July" year="2018"/> </front><seriesInfo name='IEEE' value='802.1Q-2018' /> <seriesInfo name='DOI' value='10.1109/ieeestd.2018.8403927' /></reference>&RFC2983; &RFC3031; &RFC3552; &RFC3985; &RFC4301; &RFC5374; &RFC6335; &RFC6438; &RFC7348; &RFC7637; &RFC8014; &RFC8086; &RFC8293;<xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.2983.xml"/> <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.3031.xml"/> <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.3552.xml"/> <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.3985.xml"/> <xi:include 
href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.4301.xml"/> <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.5374.xml"/> <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.6438.xml"/> <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.7348.xml"/> <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.7637.xml"/> <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.8014.xml"/> <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.8086.xml"/> <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.8293.xml"/> <reference anchor="VL2"target="https://www.sigcomm.org/sites/default/files/ccr/papers/2009/October/1594977-1592576.pdf">target="https://dl.acm.org/doi/10.1145/1594977.1592576"> <front> <title>VL2: A Scalable and Flexible Data Center Network</title> <seriesInfo name="DOI" value="10.1145/1594977.1592576"/> <author surname="Greenberg, A., et al."><organization></organization><organization/> </author> <dateyear="2009" />month="August" year="2009"/> </front><seriesInfo name="ACM SIGCOMM" value="Computer<refcontent>ACM SIGCOMM Computer CommunicationReview"/> <seriesInfo name="DOI" value="10.1145/1594977.1592576"/>Review</refcontent> </reference> </references> </references> <section anchor="sec-9" numbered="false" toc="default"> <name>Acknowledgements</name> <t> The authors wish to acknowledge <contact fullname="Puneet Agarwal"/>, <contact fullname="David Black"/>, <contact fullname="Sami Boutros"/>, <contact fullname="Scott Bradner"/>, <contact fullname="Martín Casado"/>, <contact fullname="Alissa Cooper"/>, <contact fullname="Roman Danyliw"/>, <contact fullname="Bruce Davie"/>, <contact fullname="Anoop Ghanwani"/>, <contact fullname="Benjamin Kaduk"/>, <contact fullname="Suresh Krishnan"/>, <contact fullname="Mirja Kühlewind"/>, <contact fullname="Barry Leiba"/>, <contact fullname="Daniel 
Migault"/>, <contact fullname="Greg Mirksy"/>, <contact fullname="Tal Mizrahi"/>, <contact fullname="Kathleen Moriarty"/>, <contact fullname="Magnus Nyström"/>, <contact fullname="Adam Roach"/>, <contact fullname="Sabrina Tanamal"/>, <contact fullname="Dave Thaler"/>, <contact fullname="Éric Vyncke"/>, <contact fullname="Magnus Westerlund"/>, and many other members of the NVO3 Working Group for their reviews, comments, and suggestions.</t> <t> The authors would like to thank <contact fullname="Sam Aldrin"/>, <contact fullname="Alia Atlas"/>, <contact fullname="Matthew Bocci"/>, <contact fullname="Benson Schliesser"/>, and <contact fullname="Martin Vigoureux"/> for their guidance throughout the process.</t> </section> <section anchor="sec-8" numbered="false" toc="default"> <name>Contributors</name> <t> The following individuals were authors of an earlier version of this document and made significant contributions:</t> <contact fullname="Pankaj Garg" > <organization>Microsoft Corporation</organization> <address> <postal> <street>1 Microsoft Way</street> <city>Redmond</city> <region>WA</region><code>98052</code> <country>United States of America</country> </postal> <email>pankajg@microsoft.com</email> </address> </contact> <contact fullname="Chris Wright" > <organization>Red Hat Inc.</organization> <address> <postal> <street>1801 Varsity Drive</street> <city>Raleigh</city> <region>NC</region><code>27606</code> <country>United States of America</country> </postal> <email>chrisw@redhat.com</email> </address> </contact> <contact fullname="Kenneth Duda" > <organization>Arista Networks</organization> <address> <postal> <street>5453 Great America Parkway</street> <city>Santa Clara</city> <region>CA</region><code>95054</code> <country>United States of America</country> </postal> <email>kduda@arista.com</email> </address> </contact> <contact fullname="Dinesh G. 
Dutt" > <organization>Independent</organization> <address> <postal> <street></street> <city></city> <region></region><code></code> <country></country> </postal> <email>didutt@gmail.com</email> </address> </contact> <contact fullname="Jon Hudson" > <organization>Independent</organization> <address> <postal> <street></street> <city></city> <region></region><code></code> <country></country> </postal> <email>jon.hudson@gmail.com</email> </address> </contact> <contact fullname="Ariel Hendel" > <organization>Facebook, Inc.</organization> <address> <postal> <street>1 Hacker Way</street> <city>Menlo Park</city> <region>CA</region><code>94025</code> <country>United States of America</country> </postal> <email>ahendel@fb.com</email> </address> </contact> </section> </back> </rfc>