wdiff rfc8900xml2.original.xml rfc8900.xml

<?xml version='1.0' encoding='utf-8'?>

<!DOCTYPE rfc SYSTEM "rfc2629.dtd">
<?rfc toc="yes"?>
<?rfc tocompact="yes"?>
<?rfc tocdepth="3"?>
<?rfc tocindent="yes"?>
<?rfc symrefs="yes"?>
<?rfc sortrefs="yes"?>
<?rfc comments="yes"?>
<?rfc inline="yes"?>
<?rfc compact="yes"?>
<?rfc subcompact="no"?> "rfc2629-xhtml.ent">
<rfc xmlns:xi="http://www.w3.org/2001/XInclude"
category="bcp"
docName="draft-ietf-intarea-frag-fragile-17"
     ipr="trust200902">
number="8900"
consensus="true"
ipr="trust200902"
obsoletes=""
updates=""
submissionType="IETF"
xml:lang="en"
tocInclude="true"
tocDepth="3"
symRefs="true"
sortRefs="true"
version="3">
  <!-- xml2rfc v2v3 conversion 2.45.3 -->
  <front>
    <title abbrev="IP Fragmentation Fragile">IP Fragmentation Considered
    Fragile</title>
    <seriesInfo name="RFC" value="8900"/>
    <seriesInfo name="BCP" value="230"/>

    <author fullname="Ron Bonica" initials="R." surname="Bonica">
      <organization>Juniper Networks</organization>
      <address>
        <postal>
          <street>2251 Corporate Park Drive</street>
          <city>Herndon</city>
          <code>20171</code>
          <region>Virginia</region>

          <country>USA</country>
          <country>United States of America</country>
        </postal>
        <email>rbonica@juniper.net</email>
      </address>
    </author>
    <author fullname="Fred Baker" initials="F." surname="Baker">
      <organization>Unaffiliated</organization>
      <address>
        <postal>
          <street/>
          <city>Santa Barbara</city>
          <region>California</region>
          <code>93117</code>

          <country>USA</country>
          <country>United States of America</country>
        </postal>
        <email>FredBaker.IETF@gmail.com</email>
      </address>
    </author>
    <author fullname="Geoff Huston" initials="G." surname="Huston">
      <organization>APNIC</organization>
      <address>
        <postal>
          <street>6 Cordelia St</street>
          <city>Brisbane</city>
          <region>4101 QLD</region>
          <code/>
          <country>Australia</country>
        </postal>
        <email>gih@apnic.net</email>
      </address>
    </author>
    <author fullname="Robert M. Hinden" initials="R." surname="Hinden">
      <organization>Check Point Software</organization>
      <address>
        <postal>
          <street>959 Skyway Road</street>
          <city>San Carlos</city>
          <region>California</region>
          <code>94070</code>

          <country>USA</country>
          <country>United States of America</country>
        </postal>
        <email>bob.hinden@gmail.com</email>
      </address>
    </author>
    <author fullname="Ole Troan" initials="O." surname="Troan">
      <organization>Cisco</organization>
      <address>
        <postal>
          <street>Philip Pedersens vei 1</street>
          <city>N-1366 Lysaker</city>
          <country>Norway</country>
        </postal>
        <email>ot@cisco.com</email>
      </address>
    </author>
    <author fullname="Fernando Gont" initials="F." surname="Gont">
      <organization>SI6 Networks</organization>
      <address>
        <postal>
          <street>Evaristo Carriego 2644</street>
          <city>Haedo</city>
          <region>Provincia de Buenos Aires</region>
          <country>Argentina</country>
        </postal>
        <email>fgont@si6networks.com</email>
      </address>
    </author>

    <date/>
    <date year="2020" month="September"/>
    <area>Internet Area</area>
    <workgroup>Internet Area WG</workgroup>
    <keyword>IPv6</keyword>
    <keyword>Fragmentation</keyword>

    <abstract>
      <t>This document describes IP fragmentation and explains how it
      introduces fragility to Internet communication.</t>
      <t>This document also proposes alternatives to IP fragmentation and
      provides recommendations for developers and network operators.</t>
    </abstract>
  </front>
  <middle>
    <section numbered="true" toc="default">
      <name>Introduction</name>
      <t>Operational experience <xref target="Kent" format="default"/>
      <xref target="Huston" format="default"/> <xref target="RFC7872" format="default"/>
      reveals that IP fragmentation
      introduces fragility to Internet communication. This document describes
      IP fragmentation and explains the fragility it introduces. It also
      proposes alternatives to IP fragmentation and provides recommendations
      for developers and network operators.</t>
      <t>While this document identifies issues associated with IP
      fragmentation, it does not recommend deprecation. Legacy protocols that
      depend upon IP fragmentation would do well to be updated to remove that dependency.
      However, some applications and environments (see <xref target="rely" format="default"/>)
      require IP fragmentation.  In these cases, the protocol will continue
      to rely on IP fragmentation, but the designer should be aware that
      fragmented packets may result in black holes.  A design should include
      appropriate safeguards.</t>
      <t>Rather than deprecating IP fragmentation, this document recommends
      that upper-layer protocols address the problem of fragmentation at their
      layer, reducing their reliance on IP fragmentation to the greatest
      degree possible.</t>

<!--
          <section title="IP-in-IP Tunnels">
            <t>This document acknowledges that in some cases, packets must be
            fragmented within IP-in-IP tunnels <xref
            target="I-D.ietf-intarea-tunnels"/>. Therefore, this document makes no
            additional recommendations regarding IP-in-IP tunnels.</t>
           </section>
-->

    <section title="Requirements Language">
      <t>The numbered="true" toc="default">
        <name>Requirements Language</name>
        <t>
    The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT",
      "SHOULD", "SHOULD NOT", "RECOMMENDED", "NOT RECOMMENDED", "MAY", "<bcp14>MUST</bcp14>", "<bcp14>MUST NOT</bcp14>",
    "<bcp14>REQUIRED</bcp14>", "<bcp14>SHALL</bcp14>", "<bcp14>SHALL NOT</bcp14>",
    "<bcp14>SHOULD</bcp14>", "<bcp14>SHOULD NOT</bcp14>",
    "<bcp14>RECOMMENDED</bcp14>", "<bcp14>NOT RECOMMENDED</bcp14>",
    "<bcp14>MAY</bcp14>", and
      "OPTIONAL" "<bcp14>OPTIONAL</bcp14>" in this document are to be
    interpreted as described in BCP 14 <xref
      target="RFC2119">BCP 14</xref> target="RFC2119"/> <xref
    target="RFC8174"/> when, and only when, they appear in all capitals, as
    shown here.</t> here.
        </t>
      </section>
    </section>
    <section title="IP Fragmentation"> numbered="true" toc="default">
      <name>IP Fragmentation</name>
      <section anchor="pmtu" title="Links, numbered="true" toc="default">
        <name>Links, Paths, MTU MTU, and PMTU"> PMTU</name>
        <t>An Internet path connects a source node to a destination node. A
        path may contain links and routers. If a path contains more than one
        link, the links are connected in series series, and a router connects each
        link to the next.</t>
        <t>Internet paths are dynamic. Assume that the path from one node
        to another contains a set of links and routers. If a link or a
        router fails, the path can also change so that it includes a
        different set of links and routers.</t>
        <t>
    Each link is constrained by the number of octets that it can convey in
    a single IP packet.  This constraint is called the link Maximum
    Transmission Unit (MTU). <xref target="RFC0791" format="default">IPv4</xref>
    requires every link to support an MTU of 68 octets or greater (see
    <xref target="note-1" format="none">NOTE 1</xref>).
    <xref target="RFC8200" format="default">IPv6</xref> similarly requires every link to
    support an MTU of 1280 octets or greater. These are called the IPv4 and IPv6 minimum link MTUs.
</t>
        <t>Some links, and some ways of using links, result in
	additional variable overhead. For the simple case of tunnels,
	this document defers to other documents.  For other cases,
	such as MPLS, this document considers the Link link MTU to include
	appropriate allowance for any such overhead.</t>
        <t>Likewise, each Internet path is constrained by the number of octets
        that it can convey in a single IP packet. This constraint is called
        the Path MTU (PMTU). For any given path, the PMTU is equal to the
        smallest of its link MTUs. Because Internet paths are dynamic, PMTU
        is also dynamic.</t>
        <t>For reasons described below, source nodes estimate the PMTU between
        themselves and destination nodes. A source node can produce extremely
        conservative PMTU estimates in which:</t>

        <t><list style="symbols">
            <t>The
        <ul spacing="normal">
          <li>The estimate for each IPv4 path is equal to the IPv4 minimum
            link MTU.</t>

            <t>The MTU.</li>
          <li>The estimate for each IPv6 path is equal to the IPv6 minimum
            link MTU.</t>
          </list>While MTU.</li>
        </ul>
        <t>While these conservative estimates are guaranteed to be less
        than or equal to the actual PMTU, they are likely to be much less than
        the actual PMTU. This may adversely affect upper-layer protocol
        performance.</t>
        <t>By executing Path MTU Discovery
        (PMTUD) procedures <xref target="RFC1191" format="default"/>
        <xref target="RFC8201" format="default"/>, a source node can
        maintain a less conservative estimate of the PMTU between itself and a
        destination node. In PMTUD, the source node produces an initial PMTU
        estimate. This initial estimate is equal to the MTU of the first link
        along the path to the destination node. It can be greater than the
        actual PMTU.</t>
        <t>Having produced an initial PMTU estimate, the source node sends
        non-fragmentable IP packets to the destination node (see <xref target="note-2" format="none">NOTE 2</xref>). If
        one of these packets is larger than the actual PMTU, a downstream
        router will not be able to forward the packet through the next link
        along the path. Therefore, the downstream router drops the packet and
        sends an Internet Control Message Protocol
        (ICMP)
        <xref target="RFC0792" format="default"/> <xref target="RFC4443" format="default"/> Packet Too Big (PTB) message to
        the source node (see <xref target="note-3" format="none">NOTE 3</xref>). The ICMP PTB message indicates the MTU
        of the link through which the packet could not be forwarded. The
        source node uses this information to refine its PMTU estimate.</t>
        <t>PMTUD produces a running estimate of the PMTU between a source node
        and a destination node. Because PMTU is dynamic, the PMTU estimate can
        be larger than the actual PMTU. In order to detect PMTU increases,
        PMTUD occasionally resets the PMTU estimate to its initial value and
        repeats the procedure described above.</t>
        <t>Ideally, PMTUD operates as described above. However, in some
        scenarios, PMTUD fails. For example:</t>

        <t><list style="symbols">
            <t>PMTUD
        <ul spacing="normal">
          <li>PMTUD relies on the network's ability to deliver ICMP PTB
            messages to the source node. If the network cannot deliver ICMP
            PTB messages to the source node, PMTUD fails.</t>

            <t>PMTUD fails.</li>
          <li>PMTUD is susceptible to attack because ICMP messages are easily
            <xref target="RFC5927">forged</xref> target="RFC5927" format="default">forged</xref> and not authenticated by the
            receiver. Such attacks can cause PMTUD to produce unnecessarily
            conservative PMTU estimates.</t>
          </list></t>

        <t>
        NOTE 1: In estimates.</li>
        </ul>

<dl newline="false" spacing="normal">
        <dt anchor="note-1">NOTE 1:</dt>
       <dd>In IPv4, every host must be capable of receiving able to reassemble a packet
        whose length is less than or equal to 576 bytes. octets. However, the IPv4 minimum
        link MTU is not 576. Section 3.2 <xref target="RFC0791" section="3.2" sectionFormat="bare" format="default"/>
        of RFC 791 <xref target="RFC0791" format="default">RFC 791</xref> explicitly states
        that the IPv4 minimum link MTU is 68 bytes.  But for practical
        purposes, many network operators consider the IPv4 minimum link
        MTU to be 576 bytes, to minimize the requirement for
        fragmentation en route.  So, for the purposes of this document,
        we assume that the IPv4 minimum link MTU is 576 bytes.
	</t>

        <t>NOTE 2: A octets.
       </dd>
        <dt anchor="note-2">NOTE 2:</dt><dd>A non-fragmentable packet can be fragmented at its source.
        However, it cannot be fragmented by a downstream node. An IPv4 packet
        whose DF-bit Don't Fragment (DF) bit is set to 0 is fragmentable. An IPv4 packet whose
        DF-bit
        DF bit is set to 1 is non-fragmentable. All IPv6 packets are also
        non-fragmentable.</t>

        <t>NOTE 3: The
        non-fragmentable.</dd>
        <dt anchor="note-3">NOTE 3:</dt> <dd>The ICMP PTB message has two instantiations. In <xref
        target="RFC0792">ICMPv4</xref>, target="RFC0792" format="default">ICMPv4</xref>, the ICMP PTB message is a Destination
        Unreachable message with Code equal to 4 fragmentation (fragmentation needed and DF
        set.
        set). This message was augmented by <xref target="RFC1191"/> target="RFC1191" format="default"/> to
        indicate the MTU of the link through which the packet could not be
        forwarded. In <xref target="RFC4443">ICMPv6</xref>, target="RFC4443" format="default">ICMPv6</xref>, the ICMP PTB
        message is a Packet Too Big Message with Code equal to 0. This
        message also indicates the MTU of the link through which the packet
        could not be forwarded.</t> forwarded.</dd>
</dl>
      </section>
      <section title="Fragmentation Procedures"> numbered="true" toc="default">
        <name>Fragmentation Procedures</name>
        <t>When an upper-layer protocol submits data to the underlying IP
        module, and the resulting IP packet's length is greater than the PMTU,
        the packet is divided into fragments. Each fragment includes an IP
        header and a portion of the original packet.</t>
        <t><xref target="RFC0791" format="default"/> describes IPv4 fragmentation procedures.
        An IPv4 packet whose DF bit is set to 1 may be fragmented by the
        source node, but may not be fragmented by a downstream router. An IPv4
        packet whose DF bit is set to 0 may be fragmented by the source
        node or by a downstream router. When an IPv4 packet is fragmented, all
        IP options (which are within the IPv4 header) appear in the first fragment, but only options whose "copy"
        bit is set to 1 appear in subsequent fragments.</t>
        <t><xref target="RFC8200"/>, target="RFC8200" format="default"/>, notably in section 4.5,
        Section <xref target="RFC8200" section="4.5" sectionFormat="bare" format="default"/>, describes
	IPv6 fragmentation procedures.  An IPv6 packet may be
	fragmented only at the source node. When an IPv6 packet is
	fragmented, all extension headers appear in the first
	fragment, but only per-fragment headers appear in subsequent
	fragments. Per-fragment headers include the following:</t>

        <t><list style="symbols">
            <t>The
        <ul spacing="normal">
          <li>The IPv6 header.</t>

            <t>The Hop-by-hop header.</li>
          <li>The Hop-by-Hop Options header (if present)</t>

            <t>The present).</li>
          <li>The Destination Options header (if present and if it precedes a
            Routing header)</t>

            <t>The header).</li>
          <li>The Routing Header header (if present)</t>

            <t>The present).</li>
          <li>The Fragment Header</t>
          </list></t> header.</li>
        </ul>
        <t>In IPv4, the upper-layer header usually appears in the
        first fragment, due to the sizes of the headers involved.
        In IPv6, the upper-layer header must appear in the first fragment.</t>
      </section>
      <section anchor="upper" title="Upper-Layer numbered="true" toc="default">
        <name>Upper-Layer Reliance on IP Fragmentation"> Fragmentation</name>
        <t>Upper-layer protocols can operate in the following modes:</t>

        <t><list style="symbols">
            <t>Do
        <ul spacing="normal">
          <li>Do not rely on IP fragmentation.</t>

            <t>Rely fragmentation.</li>
          <li>Rely on IP fragmentation by the source node only.</t>

            <t>Rely only.</li>
          <li>Rely on IP fragmentation by any node.</t>
          </list></t> node.</li>
        </ul>
        <t>Upper-layer protocols running over IPv4 can operate in all of the
        above-mentioned modes. Upper-layer protocols running over IPv6 can
        operate in the first and second modes only.</t>
        <t>Upper-layer protocols that operate in the first two modes (above)
        require access to the PMTU estimate. In order to fulfill this
        requirement, they can:</t>

        <t><list style="symbols">
            <t>Estimate
        <ul spacing="normal">
          <li>Estimate the PMTU to be equal to the IPv4 or IPv6 minimum link
            MTU.</t>

            <t>Access
            MTU.</li>
          <li>Access the estimate that PMTUD produced.</t>

            <t>Execute produced.</li>
          <li>Execute PMTUD procedures themselves.</t>

            <t>Execute <xref target="RFC4821">Packetization themselves.</li>
          <li>Execute Packetization Layer PMTUD
            (PLPMTUD)</xref> (PLPMTUD) procedures
            <xref target="I-D.ietf-tsvwg-datagram-plpmtud"/>
            procedures.</t>
          </list>According target="RFC4821" format="default"/>
            <xref target="RFC8899" format="default"/>.</li>
        </ul>
        <t>According to PLPMTUD procedures, the upper-layer protocol
        maintains a running PMTU estimate. It does so by sending probe packets
        of various sizes to its upper-layer peer and receiving
        acknowledgements. This strategy differs from PMTUD in that it relies
        on acknowledgement of received messages, as opposed to ICMP PTB
        messages concerning dropped messages. Therefore, PLPMTUD does not rely
        on the network's ability to deliver ICMP PTB messages to the
        source.</t>
      </section>
    </section>
    <section title="Increased Fragility"> numbered="true" toc="default">
      <name>Increased Fragility</name>
      <t>This section explains how IP fragmentation introduces fragility to
      Internet communication.</t>
      <section anchor="virtualreassembly" title="Virtual Reassembly"> numbered="true" toc="default">
        <name>Virtual Reassembly</name>
        <t>Virtual reassembly is a procedure in which a device
	conceptually reassembles a packet, forwards its fragments, and discards
	the reassembled copy. In A+P <xref target="RFC6346" format="default">Address plus Port (A+P)</xref>
        and CGN, <xref target="RFC6888" format="default">Carrier Grade NAT (CGN)</xref>, virtual reassembly
	is required in order to correctly translate fragment
	addresses.  It could be useful to address the problems in Sections
	<xref target="mb"/>, target="mb" format="counter"/>, <xref target="nat"/>, target="nat" format="counter"/>,
        <xref
	target="statelessfirewall"/>, target="statelessfirewall" format="counter"/>, and <xref target="ecmp"/>. target="ecmp" format="counter"/>.
        </t>
        <t>Virtual reassembly in the network is problematic, however,
	because it is computationally expensive and because it holds
        state for indeterminate periods of time, time. Therefore, it is prone
        to errors
	and, is prone to <xref target="at">attacks</xref>.</t>

	<t>One of the benefits of fragmenting at the source, as IPv6 does,
	is that there is no question of temporary state or involved
	processes as required in virtual fragmentation. The sender
	has the entire message, and is fragmenting it as needed -
	and can apply that knowledge consistently across the fragments
	it produces. It is better than virtual fragmentation in
	that sense.</t> <xref target="at" format="default">attacks</xref>.</t>
      </section>
      <section anchor="mb" title="Policy-Based Routing"> numbered="true" toc="default">
        <name>Policy-Based Routing</name>
        <t>IP fragmentation causes problems for routers that implement
        policy-based routing.</t>
        <t>When a router receives a packet, it identifies the next hop on
        route to the packet's destination and forwards the packet to that
        next hop. In order to identify the next hop, the router interrogates a
        local data structure called the Forwarding Information Base (FIB).</t>
        <t>Normally, the FIB contains destination-based entries that map a
        destination prefix to a next hop. Policy-based routing allows
        destination-based and policy-based entries to coexist in the same FIB.
        A policy-based FIB entry maps multiple fields, drawn from either the
        IP or transport-layer header, to a next hop.</t>
        <t/>

        <texttable
        <table anchor="FIB" style="full" title="Policy-Based align="center">
          <name>Policy-Based Routing FIB">
          <ttcol align="center">Entry</ttcol>

          <ttcol align="left">Type</ttcol>

          <ttcol>Dest. Prefix</ttcol>

          <ttcol FIB</name>
          <thead>
            <tr>
              <th align="center">Entry</th>
              <th align="left">Type</th>
              <th align="left">Dest.&nbsp;Prefix</th>
              <th align="left">Next Hdr / Dest. Port</ttcol>

          <ttcol>Next-Hop</ttcol>

          <c/>

          <c/>

          <c/>

          <c/>

          <c/>

          <c>1</c>

          <c>Destination- based</c>

          <c>2001:db8::1/128</c>

          <c>Any Port</th>
              <th align="left">Next Hop</th>
            </tr>
          </thead>
          <tbody>
            <tr>
              <td align="center">1</td>
              <td align="left">Destination-based</td>
              <td align="left">2001:db8::1/128</td>
              <td align="left">Any / Any</c>

          <c>2001:db8::2</c>

          <c/>

          <c/>

          <c/>

          <c/>

          <c/>

          <c>2</c>

          <c>Policy- based</c>

          <c>2001:db8::1/128</c>

          <c>TCP Any</td>
              <td align="left">2001:db8:2::2</td>
            </tr>
            <tr>
              <td align="center">2</td>
              <td align="left">Policy-based</td>
              <td align="left">2001:db8::1/128</td>
              <td align="left">TCP / 80</c>

          <c>2001:db8::3</c>
        </texttable> 80</td>
              <td align="left">2001:db8:3::3</td>
            </tr>
          </tbody>
        </table>
        <t>Assume that a router maintains the FIB in <xref target="FIB" format="default"/>. The
        first FIB entry is destination-based. It maps a destination prefix
        2001:db8::1/128 to a next hop 2001:db8:2::2. The second FIB entry is
        policy-based. It maps the same destination prefix 2001:db8::1/128
        and a destination port (TCP / 80) to a different next hop
        (2001:db8:3::3). The second entry is more specific than the first.</t>
        <t>When the router receives the first fragment of a packet that is
        destined for TCP port 80 on 2001:db8::1, it interrogates the FIB. Both
        FIB entries satisfy the query. The router selects the second FIB entry
        because it is more specific and forwards the packet to
        2001:db8:3::3.</t>
        <t>When the router receives the second fragment of the packet, it
        interrogates the FIB again. This time, only the first FIB entry
        satisfies the query, because the second fragment contains no
        indication that the packet is destined for TCP port 80. Therefore, the
        router selects the first FIB entry and forwards the packet to
        2001:db8:2::2.</t>
        <t>Policy-based routing is also known as filter-based forwarding.</t>
      </section>
      <section anchor="nat" title="Network numbered="true" toc="default">
        <name>Network Address Translation (NAT)"> (NAT)</name>
        <t>IP fragmentation causes problems for Network Address Translation
        (NAT) devices. When a NAT device detects a new, outbound flow, it maps
        that flow's source port and IP address to another source port and IP
        address. Having created that mapping, the NAT device translates:</t>

        <t><list style="symbols">
            <t>The Source
        <ul spacing="normal">
          <li>The source IP Address address and Source Port source port on each outbound
            packet.</t>

            <t>The Destination
            packet.</li>
          <li>The destination IP Address address and Destination Port destination port on each inbound
            packet.</t>
          </list></t>
            packet.</li>
        </ul>
        <t></t>
        <t><xref target="RFC6346" format="default">A+P</xref> and
        <xref target="RFC6888" format="default">Carrier Grade NAT (CGN)</xref>
        are two common NAT strategies. In both approaches, the NAT device must virtually
        reassemble fragmented packets in order to translate and forward each
        fragment.</t>
      </section>
      <section anchor="statelessfirewall" title="Stateless Firewalls"> numbered="true" toc="default">
        <name>Stateless Firewalls</name>
        <t>As discussed in more detail in <xref target="at" format="default"/>, IP
        fragmentation causes problems for stateless firewalls whose rules
        include TCP and UDP ports. Because port information is only
        available in the first fragment and not available
        in the subsequent fragments, the firewall is limited to the following
        options:</t>

        <t><list style="symbols">
            <t>Accept
        <ul spacing="normal">
          <li>Accept all trailing subsequent, subsequent fragments, possibly admitting certain
            classes of attack.</t>

            <t>Block attack.</li>
          <li>Block all subsequent fragments, possibly blocking legitimate
            traffic.</t>
          </list>Neither
            traffic.</li>
        </ul>
        <t>Neither option is attractive.</t>
      </section>
      <section anchor="ecmp" title="Equal Cost numbered="true" toc="default">
        <name>Equal-Cost Multipath, Link Aggregate Groups Groups, and Stateless Load-Balancers"> Load Balancers</name>
        <t>IP fragmentation causes problems for Equal-Cost Multipath (ECMP),
        Link Aggregate Groups (LAG), and other stateless load-distribution
        technologies. In order to assign a packet or packet fragment to a
        link, an intermediate node executes a hash (i.e., load-distributing)
        algorithm. The following paragraphs describe a commonly deployed hash
        algorithm.</t>
        <t>If the packet or packet fragment contains a transport-layer header,
        the algorithm accepts the following 5-tuple as input:</t>

        <t><list style="symbols">
            <t>IP
        <ul spacing="normal">

          <li>IP Source Address.</t>

            <t>IP Address.</li>
          <li>IP Destination Address.</t>

            <t>IPv4 Address.</li>
          <li>IPv4 Protocol or IPv6 Next Header.</t>

            <t>transport-layer Header.</li>
          <li>transport-layer source port.</t>

            <t>transport-layer port.</li>
          <li>transport-layer destination port.</t>
          </list>If port.</li>
        </ul>
        <t>If the packet or packet fragment does not contain a
        transport-layer header, the algorithm accepts only the following
        3-tuple as input:</t>

        <t><list style="symbols">
            <t>IP
        <ul spacing="normal">
          <li>IP Source Address.</t>

            <t>IP Address.</li>
          <li>IP Destination Address.</t>

            <t>IPv4 Address.</li>
          <li>IPv4 Protocol or IPv6 Next Header.</t>
          </list></t> Header.</li>
        </ul>
        <t>Therefore, non-fragmented packets belonging to a flow can be
        assigned to one link while fragmented packets belonging to the same
        flow can be divided between that link and another. This can cause
        suboptimal load distribution.</t>
        <t><xref target="RFC6438"/> target="RFC6438" format="default"/> offers a partial solution to this problem
        for IPv6 devices only. According to <xref target="RFC6438"/>:</t>

        <t>"At target="RFC6438" format="default"/>:</t>
        <blockquote>At intermediate routers that perform load balancing, distribution, the hash
        algorithm used to determine the outgoing component-link in an ECMP
        and/or LAG toward the next hop MUST <bcp14>MUST</bcp14> minimally include the 3-tuple
        {dest addr, source addr, flow label} and MAY <bcp14>MAY</bcp14> also include the
        remaining components of the 5-tuple."</t> 5-tuple.</blockquote>
        <t>If the algorithm includes only the 3-tuple {dest addr, source addr,
        flow label}, it will assign all fragments belonging to a packet to the
        same link. (See <xref target="RFC6437" format="default"/> and <xref
        target="RFC7098" format="default"/>).</t>
        <t>In order to avoid the problem described above, implementations
        <bcp14>SHOULD</bcp14> implement the recommendations provided in <xref
        target="lagrec" format="default"/> of this document.</t>
      </section>
      <section title="IPv4 numbered="true" toc="default">
        <name>IPv4 Reassembly Errors at High Data Rates"> Rates</name>
        <t>IPv4 fragmentation is not sufficiently robust for use under some
        conditions in today's Internet. At high data rates, the 16-bit IP
        identification field is not large enough to prevent duplicate IDs, resulting in frequent
        incorrectly assembled IP fragments, and the TCP and UDP checksums are
        insufficient to prevent the resulting corrupted datagrams from being
        delivered to upper-layer protocols. <xref target="RFC4963" format="default"/>
        describes some easily reproduced experiments demonstrating the
        problem and discusses some of the operational implications of these
        observations.</t>
        <t>These reassembly issues do not occur as frequently in IPv6 because
        the IPv6 identification field is 32 bits long.</t>
      </section>
      <section anchor="at" title="Security Vulnerabilities"> numbered="true" toc="default">
        <name>Security Vulnerabilities</name>
        <t>Security researchers have documented several attacks that exploit
        IP fragmentation. The following are examples:</t>

        <t><list style="symbols">
            <t>Overlapping
        <ul spacing="normal">
          <li>Overlapping fragment attacks <xref target="RFC1858"/><xref
            target="RFC3128"/><xref target="RFC5722"/></t>

            <t>Resource target="RFC1858" format="default"/>
          <xref target="RFC3128" format="default"/> <xref target="RFC5722" format="default"/>.</li>
          <li>Resource exhaustion attacks</t>

            <t>Attacks attacks.</li>
          <li>Attacks based on predictable fragment identification values
            <xref target="RFC7739"/></t>

            <t>Evasion target="RFC7739" format="default"/>.</li>
          <li>Evasion of Network Intrusion Detection Systems (NIDS) <xref
            target="Ptacek1998"/></t>
          </list></t> target="Ptacek1998" format="default"/>.</li>
        </ul>
        <t>In the overlapping fragment attack, an attacker constructs a series
        of packet fragments. The first fragment contains an IP header, a
        transport-layer header, and some transport-layer payload. This
        fragment complies with local security policy and is allowed to pass
        through a stateless firewall. A second fragment, having a non-zero nonzero
        offset, overlaps with the first fragment. The second fragment also
        passes through the stateless firewall. When the packet is reassembled,
        the transport layer transport-layer header from the first fragment is overwritten by
        data from the second fragment. The reassembled packet does not comply
        with local security policy. Had it traversed the firewall in one
        piece, the firewall would have rejected it.</t>
        <t>A stateless firewall cannot protect against the overlapping
        fragment attack. However, destination nodes can protect against the
        overlapping fragment attack by implementing the procedures described
        in RFC 1858, RFC 3128 3128, and RFC 8200. These reassembly procedures detect
        the overlap and discard the packet.</t>
        <t>The fragment reassembly algorithm is a stateful procedure in an
        otherwise stateless protocol. Therefore, it can be exploited by
        resource exhaustion attacks. An attacker can construct a series of
        fragmented packets, packets with one fragment missing from each packet so that
        the reassembly is impossible. Thus, this attack causes resource
        exhaustion on the destination node, possibly denying reassembly
        services to other flows. This type of attack can be mitigated by
        flushing fragment reassembly buffers when necessary, at the expense of
        possibly dropping legitimate fragments.</t>
        <t>Each IP fragment contains an "Identification" field that
        destination nodes use to reassemble fragmented packets. Some
        implementations set the Identification field to a predictable value,
        thus making it easy for an attacker to forge malicious IP fragments
        that would cause the reassembly procedure for legitimate packets to
        fail.</t>
        <t>NIDS aims at identifying malicious activity by analyzing network
        traffic. Ambiguity in the possible result of the fragment reassembly
        process may allow an attacker to evade these systems. Many of these
        systems try to mitigate some of these evasion techniques (e.g. By (e.g., by
        computing all possible outcomes of the fragment reassembly process, at
        the expense of increased processing requirements).</t>
      </section>
      <section anchor="PTB" title="PMTU Blackholing numbered="true" toc="default">
        <name>PMTU Black-Holing Due to ICMP Loss"> Loss</name>
        <t>As mentioned in <xref target="upper"/>, target="upper" format="default"/>, upper-layer protocols can
        be configured to rely on PMTUD. Because PMTUD relies upon the network
        to deliver ICMP PTB messages, those protocols also rely on the
        networks to deliver ICMP PTB messages.</t>
        <t>According to <xref target="RFC4890"/>, target="RFC4890" format="default"/>, ICMPv6 PTB messages must not
        be filtered. However, ICMP PTB delivery is not reliable. It is subject
        to both transient and persistent loss.</t>
        <t>Transient loss of ICMP PTB messages can cause transient PMTU black
        holes. When the conditions contributing to transient loss abate, the
        network regains its ability to deliver ICMP PTB messages and
        connectivity between the source and destination nodes is restored.
        <xref target="transLoss"/> target="transLoss" format="default"/> of this document describes conditions that
        lead to transient loss of ICMP PTB messages.</t>
        <t>Persistent loss of ICMP PTB messages can cause persistent black
        holes. Sections <xref target="CPE"/>, target="CPE" format="counter"/>, <xref target="Anycast"/>, target="Anycast" format="counter"/>,
        and <xref
        target="unidirectional"/> target="unidirectional" format="counter"/> of this document describe conditions that
        lead to persistent loss of ICMP PTB messages.</t>
        <t>The problem described in this section is specific to PMTUD. It does
        not occur when the upper-layer protocol obtains its PMTU estimate from
        PLPMTUD or from any other source.</t>
        <section anchor="transLoss" title="Transient Loss"> numbered="true" toc="default">
          <name>Transient Loss</name>
          <t>The following factors can contribute to transient loss of ICMP
          PTB messages:</t>

          <t><list style="symbols">
              <t>Network congestion.</t>

              <t>Packet corruption.</t>

              <t>Transient
          <ul spacing="normal">
            <li>Network congestion.</li>
            <li>Packet corruption.</li>
            <li>Transient routing loops.</t>

              <t>ICMP loops.</li>
            <li>ICMP rate limiting.</t>
            </list></t> limiting.</li>
          </ul>
          <t>The effect of rate limiting may be severe, as RFC 4443 recommends
          strict rate limiting of ICMPv6 traffic.</t>
        </section>
        <section anchor="CPE"
                 title="Incorrect numbered="true" toc="default">
          <name>Incorrect Implementation of Security Policy"> Policy</name>
          <t>Incorrect implementation of security policy can cause persistent
          loss of ICMP PTB messages.</t>
          <t>For example example, assume that a Customer Premise Premises Equipment (CPE) router implements
          the following zone-based security policy:</t>

          <t><list style="symbols">
              <t>Allow
          <ul spacing="normal">
            <li>Allow any traffic to flow from the inside zone to the outside
              zone.</t>

              <t>Do
              zone.</li>
            <li>Do not allow any traffic to flow from the outside zone to the
              inside zone unless it is part of an existing flow (i.e., it was
              elicited by an outbound packet).</t>

            </list>When packet).</li>
          </ul>
          <t>When a correct implementation of the above-mentioned
          security policy receives an ICMP PTB message, it examines the ICMP
          PTB payload in order to determine whether the original packet (i.e.,
          the packet that elicited the ICMP PTB message) belonged to an
          existing flow. If the original packet belonged to an existing flow,
          the implementation allows the ICMP PTB to flow from the outside zone
          to the inside zone. If not, the implementation discards the ICMP PTB
          message.</t>
          <t>When an incorrect implementation of the above-mentioned security
          policy receives an ICMP PTB message, it discards the packet because
          its source address is not associated with an existing flow.</t>
          <t>The security policy described above has been implemented incorrectly on
             many consumer CPE routers.</t>
        </section>
        <section anchor="Anycast" title="Persistent numbered="true" toc="default">
          <name>Persistent Loss Caused By Anycast "> by Anycast</name>
          <t>Anycast can cause persistent loss of ICMP PTB messages. Consider
          the example below:</t>
          <t>A DNS client sends a request to an anycast address. The network
          routes that DNS request to the nearest instance of that anycast
          address (i.e., a DNS Server). server). The DNS server generates a response
          and sends it back to the DNS client. While the response does not
          exceed the DNS server's PMTU estimate, it does exceed the actual
          PMTU.</t>
          <t>A downstream router drops the packet and sends an ICMP PTB
          message to the packet's source (i.e., the anycast address). The network
          routes the ICMP PTB message to the anycast instance closest to the
          downstream router. That anycast instance may not be the DNS server
          that originated the DNS response. It may be another DNS server with
          the same anycast address. The DNS server that originated the
          response may never receive the ICMP PTB message and may never update
          its PMTU estimate.</t>
        </section>
        <section anchor="unidirectional"
                 title="Persistent numbered="true" toc="default">
          <name>Persistent Loss Caused By by Unidirectional Routing"> Routing</name>
          <t>Unidirectional routing can cause persistent loss of ICMP PTB
          messages. Consider the example below:</t>
          <t>A source node sends a packet to a destination node. All
          intermediate nodes maintain a route to the destination node, node but do
          not maintain a route to the source node. In this case, when an
          intermediate node encounters an MTU issue, it cannot send an ICMP
          PTB message to the source node.</t>
        </section>
      </section>
      <section title="Blackholing numbered="true" toc="default">
        <name>Black-Holing Due To to Filtering or Loss"> Loss</name>
        <t>In RFC 7872, researchers sampled Internet paths to determine
        whether they would convey packets that contain IPv6 extension headers.
        Sampled paths terminated at popular Internet sites (e.g., popular web,
        mail
        mail, and DNS servers).</t>
        <t>The study revealed that at least 28% of the sampled paths did not
        convey packets containing the IPv6 Fragment extension header. In most
        cases, fragments were dropped in the destination autonomous system. In
        other cases, the fragments were dropped in transit autonomous
        systems.</t>
        <t>Another <xref target="Huston">study</xref> target="Huston" format="default">study</xref> confirmed this
        finding. It reported that 37% of sampled endpoints used IPv6-capable
        DNS resolvers that were incapable of receiving a fragmented IPv6
        response.</t>
        <t>It is difficult to determine why network operators drop fragments.
        Possible causes follow:</t>

        <t><list style="symbols">
            <t>Hardware
        <ul spacing="normal">
          <li>Hardware inability to process fragmented packets.</t>

            <t>Failure packets.</li>
          <li>Failure to change vendor defaults.</t>

            <t>Unintentional misconfiguration.</t>

            <t>Intentional defaults.</li>
          <li>Unintentional misconfiguration.</li>
          <li>Intentional configuration (e.g., network operators consciously
            choose to drop IPv6 fragments in order to address the issues
            raised in Sections <xref target="mb"/> target="mb" format="counter"/> through <xref target="PTB"/>,
            above.)</t>
          </list></t> target="PTB" format="counter"/>,
            above.)</li>
        </ul>
      </section>
    </section>
    <section title="Alternatives numbered="true" toc="default">
      <name>Alternatives to IP Fragmentation"> Fragmentation</name>
      <t/>
      <section title="Transport Layer Solutions"> numbered="true" toc="default">
        <name>Transport-Layer Solutions</name>
        <t>The <xref target="RFC0793">Transmission target="RFC0793" format="default">Transmission Control Protocol (TCP)</xref>
        can be operated in a mode that does not require IP fragmentation.</t>
        <t>Applications submit a stream of data to TCP. TCP divides that
        stream of data into segments, with no segment exceeding the TCP
        Maximum Segment Size (MSS). Each segment is encapsulated in a TCP
        header and submitted to the underlying IP module. The underlying IP
        module prepends an IP header and forwards the resulting packet.</t>
        <t>If the TCP MSS is sufficiently small, then the underlying IP module
        never produces a packet whose length is greater than the actual PMTU.
        Therefore, IP fragmentation is not required.</t>
        <t>TCP offers the following mechanisms for MSS management:</t>

        <t><list style="symbols">
            <t>Manual configuration</t>

            <t>PMTUD</t>

            <t>PLPMTUD</t>
          </list></t>
        <ul spacing="normal">
          <li>Manual configuration.</li>
          <li>PMTUD.</li>
          <li>PLPMTUD.</li>
        </ul>
        <t>Manual configuration is always applicable. If the MSS is configured
        to a sufficiently low value, the IP layer will never produce a packet
        whose length is greater than the protocol minimum link MTU. However,
        manual configuration prevents TCP from taking advantage of larger link
        MTU's.</t>
        MTUs.</t>
        <t>Upper-layer protocols can implement PMTUD in order to discover and
        take advantage of larger path Path MTUs. However, as mentioned in
        <xref
        target="pmtu"/>, target="pmtu" format="default"/>, PMTUD relies upon the network to deliver ICMP PTB
        messages. Therefore, PMTUD can only provide an estimate of the PMTU in
        environments where the risk of ICMP PTB loss is acceptable (e.g.,
        known to not be filtered).</t>
        <t>By contrast, PLPMTUD does not rely upon the network's ability to
        deliver ICMP PTB messages. It utilises utilizes probe messages sent as TCP
        segments to determine whether the probed PMTU can be successfully used
        across the network path. In PLPMTUD, probing is separated from
        congestion control, so that loss of a TCP probe segment does not cause
        a reduction of the congestion control window. <xref target="RFC4821"/> target="RFC4821" format="default"/>
        defines PLPMTUD procedures for TCP.</t>
        <t>While TCP will never knowingly cause the underlying IP module to
        emit a packet that is larger than the PMTU estimate, it can cause the
        underlying IP module to emit a packet that is larger than the actual
        PMTU. For example, if routing changes and as a result the PMTU becomes
        smaller, TCP will not know until the ICMP PTB message arrives. If this
        occurs, the packet is dropped, the PMTU estimate is updated, the
        segment is divided into smaller segments segments, and each smaller segment is
        submitted to the underlying IP module.</t>
        <t>The <xref target="RFC4340">Datagram target="RFC4340" format="default">Datagram Congestion Control Protocol
        (DCCP)</xref> and the <xref target="RFC4960">Stream target="RFC4960" format="default">Stream Control Transport Transmission
        Protocol (SCTP)</xref> also can be operated in a mode that does not
        require IP fragmentation. They both accept data from an application
        and divide that data into segments, with no segment exceeding a
        maximum size.
        </t>
        <t>DCCP offers manual configuration,
        PMTUD, and PLPMTUD as mechanisms for managing that maximum size.
        Datagram protocols can also implement PLPMTUD to estimate the PMTU
        via<xref target="I-D.ietf-tsvwg-datagram-plpmtud"/>.
        via <xref target="RFC8899" format="default"/>. This proposes
        procedures for performing PLPMTUD with UDP, UDP-Options, UDP options, SCTP, QUIC QUIC,
        and other datagram protocols.</t>
        <t>Currently, <xref target="RFC0768">User target="RFC0768" format="default">User Datagram Protocol (UDP)</xref>
        lacks a fragmentation mechanism of its own and relies on IP
        fragmentation. However, <xref target="I-D.ietf-tsvwg-udp-options"/> target="I-D.ietf-tsvwg-udp-options" format="default"/>
        proposes a fragmentation mechanism for UDP.</t>
      </section>
      <section title="Application Layer Solutions"> numbered="true" toc="default">
        <name>Application-Layer Solutions</name>
        <t><xref target="RFC8085"/> target="RFC8085" format="default"/> recognizes that IP fragmentation reduces
        the reliability of Internet communication. It also recognizes that UDP
        lacks a fragmentation mechanism of its own and relies on IP
        fragmentation. Therefore, <xref target="RFC8085"/> target="RFC8085" format="default"/> offers the
        following advice regarding applications that run over the UDP.</t>

        <t>"An UDP:</t>
        <blockquote>An application SHOULD NOT <bcp14>SHOULD NOT</bcp14> send UDP datagrams that result in IP
        packets that exceed the Maximum Transmission Unit (MTU) along the path
        to the destination. Consequently, an application SHOULD <bcp14>SHOULD</bcp14> either use the
        path MTU information provided by the IP layer or implement Path MTU
        Discovery (PMTUD) itself <xref target="RFC1191" format="default"/>
        <xref target="RFC1981" format="default"/> <xref target="RFC4821" format="default"/> to determine whether the path to a
        destination will support its desired message size without
        fragmentation."</t>
        fragmentation.</blockquote>
        <t>RFC 8085 continues:</t>

        <t>"Applications
        <blockquote>Applications that do not follow the recommendation to do
        PMTU/PLPMTUD discovery SHOULD <bcp14>SHOULD</bcp14> still avoid sending UDP datagrams that
        would result in IP packets that exceed the path MTU. Because the
        actual path MTU is unknown, such applications SHOULD <bcp14>SHOULD</bcp14> fall back to
        sending messages that are shorter than the default effective MTU for
        sending (EMTU_S in <xref target="RFC1122"/>). target="RFC1122" format="default"/>). For IPv4, EMTU_S is the
        smaller of 576 bytes and the first-hop MTU. MTU <xref target="RFC1122" format="default"/>.  For IPv6, EMTU_S is 1280
        bytes <xref target="RFC8200"/>. target="RFC2460" format="default"/>. The effective PMTU for a directly
        connected destination (with no routers on the path) is the configured
        interface MTU, which could be less than the maximum link payload size.
        Transmission of minimum-sized UDP datagrams is inefficient over paths
        that support a larger PMTU, which is a second reason to implement PMTU
        discovery."</t>
        discovery.</blockquote>
        <t>RFC 8085 assumes that for IPv4, IPv4 an EMTU_S of 576 is sufficiently
        small to be supported by most current Internet
        paths, even though the IPv4 minimum link MTU is 68 bytes.</t> octets.</t>
        <t>This advice applies equally to any application that runs directly
        over IP.</t>
      </section>
    </section>
    <section anchor="rely"
             title="Applications numbered="true" toc="default">
      <name>Applications That Rely on IPv6 Fragmentation"> Fragmentation</name>
      <t>The following applications rely on IPv6 fragmentation:</t>

      <t><list style="symbols">
          <t><xref target="RFC1035">DNS </xref></t>

          <t><xref target="RFC2328">OSPFv3</xref><xref target="RFC5340">
          </xref></t>

          <t>Packet-in-packet encapsulations</t>
        </list>Each
      <ul spacing="normal">
        <li><xref target="RFC1035" format="default">DNS</xref>.</li>
        <li><xref target="RFC2328" format="default">OSPFv2</xref>.</li>
        <li><xref target="RFC5340" format="default">OSPFv3</xref>.</li>
        <li>Packet-in-packet encapsulations.</li>
      </ul>
      <t>Each of these applications relies on IPv6 fragmentation to a
      varying degree. In some cases, that reliance is essential, essential and cannot be
      broken without fundamentally changing the protocol. In other cases, that
      reliance is incidental, and most implementations already take
      appropriate steps to avoid fragmentation.</t>
      <t>This list is not comprehensive, and other protocols that rely on IP
      fragmentation may exist. They are not specifically considered in the
      context of this document.</t>
      <section title="Domain numbered="true" toc="default">
        <name>Domain Name Service (DNS)"> (DNS)</name>
        <t>DNS relies on UDP for efficiency, and the consequence is the use of
        IP fragmentation for large responses, as permitted by the Extension Mechanisms for DNS EDNS0 (EDNS0)
        options in the query. It is possible to mitigate the issue of
        fragmentation-based packet loss by having queries use smaller EDNS0
        UDP buffer sizes, sizes or by having the DNS server limit the size of its
        UDP responses to some self-imposed maximum packet size that may be
        less than the preferred EDNS0 UDP Buffer Size. buffer size. In both cases, large
        responses are truncated in the DNS, signaling to the client to
        re-query using TCP to obtain the complete response. However, the
        operational issue of the partial level of support for DNS over TCP,
        particularly in the case where IPv6 transport is being used, becomes a
        limiting factor of the efficacy of this approach <xref
        target="Damas"/>.</t> target="Damas" format="default"/>.</t>
        <t>Larger DNS responses can normally be avoided by aggressively
        pruning the Additional section of DNS responses. One scenario where
        such pruning is ineffective is in the use of DNSSEC, where large key
        sizes act to increase the response size to certain DNS queries. There
        is no effective response to this situation within the DNS other than
        using smaller cryptographic keys and adoption adopting of DNSSEC administrative
        practices that attempt to keep DNS responses as short as possible.</t>
      </section>
      <section title="Open numbered="true" toc="default">
        <name>Open Shortest Path First (OSPF)"> (OSPF)</name>
        <t>OSPF implementations can emit messages large enough to cause
        fragmentation. However, in order to optimize performance, most OSPF
        implementations restrict their maximum message size to a value that
        will not cause fragmentation.</t>
      </section>
      <section title="Packet-in-Packet Encapsulations"> numbered="true" toc="default">
        <name>Packet-in-Packet Encapsulations</name>
        <t> This document acknowledges that in some cases, packets must
	be fragmented within IP-in-IP tunnels.  Therefore, this document
	makes no additional recommendations regarding IP-in-IP
	tunnels.</t>
        <t>In this document, packet-in-packet encapsulations include
        <xref
        target="RFC2003">IP-in-IP </xref>, target="RFC2003" format="default">IP-in-IP</xref>,
        <xref target="RFC2784">Generic target="RFC2784" format="default">Generic Routing Encapsulation (GRE) </xref>, (GRE)</xref>,
        <xref
        target="RFC8086">GRE-in-UDP</xref> target="RFC8086" format="default">GRE-in-UDP</xref>, and
        <xref target="RFC2473">Generic target="RFC2473" format="default">Generic Packet Tunneling in IPv6</xref>.
        <xref target="RFC4459"/> target="RFC4459" format="default"/> describes
        fragmentation issues associated with all of the above-mentioned
        encapsulations.</t>
        <t>The fragmentation strategy described for GRE in
        <xref
        target="RFC7588"/> target="RFC7588" format="default"/> has been deployed for all of the above-mentioned
        encapsulations. This strategy does not rely on IP fragmentation except
        in one corner case. (see Section 3.3.2.2 of RFC 7588
        (See <xref target="RFC7588" section="3.3.2.2" sectionFormat="of" format="default"/>
        and Section 7.1
        of RFC 2473). Section 3.3 of <xref target="RFC7676"/> target="RFC2473" section="7.1" sectionFormat="of" format="default"/>.)
        <xref target="RFC7676" section="3.3" sectionFormat="of" format="default"/> further
        describes this corner case.</t>
        <t>See <xref target="I-D.ietf-intarea-tunnels"/> target="I-D.ietf-intarea-tunnels" format="default"/> for further
        discussion.</t>
      </section>
      <section title="UDP numbered="true" toc="default">
        <name>UDP Applications Enhancing Performance"> Performance</name>
        <t>Some UDP applications rely on IP fragmentation to achieve
        acceptable levels of performance. These applications use UDP datagram
        sizes that are larger than the path Path MTU so that more data can be
        conveyed between the application and the kernel in a single system
        call.</t>
        <t>To pick one example, the <xref target="RFC5326">Licklider target="RFC5326" format="default">Licklider
        Transmission Protocol (LTP),</xref>which (LTP)</xref>, which is in current use on the
        International Space Station (ISS), uses UDP datagram sizes larger than
        the path Path MTU to achieve acceptable levels of performance even though
        this invokes IP fragmentation. More generally, SNMP and video
        applications may transmit an application-layer quantum of data,
        depending on the network layer to fragment and reassemble as
        needed.</t>
        <t/>
      </section>
    </section>
    <section title="Recommendations"> numbered="true" toc="default">
      <name>Recommendations</name>
      <t/>
      <section title="For numbered="true" toc="default">
        <name>For Application and Protocol Developers"> Developers</name>
        <t>Developers SHOULD NOT <bcp14>SHOULD NOT</bcp14> develop new protocols or applications that
        rely on IP fragmentation. When a new protocol or application is
        deployed in an environment that does not fully support IP
        fragmentation, it SHOULD <bcp14>SHOULD</bcp14> operate correctly, either in its default
        configuration or in a specified alternative configuration.</t>
        <t>While there may be controlled environments where IP
	fragmentation
        works reliably, this is a deployment issue and cannot be known
        to someone developing a new protocol or application.  It is not
        recommended that new protocols or applications be developed that
        rely on IP fragmentation.
	<!--
        Protocols and applications that rely
	on IP fragmentation will fail to work on the Internet.
        -->
	Protocols and
        applications that rely on IP fragmentation will work less
        reliably on the Internet.
	<!--unless they also include mechanisms to detect that IP
        fragmentation isn't working reliably.
        -->
        </t>
        <t>Legacy protocols that depend upon IP fragmentation SHOULD <bcp14>SHOULD</bcp14> be
        updated to break that dependency. However, in some cases, there may be
        no viable alternative to IP fragmentation (e.g., IPsec tunnel mode,
        IP-in-IP encapsulation).
        Applications and protocols cannot necessarily know or control
	whether they use lower layers or network paths that rely on such
	fragmentation.
	In these cases, the protocol will continue to
        rely on IP fragmentation but should only be used in environments where
        IP fragmentation is known to be supported.</t>
        <t>Protocols may be able to avoid IP fragmentation by using a
        sufficiently small MTU (e.g., the protocol minimum link MTU), disabling
        IP fragmentation, and ensuring that the transport protocol in use
        adapts its segment size to the MTU. Other protocols may deploy a
        sufficiently reliable PMTU discovery mechanism (e.g., PLMPTUD).
<!--
        The risks of IP fragmentation can also be mitigated through the
	use of encapsulation, e.g., by transmitting IP fragments as
	payloads.
--> PLPMTUD).
        </t>
        <t>UDP applications SHOULD <bcp14>SHOULD</bcp14> abide by the recommendations stated in
        Section 3.2 of
        <xref target="RFC8085"/>.</t> target="RFC8085" section="3.2" sectionFormat="of" format="default"/>.</t>
      </section>
      <section title="For numbered="true" toc="default">
        <name>For System Developers"> Developers</name>
        <t>Software libraries SHOULD <bcp14>SHOULD</bcp14> include provision for PLPMTUD for each
        supported transport protocol.</t>
      </section>
      <section title="For Middle Box Developers">
	<t>Middle boxes, numbered="true" toc="default">
        <name>For Middlebox Developers</name>
        <t>Middleboxes, which are systems that "transparently"
	perform policy functions on passing traffic but do not
	participate in the routing system, should process IP fragments
	in a manner that is consistent with <xref target="RFC0791"/> target="RFC0791" format="default"/>
	and <xref target="RFC8200"/>. target="RFC8200" format="default"/>.  In many cases, middle boxes middleboxes
	must maintain state in order to achieve this goal.</t>
        <t>Price and performance considerations frequently motivate network
        operators to deploy stateless middle boxes. middleboxes. These stateless middle
        boxes middleboxes
        may perform sub-optimally, suboptimally, process IP fragments in a manner that is not
        compliant with RFC 791 or RFC 8200, or even discard IP fragments
        completely. Such behaviors are NOT RECOMMENDED. <bcp14>NOT RECOMMENDED</bcp14>. If a
        middleboxes
        middlebox implements non-standard nonstandard behavior with respect to IP
        fragmentation, then that behavior MUST <bcp14>MUST</bcp14> be clearly
        documented.</t>
      </section>
      <section anchor="lagrec"
               title="For numbered="true" toc="default">
        <name>For ECMP, LAG LAG, and Load-Balancer Developers And Operators"> Operators</name>
        <t>In their default configuration, when the IPv6 Flow Label is not
        equal to zero, IPv6 devices that implement Equal-Cost Multipath (ECMP)
        Routing as described in <xref target="RFC2328">OSPF</xref> target="RFC2328" format="default">OSPF</xref>
        and other routing protocols, <xref target="RFC7424">Link target="RFC7424" format="default">Link
        Aggregation Grouping (LAG)</xref>, or other load-distribution
        technologies SHOULD <bcp14>SHOULD</bcp14> accept only the following fields as input to their
        hash algorithm:</t>

        <t><list style="symbols">
            <t>IP
        <ul spacing="normal">
          <li>IP Source Address.</t>

            <t>IP Address.</li>
          <li>IP Destination Address.</t>

            <t>Flow Label.</t>
          </list></t> Address.</li>
          <li>Flow Label.</li>
        </ul>
        <t>Operators <bcp14>SHOULD</bcp14> deploy these devices in their
        default configuration.</t>
        <t>These recommendations are similar to those presented in <xref
        target="RFC6438" format="default"/> and <xref target="RFC7098" format="default"/>. They differ in that
        they specify a default configuration.</t>
      </section>
      <section numbered="true" toc="default">
        <name>For Network Operators</name>
        <t>Operators <bcp14>MUST</bcp14> ensure proper PMTUD operation in their network,
        including making sure the network generates PTB packets when
        dropping packets too large compared to outgoing interface
        MTU. However, implementations <bcp14>MAY</bcp14> rate limit the generation of
        ICMP messages as per <xref target="RFC1812" format="default"/> and <xref
        target="RFC4443" format="default"/>.</t>
        <t>As per RFC 4890, network operators <bcp14>MUST NOT</bcp14> filter ICMPv6 PTB
        messages unless they are known to be forged or otherwise illegitimate.
        As stated in <xref target="PTB" format="default"/>, filtering ICMPv6 PTB packets causes
        PMTUD to fail. Many upper-layer protocols rely on PMTUD.</t>
        <t>As per RFC 8200, network operators <bcp14>MUST NOT</bcp14> deploy IPv6 links whose
        MTU is less than 1280 octets.</t>
        <t>Network operators <bcp14>SHOULD NOT</bcp14> filter IP fragments if they are known
        to have originated at a domain name server or be destined for a domain
        name server. This is because domain name services are critical to
        operation of the Internet.</t>
      </section>
    </section>
    <section numbered="true" toc="default">
      <name>IANA Considerations</name>
      <t>This document has no IANA actions.</t>
    </section>
    <section numbered="true" toc="default">
      <name>Security Considerations</name>
      <t>This document mitigates some of the security considerations
      associated with IP fragmentation by discouraging its use. It does not
      introduce any new security vulnerabilities, because it does not
      introduce any new alternatives to IP fragmentation. Instead, it
      recommends well-understood alternatives.</t>
    </section>

    <section title="Acknowledgements">
      <t>Thanks to Mikael Abrahamsson, Brian Carpenter, Silambu Chelvan,
      Joel Halpern, Lorenzo Colitti, Gorry Fairhurst, Mike Heard, Tom
      Herbert, Tatuya Jinmei, Suresh Krishnan, Jen Linkova, Paolo
      Lucente, Manoj Nayak, Eric Nygren, Fred Templin and Joe Touch for
      their comments.
      </t>
    </section>
  </middle>
  <back>
    <references title="Normative References">
      <?rfc include="reference.RFC.2119"?>

      <?rfc include='reference.RFC.8174'?>

      <?rfc include='reference.RFC.8085'?>

      <?rfc include='reference.RFC.8200'?>

      <?rfc include='reference.RFC.0791'?>

      <?rfc include='reference.RFC.8201'?>

      <?rfc include='reference.RFC.4821'?>

      <?rfc include='reference.RFC.1191'?>

      <?rfc include='reference.RFC.0792'?>

      <?rfc include='reference.RFC.0793'?>

      <?rfc include='reference.RFC.0768'?>

      <?rfc include='reference.RFC.1035'?>

      <?rfc include='reference.RFC.4443'?>

      <?rfc include='reference.RFC.6437'?>

      <?rfc include='reference.RFC.6438'?>

      <?rfc include='reference.I-D.ietf-tsvwg-datagram-plpmtud'?>
<displayreference target="I-D.ietf-intarea-tunnels" to="TUNNELS"/>
<displayreference target="I-D.ietf-tsvwg-udp-options" to="UDP-OPTIONS"/>
    <references>
      <name>References</name>
      <references>
        <name>Normative References</name>
        <xi:include href="https://xml2rfc.tools.ietf.org/public/rfc/bibxml/reference.RFC.2119.xml"/>
        <xi:include href="https://xml2rfc.tools.ietf.org/public/rfc/bibxml/reference.RFC.8174.xml"/>
        <xi:include href="https://xml2rfc.tools.ietf.org/public/rfc/bibxml/reference.RFC.8085.xml"/>
        <xi:include href="https://xml2rfc.tools.ietf.org/public/rfc/bibxml/reference.RFC.8200.xml"/>
        <xi:include href="https://xml2rfc.tools.ietf.org/public/rfc/bibxml/reference.RFC.0791.xml"/>
        <xi:include href="https://xml2rfc.tools.ietf.org/public/rfc/bibxml/reference.RFC.8201.xml"/>
        <xi:include href="https://xml2rfc.tools.ietf.org/public/rfc/bibxml/reference.RFC.4821.xml"/>
        <xi:include href="https://xml2rfc.tools.ietf.org/public/rfc/bibxml/reference.RFC.1191.xml"/>
        <xi:include href="https://xml2rfc.tools.ietf.org/public/rfc/bibxml/reference.RFC.0792.xml"/>
        <xi:include href="https://xml2rfc.tools.ietf.org/public/rfc/bibxml/reference.RFC.0793.xml"/>
        <xi:include href="https://xml2rfc.tools.ietf.org/public/rfc/bibxml/reference.RFC.0768.xml"/>
        <xi:include href="https://xml2rfc.tools.ietf.org/public/rfc/bibxml/reference.RFC.1035.xml"/>
        <xi:include href="https://xml2rfc.tools.ietf.org/public/rfc/bibxml/reference.RFC.4443.xml"/>
        <xi:include href="https://xml2rfc.tools.ietf.org/public/rfc/bibxml/reference.RFC.6437.xml"/>
        <xi:include href="https://xml2rfc.tools.ietf.org/public/rfc/bibxml/reference.RFC.6438.xml"/>

<reference anchor="RFC8899" target="https://www.rfc-editor.org/info/rfc8899">
  <front>
    <title>
    Packetization Layer Path MTU Discovery for Datagram Transports
    </title>
    <author initials="G" surname="Fairhurst" fullname="Godred Fairhurst">
      <organization/>
    </author>
    <author initials="T" surname="Jones" fullname="Tom Jones">
      <organization/>
    </author>
    <author initials="M" surname="Tüxen" fullname="Michael Tüxen">
       <organization/>
    </author>
    <author initials="I" surname="Rüngeler" fullname="Irene Rüngeler">
      <organization/>
    </author>
    <author initials="T" surname="Völker" fullname="Timo Völker">
      <organization/>
    </author>
    <date month="September" year="2020"/>
   </front>
   <seriesInfo name="RFC" value="8899"/>
   <seriesInfo name="DOI" value="10.17487/RFC8899"/>
</reference>
      </references>

    <references title="Informative References">
      <?rfc include='reference.RFC.7872'?>

      <?rfc include='reference.RFC.1122'?>

      <?rfc include='reference.RFC.1812'?>

      <?rfc include='reference.RFC.1858'?>

      <?rfc include='reference.RFC.2473'?>

      <?rfc include='reference.RFC.4960'?>

      <?rfc include='reference.RFC.5927'?>

      <?rfc include='reference.RFC.6346'?>

      <?rfc include='reference.RFC.4340'?>

      <?rfc include='reference.RFC.2003'?>

      <?rfc include='reference.RFC.5340'?>

      <?rfc include='reference.RFC.4890'?>

      <?rfc include='reference.RFC.2784'?>

      <?rfc include='reference.RFC.7676'?>

      <?rfc include='reference.RFC.5722'?>

      <?rfc include='reference.RFC.7739'?>

      <?rfc include='reference.RFC.7588'?>

      <?rfc include='reference.RFC.8086'?>

      <?rfc include='reference.RFC.4459'?>

      <?rfc include='reference.RFC.6888'?>

      <?rfc include='reference.RFC.4963'?>

      <?rfc include='reference.RFC.2328'?>

      <?rfc include='reference.RFC.5326'?>

      <?rfc include='reference.I-D.ietf-tsvwg-udp-options'?>

      <?rfc include='reference.I-D.ietf-intarea-tunnels'?>

      <?rfc include='reference.RFC.3128'?>

      <?rfc include='reference.RFC.7098'?>
      <?rfc include='reference.RFC.7424'?>
      <references>
        <name>Informative References</name>
        <xi:include href="https://xml2rfc.tools.ietf.org/public/rfc/bibxml/reference.RFC.7872.xml"/>
        <xi:include href="https://xml2rfc.tools.ietf.org/public/rfc/bibxml/reference.RFC.1122.xml"/>
        <xi:include href="https://xml2rfc.tools.ietf.org/public/rfc/bibxml/reference.RFC.1812.xml"/>
        <xi:include href="https://xml2rfc.tools.ietf.org/public/rfc/bibxml/reference.RFC.1858.xml"/>
        <xi:include href="https://xml2rfc.tools.ietf.org/public/rfc/bibxml/reference.RFC.2473.xml"/>
        <xi:include href="https://xml2rfc.tools.ietf.org/public/rfc/bibxml/reference.RFC.4960.xml"/>
        <xi:include href="https://xml2rfc.tools.ietf.org/public/rfc/bibxml/reference.RFC.5927.xml"/>
        <xi:include href="https://xml2rfc.tools.ietf.org/public/rfc/bibxml/reference.RFC.6346.xml"/>
        <xi:include href="https://xml2rfc.tools.ietf.org/public/rfc/bibxml/reference.RFC.4340.xml"/>
        <xi:include href="https://xml2rfc.tools.ietf.org/public/rfc/bibxml/reference.RFC.2003.xml"/>
        <xi:include href="https://xml2rfc.tools.ietf.org/public/rfc/bibxml/reference.RFC.5340.xml"/>
        <xi:include href="https://xml2rfc.tools.ietf.org/public/rfc/bibxml/reference.RFC.4890.xml"/>
        <xi:include href="https://xml2rfc.tools.ietf.org/public/rfc/bibxml/reference.RFC.2784.xml"/>
        <xi:include href="https://xml2rfc.tools.ietf.org/public/rfc/bibxml/reference.RFC.7676.xml"/>
        <xi:include href="https://xml2rfc.tools.ietf.org/public/rfc/bibxml/reference.RFC.5722.xml"/>
        <xi:include href="https://xml2rfc.tools.ietf.org/public/rfc/bibxml/reference.RFC.7739.xml"/>
        <xi:include href="https://xml2rfc.tools.ietf.org/public/rfc/bibxml/reference.RFC.7588.xml"/>
        <xi:include href="https://xml2rfc.tools.ietf.org/public/rfc/bibxml/reference.RFC.8086.xml"/>
        <xi:include href="https://xml2rfc.tools.ietf.org/public/rfc/bibxml/reference.RFC.4459.xml"/>
        <xi:include href="https://xml2rfc.tools.ietf.org/public/rfc/bibxml/reference.RFC.6888.xml"/>
        <xi:include href="https://xml2rfc.tools.ietf.org/public/rfc/bibxml/reference.RFC.4963.xml"/>
        <xi:include href="https://xml2rfc.tools.ietf.org/public/rfc/bibxml/reference.RFC.2328.xml"/>
        <xi:include href="https://xml2rfc.tools.ietf.org/public/rfc/bibxml/reference.RFC.5326.xml"/>
        <xi:include href="https://xml2rfc.tools.ietf.org/public/rfc/bibxml3/reference.I-D.draft-ietf-tsvwg-udp-options-08.xml"/>
        <xi:include href="https://xml2rfc.tools.ietf.org/public/rfc/bibxml3/reference.I-D.draft-ietf-intarea-tunnels-10.xml"/>
        <xi:include href="https://xml2rfc.tools.ietf.org/public/rfc/bibxml/reference.RFC.3128.xml"/>
        <xi:include href="https://xml2rfc.tools.ietf.org/public/rfc/bibxml/reference.RFC.7098.xml"/>
        <xi:include href="https://xml2rfc.tools.ietf.org/public/rfc/bibxml/reference.RFC.7424.xml"/>
        <reference anchor="Huston" target="http://www.potaroo.net/ispcol/2017-08/xtn-hdrs.html">
          <front>
            <title>IPv6, Large UDP Packets and the DNS</title>
            <author fullname="Geoff Huston" initials="G." surname="Huston">
              <organization/>
            </author>
            <date month="August" year="2017"/>
          </front>
        </reference>

        <reference anchor="Kent" target="http://www.hpl.hp.com/techreports/Compaq-DEC/WRL-87-3.pdf">
          <front>
            <title>Fragmentation Considered Harmful</title>
            <author fullname="Kent" initials="C. " surname="Kent">
              <organization/>
            </author>
            <author fullname="Mogul" initials="J." surname="Mogul">
              <organization/>

            <address>
              <postal>
                <street/>

                <city/>

                <region/>

                <code/>

                <country/>
              </postal>

              <phone/>

              <facsimile/>

              <email/>

              <uri/>
            </address>
            </author>
            <date month="August" year="1987"/>
          </front>
          <seriesInfo name="DOI" value="10.1145/55482.55524"/>
          <refcontent>SIGCOMM '87: Proceedings of the ACM workshop on Frontiers in computer communications technology</refcontent>
        </reference>
        <reference anchor="Damas" target="http://www.potaroo.net/ispcol/2018-04/atr.html">
          <front>
            <title>Measuring ATR</title>
            <author fullname="Joao Damas" initials="J." surname="Damas">
              <organization/>
            </author>
            <author fullname="Geoff Huston" initials="G." surname="Huston">
              <organization/>
            </author>
            <date month="April" year="2018"/>
          </front>
        </reference>
        <reference anchor="Ptacek1998" target="http://www.aciri.org/vern/Ptacek-Newsham-Evasion-98.ps">
          <front>
            <title>Insertion, Evasion and Denial of Service: Eluding Network
          Intrusion Detection</title>
            <author fullname="T. H. Ptacek" initials="T. H." surname="Ptacek">
              <organization>Secure Networks, Inc.</organization>
            </author>
            <author fullname="T. N. Newsham" initials="T. N." surname="Newsham">
              <organization>Secure Networks, Inc.</organization>
            </author>
            <date year="1998"/>
          </front>
        </reference>
        <xi:include href="https://xml2rfc.tools.ietf.org/public/rfc/bibxml/reference.RFC.1981.xml"/>
        <xi:include href="https://xml2rfc.tools.ietf.org/public/rfc/bibxml/reference.RFC.2460.xml"/>
      </references>
    </references>

    <section numbered="false" toc="default">
      <name>Acknowledgements</name>

      <t>Thanks to <contact fullname="Mikael Abrahamsson"/>,
      <contact fullname="Brian Carpenter"/>, <contact fullname="Silambu Chelvan"/>,
      <contact fullname="Lorenzo Colitti"/>,
      <contact fullname="Gorry Fairhurst"/>,
      <contact fullname="Joel Halpern"/>,
      <contact fullname="Mike Heard"/>,
      <contact fullname="Tom Herbert"/>, <contact fullname="Tatuya Jinmei"/>,
      <contact fullname="Suresh Krishnan"/>, <contact fullname="Jen Linkova"/>,
      <contact fullname="Paolo Lucente"/>, <contact fullname="Manoj Nayak"/>,
      <contact fullname="Eric Nygren"/>, <contact fullname="Fred Templin"/>, and
      <contact fullname="Joe Touch"/> for their comments.
      </t>
    </section>
  </back>
</rfc>