rfc8775xml2.original.xml | rfc8775.xml | |||
---|---|---|---|---|
<?xml version='1.0'?> | <?xml version="1.0" encoding="UTF-8"?> | |||
<!DOCTYPE rfc SYSTEM 'rfc2629.dtd' [ | <!DOCTYPE rfc SYSTEM "rfc2629-xhtml.ent"> | |||
]> | ||||
<?rfc toc="yes"?> | <rfc xmlns:xi="http://www.w3.org/2001/XInclude" category="std" | |||
<?rfc tocompact="no"?> | consensus="true" docName="draft-ietf-pim-drlb-15" number="8775" | |||
<?rfc tocdepth="6"?> | ipr="trust200902" obsoletes="" updates="" submissionType="IETF" | |||
<?rfc symrefs="yes"?> | xml:lang="en" tocInclude="true" tocDepth="6" symRefs="true" | |||
<?rfc sortrefs="yes"?> | sortRefs="true" version="3"> | |||
<?rfc compact="yes"?> | ||||
<?rfc subcompact="no"?> | <!-- xml2rfc v2v3 conversion 2.39.0 --> | |||
<?rfc strict="yes" ?> | ||||
<rfc category="std" docName="draft-ietf-pim-drlb-15" | ||||
ipr="trust200902"> | ||||
<!-- ***** FRONT MATTER ***** --> | <!-- ***** FRONT MATTER ***** --> | |||
<front> | <front> | |||
<title abbrev="PIM Designated Router Load Balancing">PIM Designated Router | <title abbrev="PIM Designated Router Load Balancing">PIM Designated Router | |||
Load Balancing</title> | Load Balancing</title> | |||
<seriesInfo name="RFC" value="8775"/> | ||||
<author fullname="Yiqun Cai" initials="Y" surname="Cai"> | <author fullname="Yiqun Cai" initials="Y" surname="Cai"> | |||
<organization>Alibaba Group</organization> | <organization>Alibaba Group</organization> | |||
<address> | <address> | |||
<postal> | ||||
<street>520 Almanor Avenue</street> | ||||
<city>Sunnyvale</city><region>CA</region> | ||||
<code>94085</code> | ||||
<country>United States of America</country> | ||||
</postal> | ||||
<email>yiqun.cai@alibaba-inc.com</email> | <email>yiqun.cai@alibaba-inc.com</email> | |||
</address> | </address> | |||
</author> | </author> | |||
<author initials="H" surname="Ou" fullname="Heidi Ou"> | <author initials="H" surname="Ou" fullname="Heidi Ou"> | |||
<organization>Alibaba Group</organization> | <organization>Alibaba Group</organization> | |||
<address> | <address> | |||
<email>heidi.ou@alibaba-inc.com</email> | <postal> | |||
<street>520 Almanor Avenue</street> | ||||
<city>Sunnyvale</city><region>CA</region> | ||||
<code>94085</code> | ||||
<country>United States of America</country> | ||||
</postal> | ||||
<email>heidi.ou@alibaba-inc.com</email> | ||||
</address> | </address> | |||
</author> | </author> | |||
<author initials="S" surname="Vallepalli" fullname="Sri Vallepalli"> | <author initials="S" surname="Vallepalli" fullname="Sri Vallepalli"> | |||
<organization>Cisco Systems, Inc.</organization> | ||||
<address> | <address> | |||
<postal> | <email>vallepal@yahoo.com</email> | |||
<street>3625 Cisco Way</street> | ||||
<city>San Jose</city> | ||||
<code>CA 95134</code> | ||||
<country>USA</country> | ||||
</postal> | ||||
<email>svallepa@cisco.com</email> | ||||
</address> | </address> | |||
</author> | </author> | |||
<author initials="M" surname="Mishra" fullname="Mankamana Mishra"> | <author initials="M" surname="Mishra" fullname="Mankamana Mishra"> | |||
<organization>Cisco Systems, Inc.</organization> | <organization>Cisco Systems, Inc.</organization> | |||
<address> | <address> | |||
<postal> | <postal> | |||
<street>821 Alder Drive,</street> | <street>821 Alder Drive,</street> | |||
<city>Milpitas</city> | <city>Milpitas</city> | |||
<code>CA 95035</code> | <region>CA</region> | |||
<country>USA</country> | <code>95035</code> | |||
</postal> | <country>United States of America</country> | |||
<email>mankamis@cisco.com</email> | </postal> | |||
<email>mankamis@cisco.com</email> | ||||
</address> | </address> | |||
</author> | </author> | |||
<author initials="S" surname="Venaas" fullname="Stig Venaas"> | <author initials="S" surname="Venaas" fullname="Stig Venaas"> | |||
<organization>Cisco Systems, Inc.</organization> | <organization>Cisco Systems, Inc.</organization> | |||
<address> | <address> | |||
<postal> | <postal> | |||
<street>Tasman Drive</street> | <street>Tasman Drive</street> | |||
<city>San Jose</city> | <city>San Jose</city> | |||
<code>CA 95134</code> | <region>CA</region> | |||
<country>USA</country> | <code>95134</code> | |||
<country>United States of America</country> | ||||
</postal> | </postal> | |||
<email>stig@cisco.com</email> | <email>stig@cisco.com</email> | |||
</address> | </address> | |||
</author> | </author> | |||
<author initials="A" surname="Green" fullname="Andy Green"> | <author initials="A" surname="Green" fullname="Andy Green"> | |||
<organization>British Telecom</organization> | <organization>British Telecom</organization> | |||
<address> | <address> | |||
<postal> | <postal> | |||
<street>Adastral Park</street> | <street>Adastral Park</street> | |||
<city>Ipswich</city> | <city>Ipswich</city> | |||
<code>IP5 2RE</code> | <code>IP5 2RE</code> | |||
<country>United Kingdom</country> | <country>United Kingdom</country> | |||
</postal> | </postal> | |||
<email>andy.da.green@bt.com</email> | <email>andy.da.green@bt.com</email> | |||
</address> | </address> | |||
</author> | </author> | |||
<date year="2020" month="April" /> | ||||
<date/> | ||||
<area>Routing</area> | <area>Routing</area> | |||
<keyword>Multicast</keyword> | <keyword>Multicast</keyword> | |||
<abstract> | <abstract> | |||
<t>On a multi-access network, one of the PIM-SM (PIM Sparse Mode) | <t>On a multi-access network, one of the PIM-SM (PIM Sparse Mode) | |||
routers is elected as a | routers is elected as a | |||
Designated Router. One of the responsibilities of the Designated Router | Designated Router. One of the responsibilities of the Designated Router | |||
is to track local multicast listeners and forward data to these | is to track local multicast listeners and forward data to these | |||
listeners if the group is operating in PIM-SM. This | listeners if the group is operating in PIM-SM. This | |||
document specifies a modification to the PIM-SM protocol that | document specifies a modification to the PIM-SM protocol that | |||
allows more than one of the PIM-SM routers to take on this responsibility | allows more than one of the PIM-SM routers to take on this responsibility | |||
so that the forwarding load can be distributed among multiple routers. | so that the forwarding load can be distributed among multiple routers. | |||
</t> | </t> | |||
</abstract> | </abstract> | |||
</front> | </front> | |||
<!-- ***** MIDDLE MATTER ***** --> | <!-- ***** MIDDLE MATTER ***** --> | |||
<middle> | <middle> | |||
<section title="Introduction"> | <section numbered="true" toc="default"> | |||
<t>On a multi-access LAN, such as an Ethernet, with one or more PIM-SM | <name>Introduction</name> | |||
(PIM Sparse Mode) <xref target="RFC7761"/> routers, one of the PIM-SM | <t>On a multi-access LAN (such as an Ethernet) with one or more PIM-SM | |||
(PIM Sparse Mode) <xref target="RFC7761" format="default"/> routers, one | ||||
of the PIM-SM | ||||
routers is elected as a Designated Router (DR). The PIM DR has two | routers is elected as a Designated Router (DR). The PIM DR has two | |||
responsibilities in the PIM-SM protocol. For any active sources on a LAN, | responsibilities in the PIM-SM protocol. For any active sources on a LAN, | |||
the PIM DR is responsible for registering with the Rendezvous Point (RP) | the PIM DR is responsible for registering with the Rendezvous Point (RP) | |||
if the group is operating in PIM-SM. Also, the PIM DR is responsible for | if the group is operating in PIM-SM. Also, the PIM DR is responsible for | |||
tracking local multicast listeners and forwarding to these listeners if | tracking local multicast listeners and forwarding data to these | |||
the group is operating in PIM-SM. | listeners if the group is operating in PIM-SM. | |||
</t> | ||||
<t>Consider the following LAN in Figure 1: | ||||
</t> | </t> | |||
<figure > | <t>Consider the following LAN in <xref target="LAN-REC" | |||
<preamble/> | format="default"/>:</t> | |||
<artwork ><![CDATA[ | <figure anchor="LAN-REC"> | |||
<name>LAN with Receivers</name> | ||||
<artwork name="" type="" align="left" alt=""><![CDATA[ | ||||
(core networks) | (core networks) | |||
| | | | | | | | |||
| | | | | | | | |||
R1 R2 R3 | R1 R2 R3 | |||
| | | | | | | | |||
----(LAN)---- | ----(LAN)---- | |||
| | | | |||
| | | | |||
(many receivers) | (many receivers) | |||
]]></artwork> | ||||
Figure 1: LAN with receivers | </figure> | |||
]]></artwork> | ||||
<postamble></postamble> | ||||
</figure> | ||||
<t>Assume R1 is elected as the DR. According to the | <t>Assume R1 is elected as the DR. According to the | |||
PIM-SM protocol, R1 will be responsible for forwarding traffic | PIM-SM protocol, R1 will be responsible for forwarding traffic | |||
to that LAN on behalf of all local members. In addition to keeping | to that LAN on behalf of all local members. In addition to keeping | |||
track of membership reports, R1 is also responsible for | track of membership reports, R1 is also responsible for | |||
initiating the creation of source and/or shared trees towards the | initiating the creation of source and/or shared trees towards the | |||
senders or the RPs. The membership reports would be IGMP or MLD | senders or the RPs. The membership reports would be IGMP or Multicast | |||
Listener Discovery (MLD) | ||||
messages. This applies to any versions of the IGMP and MLD protocols. | messages. This applies to any versions of the IGMP and MLD protocols. | |||
The most recent versions are IGMPv3 <xref target="RFC3376"/> and | The most recent versions are IGMPv3 <xref target="RFC3376" | |||
MLDv2 <xref target="RFC3810"/>. | format="default"/> and | |||
MLDv2 <xref target="RFC3810" format="default"/>. | ||||
</t> | </t> | |||
<t>Having a single router acting as DR and being responsible for | ||||
<t>Having a single router acting as DR and being responsible for data | data-plane forwarding leads to several issues. One of the issues is | |||
plane forwarding leads to several issues. One of the issues is that the | that the | |||
aggregated bandwidth will be limited to what R1 can handle with | aggregated bandwidth will be limited to what R1 can handle with | |||
regards to capacity of incoming links, the interface on the LAN, | regards to capacity of incoming links, the interface on the LAN, | |||
and total forwarding capacity. It is very common that a LAN consists of | and total forwarding capacity. It is very common that a LAN consists of | |||
switches that run IGMP/MLD or PIM snooping <xref target="RFC4541"/>. | switches that run IGMP/MLD or PIM snooping <xref target="RFC4541" | |||
format="default"/>. | ||||
This allows the forwarding of multicast packets to be | This allows the forwarding of multicast packets to be | |||
restricted only to segments leading to receivers that have indicated | restricted only to segments leading to receivers that have indicated | |||
their interest in multicast groups using either IGMP or MLD. The | their interest in multicast groups using either IGMP or MLD. The | |||
emergence of the switched Ethernet allows the aggregated bandwidth to | emergence of the switched Ethernet allows the aggregated bandwidth to | |||
exceed, sometimes by a large number, that of a single link. For | exceed, sometimes by a large number, that of a single link. For | |||
example, let us modify Figure 1 and introduce an Ethernet switch in | example, let us modify <xref target="LAN-REC" format="default"/> and | |||
Figure 2. | introduce an Ethernet switch in <xref target="LAN-SWITCH" | |||
format="default"/>. | ||||
</t> | </t> | |||
<figure> | <figure anchor="LAN-SWITCH"> | |||
<preamble/> | <name>LAN with Ethernet Switch</name> | |||
<artwork> | <artwork name="" type="" align="left" alt=""><![CDATA[ | |||
<![CDATA[ | ||||
(core networks) | (core networks) | |||
| | | | | | | | |||
| | | | | | | | |||
R1 R2 R3 | R1 R2 R3 | |||
| | | | | | | | |||
+=gi1===gi2===gi3=+ | +=gi1===gi2===gi3=+ | |||
+ + | + + | |||
+ switch + | + switch + | |||
+ + | + + | |||
+=gi4===gi5===gi6=+ | +=gi4===gi5===gi6=+ | |||
| | | | | | | | |||
H1 H2 H3 | H1 H2 H3 | |||
]]></artwork> | ||||
Figure 2: LAN with Ethernet Switch | ||||
]]> | ||||
</artwork> | ||||
<postamble></postamble> | ||||
</figure> | </figure> | |||
<t>Let us assume that each individual link is a Gigabit Ethernet. Each | <t>Let us assume that each individual link is a Gigabit Ethernet. Each | |||
router, R1, R2 and R3, and the switch have enough forwarding capacity | router (R1, R2, and R3) and the switch have enough forwarding capacity | |||
to handle hundreds of Gigabits of data. | to handle hundreds of gigabits of data. | |||
</t> | </t> | |||
<t>Let us further assume that each of the hosts requests 500 Mbps of | <t>Let us further assume that each of the hosts requests 500 Mbps of | |||
unique multicast data. This totals to 1.5 Gbps of data, which is less | unique multicast data. This totals to 1.5 Gbps of data, which is less | |||
than what each switch or the combined uplink bandwidth across the | than what each switch or the combined uplink bandwidth across the | |||
routers can handle, even under failure of a single router. | routers can handle, even under failure of a single router. | |||
</t> | </t> | |||
<t> On the other hand, the link between R1 and switch, via port gi1, can | <t> On the other hand, the link between R1 and switch, via port gi1, can | |||
only handle a throughput of 1Gbps. And if R1 is the only DR (the | only handle a throughput of 1 Gbps. And if R1 is the only DR (the | |||
PIM DR elected using the procedure defined by <xref target="RFC7761"/>) | PIM DR elected using the procedure defined by <xref target="RFC7761" | |||
format="default"/>), | ||||
at least 500 Mbps worth of data will be lost because the only link that | at least 500 Mbps worth of data will be lost because the only link that | |||
can be used to draw the traffic from the routers to the switch is via | can be used to draw the traffic from the routers to the switch is via | |||
gi1. In other words, the entire network's throughput is limited by the | gi1. In other words, the entire network's throughput is limited by the | |||
single connection between the PIM DR and the switch (or LAN as in | single connection between the PIM DR and the switch (or LAN, as in | |||
Figure 1). | <xref target="LAN-REC" format="default"/>). | |||
</t> | </t> | |||
<t>Another important issue is related to failover. If R1 is the only | <t>Another important issue is related to failover. If R1 is the only | |||
forwarder on a shared LAN, when R1 | forwarder on a shared LAN, when R1 | |||
goes out of service, multicast forwarding for the entire LAN has | goes out of service, multicast forwarding for the entire LAN has | |||
to be rebuilt by the newly elected PIM DR. However, if there were a | to be rebuilt by the newly elected PIM DR. However, if there were a | |||
way that allowed multiple routers to forward to the LAN for | way that allowed multiple routers to forward to the LAN for | |||
different groups, failure of one of the routers would only lead to | different groups, failure of one of the routers would only lead to | |||
disruption to a subset of the flows, therefore improving the overall | disruption to a subset of the flows, therefore improving the overall | |||
resilience of the network. | resilience of the network. | |||
</t> | </t> | |||
<t>This document specifies a modification to the PIM-SM protocol | <t>This document specifies a modification to the PIM-SM protocol | |||
that allows more than one of these routers, called Group Designated | that allows more than one of these routers, called Group Designated | |||
Routers (GDR) to be selected so that the forwarding load can be | Routers (GDRs), to be selected so that the forwarding load can be | |||
distributed among a number of routers. | distributed among a number of routers. | |||
</t> | </t> | |||
</section> | </section> | |||
<section numbered="true" toc="default"> | ||||
<section title="Terminology"> | <name>Terminology</name> | |||
<t>The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL | <t> | |||
NOT", "SHOULD", "SHOULD NOT", "RECOMMENDED", "NOT RECOMMENDED", | The key words "<bcp14>MUST</bcp14>", "<bcp14>MUST NOT</bcp14>", | |||
"MAY", and "OPTIONAL" in this document are to be interpreted as | "<bcp14>REQUIRED</bcp14>", "<bcp14>SHALL</bcp14>", "<bcp14>SHALL | |||
described in BCP 14 <xref target="RFC2119"/> <xref target="RFC8174"/> | NOT</bcp14>", "<bcp14>SHOULD</bcp14>", "<bcp14>SHOULD NOT</bcp14>", | |||
when, and only when, they appear in all capitals, as shown here. | "<bcp14>RECOMMENDED</bcp14>", "<bcp14>NOT RECOMMENDED</bcp14>", | |||
</t> | "<bcp14>MAY</bcp14>", and "<bcp14>OPTIONAL</bcp14>" in this document are | |||
to be interpreted as | ||||
described in BCP 14 <xref target="RFC2119"/> <xref target="RFC8174"/> | ||||
when, and only when, they appear in all capitals, as shown here. | ||||
</t> | ||||
<t>With respect to PIM-SM, this document follows the terminology that | <t>With respect to PIM-SM, this document follows the terminology that | |||
has been defined in <xref target="RFC7761"/>. | has been defined in <xref target="RFC7761" format="default"/>. | |||
</t> | </t> | |||
<t> This document also introduces the following new acronyms: | <t> This document also introduces the following new acronyms: | |||
</t> | </t> | |||
<t> | <dl newline="false" spacing="normal"> | |||
<list style="symbols"> | <dt> GDR: Group Designated Router.</dt> | |||
<t> GDR: Group Designated Router. For each multicast | <dd>For each multicast | |||
flow, either a (*,G) for Any-Source Multicast (ASM), or an (S,G) | flow, either a (*,G) for Any-Source Multicast (ASM) or an (S,G) | |||
for Source-Specific Multicast (SSM) <xref target="RFC4607"/>, | for Source-Specific Multicast (SSM) <xref target="RFC4607" | |||
a Hash Algorithm (described below) is used to select one of the | format="default"/>, | |||
a hash algorithm (described below) is used to select one of the | ||||
routers as a GDR. The GDR is responsible for initiating the | routers as a GDR. The GDR is responsible for initiating the | |||
forwarding tree building process for the corresponding multicast | forwarding tree building process for the corresponding multicast | |||
flow. | flow. | |||
</t> | </dd> | |||
<t>GDR Candidate: a router that has the potential to | <dt>GDR Candidate:</dt> | |||
<dd>a router that has the potential to | ||||
become a GDR. There might be multiple GDR Candidates on a LAN, | become a GDR. There might be multiple GDR Candidates on a LAN, | |||
but only one can become the GDR for a specific multicast flow. | but only one can become the GDR for a specific multicast flow. | |||
</t> | </dd> | |||
</list> | </dl> | |||
</t> | ||||
</section> | </section> | |||
<section numbered="true" toc="default"> | ||||
<section title="Applicability"> | <name>Applicability</name> | |||
<t>The extension specified in this document applies to | <t>The extension specified in this document applies to | |||
PIM-SM routers acting as last hop routers (there are directly connected | PIM-SM routers acting as last-hop routers (there are directly connected | |||
receivers). It does not alter the behavior of a PIM DR, or any other | receivers). It does not alter the behavior of a PIM DR or any other | |||
routers, on the first hop network (directly connected sources). | routers on the first-hop network (directly connected sources). | |||
This is because the source tree is built using the IP address of the | This is because the source tree is built using the IP address of the | |||
sender, not the IP address of the PIM DR that sends PIM registers | sender, not the IP address of the PIM DR that sends PIM registers | |||
towards the RP. The load balancing between first hop routers can be | towards the RP. The load balancing between first-hop routers can be | |||
achieved naturally if an IGP provides equal cost multiple paths | achieved naturally if an IGP provides equal cost multiple paths | |||
(which it usually does in practice). Also distributing the load to do | (which it usually does in practice). Also, distributing the load to do | |||
source registration does not justify the additional complexity required | source registration does not justify the additional complexity required | |||
to support it. | to support it. | |||
</t> | </t> | |||
</section> | </section> | |||
<section numbered="true" toc="default"> | ||||
<section title="Functional Overview"> | <name>Functional Overview</name> | |||
<t>In the PIM DR election as defined in <xref target="RFC7761"/>, when | <t>In the PIM DR election as defined in <xref target="RFC7761" | |||
format="default"/>, when | ||||
multiple routers are connected to a multi-access LAN (for | multiple routers are connected to a multi-access LAN (for | |||
example, an Ethernet), one of them is elected to act as PIM DR. The | example, an Ethernet), one of them is elected to act as PIM DR. The | |||
PIM DR is responsible for sending local Join/Prune messages towards the | PIM DR is responsible for sending local Join/Prune messages towards the | |||
RP or source. In order to elect the PIM DR, each PIM router on the LAN | RP or source. In order to elect the PIM DR, each PIM router on the LAN | |||
examines the received PIM Hello messages and compares its own DR | examines the received PIM Hello messages and compares its own DR | |||
priority and IP address with those of its neighbors. The router with | priority and IP address with those of its neighbors. The router with | |||
the highest DR priority is the PIM DR. If there are multiple such | the highest DR priority is the PIM DR. If there are multiple such | |||
routers, their IP addresses are used as the tie-breaker, as described | routers, their IP addresses are used as the tiebreaker, as described | |||
in <xref target="RFC7761"/>. | in <xref target="RFC7761" format="default"/>. | |||
</t> | </t> | |||
<t> | <t> | |||
In order to share forwarding load among last hop routers, besides the | In order to share forwarding load among last-hop routers, besides the | |||
normal PIM DR election, one or more GDRs are elected on the | normal PIM DR election, one or more GDRs are elected on the | |||
multi-access LAN. There is only one PIM DR on the multi-access | multi-access LAN. There is only one PIM DR on the multi-access | |||
LAN, but there might be multiple GDR Candidates. | LAN, but there might be multiple GDR Candidates. | |||
</t> | </t> | |||
<t>For each multicast flow, that is, (*,G) for ASM and (S,G) for SSM, | <t>For each multicast flow, that is, (*,G) for ASM and (S,G) for SSM, | |||
a Hash Algorithm [<xref target="maskalgo"/>] is used to select one of | a hash algorithm (<xref target="maskalgo" format="default"/>) is used to | |||
the routers to be the GDR. | select one of the routers to be the GDR. | |||
The new DR Load Balancing Capability (DRLB-Cap) PIM Hello Option is | The new DR Load-Balancing Capability (DRLB-Cap) PIM Hello Option is | |||
used to announce the Capability as well as the Hash Algorithm type. | used to announce the Capability, as well as the hash algorithm type. | |||
Routers with the new DRLB-Cap Option advertised in their PIM Hello, | Routers with the new DRLB-Cap Option advertised in their PIM Hello, | |||
using the same GDR election Hash Algorithm and the same DR priority as | using the same GDR election hash algorithm and the same DR priority as | |||
the PIM DR, are considered as GDR Candidates. | the PIM DR, are considered as GDR Candidates. | |||
</t> | </t> | |||
<t>Hash Masks are defined for Source, Group and RP separately, in | <t>Hash masks are defined for Source, Group, and RP, separately, in | |||
order to handle PIM ASM/SSM. The masks, as well as a sorted list of | order to handle PIM ASM/SSM. The masks, as well as a sorted list of GDR | |||
GDR Candidate Addresses, are announced by the DR in a new DR Load | Candidate addresses, are announced by the DR in a new DR Load-Balancing | |||
Balancing List (DRLB-List) PIM Hello Option. | List (DRLB-List) PIM Hello Option. | |||
</t> | </t> | |||
<t>A Hash Algorithm based on the announced Source, Group, or RP masks | <t>A hash algorithm based on the announced Source, Group, or RP masks | |||
allows one GDR to be assigned to a corresponding multicast state. | allows one GDR to be assigned to a corresponding multicast state. | |||
That GDR is responsible for initiating the creation of the | That GDR is responsible for initiating the creation of the | |||
multicast forwarding tree for multicast traffic. | multicast forwarding tree for multicast traffic. | |||
</t> | </t> | |||
<section title="GDR Candidates"> | <section numbered="true" toc="default"> | |||
<name>GDR Candidates</name> | ||||
<t>GDR is the new concept introduced by this specification. GDR | <t>GDR is the new concept introduced by this specification. GDR | |||
Candidates are routers eligible for GDR election on the LAN. To | Candidates are routers eligible for GDR election on the LAN. To | |||
become a GDR Candidate, a router must have the same DR priority and | become a GDR Candidate, a router must have the same DR priority and | |||
run the same GDR election Hash Algorithm as the DR on the LAN. | run the same GDR election hash algorithm as the DR on the LAN. | |||
</t> | </t> | |||
<t>For example, assume there are 4 routers on the LAN: R1, R2, R3 and | <t>For example, assume there are 4 routers on the LAN: R1, R2, R3, and | |||
R4, each announcing a DRLB-Cap option. R1, R2 and R3 have the same | R4, each announcing a DRLB-Cap Option. R1, R2, and R3 have the same | |||
DR priority while R4's DR priority is less preferred. | DR priority, while R4's DR priority is less preferred. | |||
In this example, R4 will not be eligible for GDR election, because R4 | In this example, R4 will not be eligible for GDR election, because R4 | |||
will not become a PIM DR unless all of R1, R2 and R3 go out of | will not become a PIM DR unless all of R1, R2, and R3 go out of | |||
service. | service. | |||
</t> | </t> | |||
<t>Furthermore, assume router R1 wins the PIM DR election, R1 and R2 | <t>Furthermore, assume router R1 wins the PIM DR election, R1 and R2 | |||
advertise the same Hash Algorithm for GDR election, while R3 advertises | advertise the same hash algorithm for GDR election, while R3 advertises | |||
a different one. In this case, only R1 and R2 will be eligible for GDR | a different one. In this case, only R1 and R2 will be eligible for GDR | |||
election, while R3 will not. | election, while R3 will not. | |||
</t> | </t> | |||
<t>As a DR, R1 will include its own Load Balancing Hash Masks and | <t>As a DR, R1 will include its own Load-Balancing Hash Masks and | |||
the identity of R1 and R2 (the GDR Candidates) in its DRLB-List Hello | the identity of R1 and R2 (the GDR Candidates) in its DRLB-List Hello | |||
Option. | Option. | |||
</t> | </t> | |||
</section> | </section> | |||
</section> | </section> | |||
<section numbered="true" toc="default"> | ||||
<section title="Protocol Specification"> | <name>Protocol Specification</name> | |||
<section title="Hash Mask and Hash Algorithm" anchor="maskalgo"> | <section anchor="maskalgo" numbered="true" toc="default"> | |||
<t>A Hash Mask is used to extract a number of bits from the | <name>Hash Mask and Hash Algorithm</name> | |||
<t>A hash mask is used to extract a number of bits from the | ||||
corresponding IP address field (32 for IPv4, 128 for IPv6) and | corresponding IP address field (32 for IPv4, 128 for IPv6) and | |||
calculate a hash value. A hash value is used to select a GDR from GDR | calculate a hash value. A hash value is used to select a GDR from GDR | |||
Candidates advertised by the PIM DR. Hash masks allow for certain flows | Candidates advertised by the PIM DR. Hash masks allow for certain flows | |||
to always be forwarded by the same GDR, by ignoring certain bits in the | to always be forwarded by the same GDR, by ignoring certain bits in the | |||
hash value calculation, so that the hash values are the same. For | hash value calculation, so that the hash values are the same. For | |||
example, 0.0.255.0 defines a | example, 0.0.255.0 defines a | |||
Hash Mask for an IPv4 address that masks the first, the second, and | hash mask for an IPv4 address that masks the first, second, and | |||
the fourth octets, which means that only the third octet will | fourth octets, which means that only the third octet will | |||
influence the hash value computed. Note that the masks need not | influence the hash value computed. Note that the masks need not | |||
be a contiguous set of bits. E.g, for IPv4, 15.15.15.15 would be a | be a contiguous set of bits. For example, for IPv4, 15.15.15.15 would be a | |||
valid mask. | valid mask. | |||
</t> | </t> | |||
<t> | <t> | |||
In the text below, a hash mask is in some places said to be zero. | In the text below, a hash mask is, in some places, said to be zero. | |||
A hash mask is zero if no bits are set. That is, | A hash mask is zero if no bits are set, that is, | |||
0.0.0.0 for IPv4 and :: for IPv6. Also, a hash mask is said to be | 0.0.0.0 for IPv4 and :: for IPv6. Also, a hash mask is said to be | |||
an all-bits-set mask if it is 255.255.255.255 for IPv4 or | an all-bits-set mask if it is 255.255.255.255 for IPv4 or | |||
ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff for IPv6. | ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff for IPv6. | |||
</t> | ||||
<t>There are three Hash Masks defined: | ||||
</t> | </t> | |||
<t> | <t>There are three hash masks defined: | |||
<list style="symbols"> | ||||
<t>RP Hash Mask</t> | ||||
<t>Source Hash Mask</t> | ||||
<t>Group Hash Mask</t> | ||||
</list> | ||||
</t> | </t> | |||
<ul spacing="normal"> | ||||
<li>RP Hash Mask</li> | ||||
<li>Source Hash Mask</li> | ||||
<li>Group Hash Mask</li> | ||||
</ul> | ||||
<t>The hash masks need to be configured on the PIM routers that can | <t>The hash masks need to be configured on the PIM routers that can | |||
potentially become a PIM DR, unless the implementation provides | potentially become a PIM DR, unless the implementation provides | |||
default hash mask values. | default hash mask values. | |||
An implementation SHOULD have default hash mask values as follows. | An implementation <bcp14>SHOULD</bcp14> have default hash mask values as | |||
The default RP Hash Mask SHOULD be zero (no bits set). The default | follows. | |||
Source and Group Hash Masks SHOULD both be all-bits-set masks. | The default RP Hash Mask <bcp14>SHOULD</bcp14> be zero (no bits set). The | |||
These default values are likely acceptable for most deployments, and | default | |||
Source and Group Hash Masks <bcp14>SHOULD</bcp14> both be all-bits-set | ||||
masks. | ||||
These default values are likely acceptable for most deployments and | ||||
simplify configuration. There is only a need to use other masks if | simplify configuration. There is only a need to use other masks if | |||
one needs to ensure that certain flows are forwarded by the same GDR. | one needs to ensure that certain flows are forwarded by the same GDR. | |||
</t> | </t> | |||
<t> | <t> | |||
The DRLB-List Hello Option contains a list of GDR Candidates. | The DRLB-List Hello Option contains a list of GDR Candidates. | |||
The first one listed has ordinal number 0, the second listed | The first one listed has ordinal number 0, the second listed | |||
ordinal number 1, and the last one has ordinal number N - 1 if | ordinal number 1, and the last one has ordinal number N - 1 if | |||
there are N candidates listed. The hash value computed will be | there are N candidates listed. The hash value computed will be | |||
the ordinal number of the GDR Candidate that is acting as GDR for | the ordinal number of the GDR Candidate that is acting as GDR for | |||
the flow in question. | the flow in question. | |||
</t> | </t> | |||
<t>The input to be hashed is determined as follows: | <t>The input to be hashed is determined as follows: | |||
<list style="symbols"> | </t> | |||
<t>If the group is in ASM mode and the RP Hash Mask announced by | <ul spacing="normal"> | |||
<li>If the group is in ASM mode and the RP Hash Mask announced by | ||||
the PIM DR is not zero (at least one bit is set), calculate the | the PIM DR is not zero (at least one bit is set), calculate the | |||
value of hashvalue_RP [<xref target="algorithm"/>] to determine | value of hashvalue_RP (<xref target="algorithm" format="default"/>) t o determine | |||
the GDR. | the GDR. | |||
</t> | </li> | |||
<t>If the group is in ASM mode and the RP Hash Mask announced by | <li>If the group is in ASM mode and the RP Hash Mask announced by | |||
the PIM DR is zero (no bits are set), obtain the value of | the PIM DR is zero (no bits are set), obtain the value of | |||
hashvalue_Group [<xref target="algorithm"/>] to determine the | hashvalue_Group (<xref target="algorithm" format="default"/>) to det ermine the | |||
GDR. | GDR. | |||
</t> | </li> | |||
<t>If the group is in SSM mode, use | <li>If the group is in SSM mode, use | |||
hashvalue_SG [<xref target="algorithm"/>] to determine the GDR. | hashvalue_SG (<xref target="algorithm" format="default"/>) to determ | |||
</t> | ine the GDR. | |||
</list> | </li> | |||
</t> | </ul> | |||
<t> | <t> | |||
A simple Modulo Hash Algorithm is defined in this document. | A simple modulo hash algorithm is defined in this document. | |||
However, to allow another Hash Algorithms to be used, a 1-octet | However, to allow another hash algorithm to be used, a 1-octet | |||
"Hash Algorithm" field is included in the DRLB-Cap Hello Option to | "Hash Algorithm" field is included in the DRLB-Cap Hello Option to | |||
specify the Hash Algorithm used by the router. | specify the hash algorithm used by the router. | |||
</t> | </t> | |||
<t>If different Hash Algorithms are advertised among the routers | <t>If different hash algorithms are advertised among the routers | |||
on a LAN, only the routers advertising the same Hash Algorithm | on a LAN, only the routers advertising the same hash algorithm | |||
as the DR (as well as having the same DR priority as the DR) are | as the DR (as well as having the same DR priority as the DR) are | |||
eligible for GDR election. | eligible for GDR election. | |||
</t> | </t> | |||
</section> | </section> | |||
<section title="Modulo Hash Algorithm" anchor="algorithm"> | <section anchor="algorithm" numbered="true" toc="default"> | |||
<t> | <name>Modulo Hash Algorithm</name> | |||
<t> | ||||
As part of computing the hash, the notation LSZC(hash_mask) is used | As part of computing the hash, the notation LSZC(hash_mask) is used | |||
to denote the number of zeroes | to denote the number of zeroes | |||
counted from the least significant bit of a Hash Mask | counted from the least significant bit of a hash mask | |||
hash_mask. As an example, LSZC(255.255.255.128) is 7 and | hash_mask. As an example, LSZC(255.255.255.128) is 7 and | |||
also LSZC(ffff:8000::) is 111. If all bits are set, LSZC will | LSZC(ffff:8000::) is 111. If all bits are set, LSZC will | |||
be 0. If the mask is zero, then | be 0. If the mask is zero, then | |||
LSZC will be 32 for IPv4, and 128 for IPv6. | LSZC will be 32 for IPv4 and 128 for IPv6. | |||
</t> | </t> | |||
<t> | <t> | |||
The number of GDR Candidates is denoted as GDRC. | The number of GDR Candidates is denoted as GDRC. | |||
</t> | </t> | |||
<t> | <t> | |||
The idea behind the Modulo Hash Algorithm is in simple terms | The idea behind the modulo hash algorithm is, in simple terms, | |||
that the corresponding mask is applied to a value, then the result | that the corresponding mask is applied to a value, then the result | |||
is shifted right LSZC(mask) bits so that the least significant bits | is shifted right LSZC(mask) bits so that the least significant bits | |||
that were masked out are not considered. Then this result is masked | that were masked out are not considered. Then, this result is masked | |||
by 0xffffffff, keeping only the last 32 bits of the result | by 0xffffffff, keeping only the last 32 bits of the result | |||
(this only makes a difference for IPv6). Finally, the hash value is | (this only makes a difference for IPv6). Finally, the hash value is | |||
this result modulo the number of GDR Candidates (GDRC). | this result modulo the number of GDR Candidates (GDRC). | |||
</t> | </t> | |||
<t> | <t> | |||
The Modulo Hash Algorithm for computing the values hashvalue_RP, | The modulo hash algorithm, for computing the values hashvalue_RP, | |||
hashvalue_Group and hashvalue_SG is defined as follows. | hashvalue_Group, and hashvalue_SG, is defined as follows. | |||
</t> | </t> | |||
<t> | <t> | |||
hashvalue_RP is calculated as: | hashvalue_RP is calculated as: | |||
<list style = "empty"> | ||||
<t> | ||||
(((RP_address & RP_mask) >> LSZC(RP_mask)) | ||||
& 0xffffffff) % GDRC | ||||
</t> | ||||
<t>RP_address is the address of the RP defined for the group | ||||
and RP_mask is the RP Hash Mask. | ||||
</t> | ||||
</list> | ||||
</t> | </t> | |||
<t> | <artwork> | |||
(((RP_address & RP_mask) >> LSZC(RP_mask)) & 0xffffffff) % GDRC | ||||
</artwork> | ||||
<ul empty="true"> | ||||
<li>RP_address is the address of the RP defined for the group, | ||||
and RP_mask is the RP Hash Mask.</li> | ||||
</ul> | ||||
<t> | ||||
hashvalue_Group is calculated as: | hashvalue_Group is calculated as: | |||
<list style = "empty"> | </t> | |||
<t> | <artwork> | |||
(((Group_address & Group_mask) >> LSZC(Group_mask)) | (((Group_address & Group_mask) >> LSZC(Group_mask)) & 0xfffffff | |||
& 0xffffffff) % GDRC | f) | |||
</t> | % GDRC | |||
<t> | </artwork> | |||
Group_address is the group address and Group_mask is the | <ul empty="true"> | |||
Group Hash Mask. | <li> | |||
</t> | Group_address is the group address, and Group_mask is the | |||
</list> | Group Hash Mask.</li> | |||
</t> | </ul> | |||
<t> | ||||
<t> | ||||
hashvalue_SG is calculated as: | hashvalue_SG is calculated as: | |||
<list style = "empty"> | </t> | |||
<t> | <artwork> | |||
((((Source_address & Source_mask) >> LSZC(Source_mask)) | ((((Source_address & Source_mask) >> LSZC(Source_mask)) & | |||
& 0xffffffff) ^ | 0xffffffff) ^ (((Group_address & Group_mask) >> LSZC(Group_mask)) | |||
(((Group_address & Group_mask) >> LSZC(Group_mask)) | & 0xffffffff)) % GDRC | |||
& 0xffffffff)) % GDRC | </artwork> | |||
</t> | <ul empty="true"> | |||
<t> | <li> | |||
Group_address is the group address and Group_mask is the | Group_address is the group address, and Group_mask is the | |||
Group Hash Mask. | Group Hash Mask.</li> | |||
</t> | </ul> | |||
</list> | <section numbered="true" toc="default"> | |||
</t> | <name>Modulo Hash Algorithm Examples</name> | |||
<section title="Modulo Hash Algorithm Examples"> | <t>To help illustrate the algorithm, consider this example. | |||
<t>To help illustrate the algorithm, consider this example. | ||||
Router X with IPv4 address 203.0.113.1 receives a DRLB-List | Router X with IPv4 address 203.0.113.1 receives a DRLB-List | |||
Hello Option from the DR, which announces RP Hash | Hello Option from the DR that announces RP Hash | |||
Mask 0.0.255.0 and a list of GDR Candidates, sorted by IP | Mask 0.0.255.0 and a list of GDR Candidates, sorted by IP | |||
addresses from high to low: 203.0.113.3, 203.0.113.2 and | addresses from high to low: 203.0.113.3, 203.0.113.2, and | |||
203.0.113.1. The ordinal number assigned to those addresses | 203.0.113.1. The ordinal number assigned to those addresses | |||
would be: | would be: | |||
</t> | </t> | |||
<t>0 for 203.0.113.3; 1 for 203.0.113.2; 2 for 203.0.113.1 | <t> | |||
(Router X). | 0 for 203.0.113.3; 1 for 203.0.113.2; 2 for 203.0.113.1 | |||
</t> | (Router X).</t> | |||
<t>Assume there are 2 RPs: RP1 192.0.2.1 for Group1 and RP2 | ||||
198.51.100.2 for Group2. Following the modulo Hash Algorithm: | <t>Assume there are 2 RPs: RP1 192.0.2.1 for Group1 and RP2 | |||
</t> | 198.51.100.2 for Group2. Following the modulo hash algorithm: | |||
<t>LSZC(0.0.255.0) is 8 and GDRC is 3. | </t> | |||
<ul spacing="normal"> | ||||
<li>LSZC(0.0.255.0) is 8, and GDRC is 3. | ||||
The hashvalue_RP for Group1 with RP RP1 is: | The hashvalue_RP for Group1 with RP RP1 is: | |||
</t> | </li> | |||
<t>(((192.0.2.1 & 0.0.255.0) >> 8) & 0xffffffff) % 3 = | </ul> | |||
2 % 3 = 2 | <ul empty="true"> | |||
</t> | <li> | |||
<t>which matches the ordinal number assigned to Router X. | <artwork> | |||
Router X will be the GDR for Group1. | (((192.0.2.1 & 0.0.255.0) >> 8) & 0xffffffff) % 3 | |||
</t> | = 2 % 3 | |||
<t>The hashvalue_RP for Group2 with RP RP2 is: | = 2 | |||
</t> | </artwork> | |||
<t>(((198.51.100.2 & 0.0.255.0) >> 8) & 0xffffffff) % 3 = | <artwork> | |||
100 % 3 = 1 | <li>This matches the ordinal number assigned to Router X. | |||
</t> | Router X will be the GDR for Group1.</li> | |||
<t>which is different from the ordinal number of Router X (2). | </ul> | |||
Hence, Router X will not be GDR for Group2. | <ul spacing="normal"> | |||
</t> | <li>The hashvalue_RP for Group2 with RP RP2 is:</li> | |||
<t>For IPv6 consider this example, similar to the above. | </ul> | |||
<ul empty="true"> | ||||
<li> | ||||
<artwork> | ||||
(((198.51.100.2 & 0.0.255.0) >> 8) & 0xffffffff) % 3 | ||||
= 100 % 3 | ||||
= 1 | ||||
</artwork> | ||||
</li> | ||||
<li>This is different from the ordinal number of Router X (2). | ||||
Hence, Router X will not be GDR for Group2.</li> | ||||
</ul> | ||||
<t>For IPv6, consider this example, similar to the above. | ||||
Router X with IPv6 address fe80::1 receives a DRLB-List | Router X with IPv6 address fe80::1 receives a DRLB-List | |||
Hello Option from the DR, which announces RP Hash | Hello Option from the DR that announces RP Hash | |||
Mask ::ffff:ffff:ffff:0 and a list of GDR Candidates, sorted by IP | Mask ::ffff:ffff:ffff:0 and a list of GDR Candidates, sorted by IP | |||
addresses from high to low: fe80::3, fe80::2 and fe80::1. | addresses from high to low: fe80::3, fe80::2, and fe80::1. | |||
The ordinal number assigned to those addresses would be: | The ordinal number assigned to those addresses would be: | |||
</t> | </t> | |||
<t>0 for fe80::3; 1 for fe80::2; 2 for fe80::1 (Router X). | <ul empty="true"> | |||
</t> | <li>0 for fe80::3; 1 for fe80::2; 2 for fe80::1 (Router X).</li> | |||
<t>Assume there are 2 RPs: RP1 2001:db8::1:0:5678:1 for Group1 and | </ul> | |||
<t>Assume there are 2 RPs: RP1 2001:db8::1:0:5678:1 for Group1 and | ||||
RP2 2001:db8::1:0:1234:2 for Group2. | RP2 2001:db8::1:0:1234:2 for Group2. | |||
Following the modulo Hash Algorithm: | Following the modulo hash algorithm: | |||
</t> | </t> | |||
<t>LSZC(::ffff:ffff:ffff:0) is 16 and GDRC is 3. | <ul spacing="normal"> | |||
The hashvalue_RP for Group1 with RP RP1 is: | <li>LSZC(::ffff:ffff:ffff:0) is 16, and GDRC is 3. | |||
</t> | The hashvalue_RP for Group1 with RP RP1 is:</li> | |||
<t>(((2001:db8::1:0:5678:1 & ::ffff:ffff:ffff:0) >> 16) & | </ul> | |||
0xffffffff) % 3 = | <ul empty="true"> | |||
((::1:0:5678:0 >> 16) & 0xffffffff) % 3 = | <li> | |||
(::1:0:5678 & 0xffffffff) % 3 = ::5678 % 3 = 2 | <artwork> | |||
</t> | (((2001:db8::1:0:5678:1 & ::ffff:ffff:ffff:0) >> 16) & | |||
<t>which matches the ordinal number assigned to Router X. | 0xffffffff) % 3 | |||
Router X will be the GDR for Group1. | = ((::1:0:5678:0 >> 16) & 0xffffffff) % 3 | |||
</t> | = (::1:0:5678 & 0xffffffff) % 3 | |||
<t>The hashvalue_RP for Group2 with RP RP2 is: | = ::5678 % 3 | |||
</t> | = 2 | |||
<t>(((2001:db8::1:0:1234:2 & ::ffff:ffff:ffff:0) >> 16) & | </artwork> | |||
0xffffffff) % 3 = | </li> | |||
((::1:0:1234:0 >> 16) & 0xffffffff) % 3 = | <li>This matches the ordinal number assigned to Router X. | |||
(::1:0:1234 & 0xffffffff) % 3 = ::1234 % 3 = 1 | Router X will be the GDR for Group1.</li> | |||
</t> | </ul> | |||
<t>which is different from the ordinal number of Router X (2). | <ul spacing="normal"> | |||
Hence, Router X will not be GDR for Group2. | <li>The hashvalue_RP for Group2 with RP RP2 is:</li> | |||
</t> | </ul> | |||
</section> | <ul empty="true"> | |||
<section title="Limitations"> | <li> | |||
<artwork> | ||||
(((2001:db8::1:0:1234:2 & ::ffff:ffff:ffff:0) >> 16) & | ||||
0xffffffff) % 3 | ||||
= ((::1:0:1234:0 >> 16) & 0xffffffff) % 3 | ||||
= (::1:0:1234 & 0xffffffff) % 3 | ||||
= ::1234 % 3 | ||||
= 1 | ||||
</artwork> | ||||
</li> | ||||
<li>This is different from the ordinal number of Router X (2). | ||||
Hence, Router X will not be GDR for Group2.</li> | ||||
</ul> | ||||
</section> | ||||
<section numbered="true" toc="default"> | ||||
<name>Limitations</name> | ||||
<t> | <t> | |||
The Modulo Hash Algorithm has poor failover characteristics when | The modulo hash algorithm has poor failover characteristics when | |||
a shared LAN has more than two GDRs. In the | a shared LAN has more than two GDRs. In the | |||
case of more than two GDRs on a LAN, when one GDR fails, all | case of more than two GDRs on a LAN, when one GDR fails, all | |||
of the groups may be reassigned to a different GDR, even if | of the groups may be reassigned to a different GDR, even if | |||
they were not assigned to the failed GDR. However, many | they were not assigned to the failed GDR. However, many | |||
deployments use only two routers on a shared LAN for redundancy | deployments use only two routers on a shared LAN for redundancy | |||
purposes. Future work may define new Hash Algorithms where only | purposes. Future work may define new hash algorithms where only | |||
groups assigned to the failed GDR get reassigned. | groups assigned to the failed GDR get reassigned. | |||
</t> | </t> | |||
<t>The Modulo Hash Algorithm will use at most 32 consecutive bits of | <t>The modulo hash algorithm will use, at most, 32 consecutive bits of | |||
the input addresses for its computation. Exactly which bits are | the input addresses for its computation. Exactly which bits are | |||
used of the source, group or RP addresses, depend on the respective | used of the source, group, or RP addresses depend on the respective | |||
masks. This limitation may be an issue for IPv6 deployments, | masks. This limitation may be an issue for IPv6 deployments, | |||
since not all bits of the IPv6 addresses are considered. If this | since not all bits of the IPv6 addresses are considered. If this | |||
causes operational issues, a new hash algorithm would need to be | causes operational issues, a new hash algorithm would need to be | |||
defined. | defined. | |||
</t> | </t> | |||
</section> | </section> | |||
</section> | </section> | |||
<section title="PIM Hello Options"> | <section numbered="true" toc="default"> | |||
<name>PIM Hello Options</name> | ||||
<t>PIM routers include a new option, called | <t>PIM routers include a new option, called | |||
"Load Balancing Capability (DRLB-Cap)" in their PIM Hello messages. | "Load-Balancing Capability (DRLB-Cap)", in their PIM Hello messages. | |||
</t> | </t> | |||
<t>Besides this DRLB-Cap Hello Option, the elected PIM DR also | <t>Besides this DRLB-Cap Hello Option, the elected PIM DR also | |||
includes a new "DR Load Balancing List (DRLB-List) Hello Option". | includes a new "DR Load-Balancing List (DRLB-List) Hello Option". | |||
The DRLB-List Hello Option consists of three Hash Masks as defined | The DRLB-List Hello Option consists of three hash masks, as defined | |||
above and also a list of GDR Candidate addresses on the LAN. It is | above, and also a list of GDR Candidate addresses on the LAN. It is | |||
recommended that the GDR Candidate addresses are sorted in descending | recommended that the GDR Candidate addresses are sorted in descending | |||
order. This ensures that when using algorithms such as the Modulo | order. This ensures that when using algorithms, such as the modulo hash | |||
algorithm in this document, it is predictable which GDR is | algorithm in this document, it is predictable which GDR is | |||
responsible for which groups, regardless of the order the DR learned | responsible for which groups, regardless of the order the DR learned | |||
about the candidates. | about the candidates. | |||
</t> | </t> | |||
<section title="PIM DR Load Balancing Capability (DRLB-Cap) Hello | <section numbered="true" toc="default"> | |||
Option"> | <name>PIM DR Load-Balancing Capability (DRLB-Cap) Hello Option</name> | |||
<figure align="center"> | <figure anchor="PIM-CAP"> | |||
<artwork align="center"><![CDATA[ | <name>PIM DR Load-Balancing Capability Hello Option</name> | |||
<artwork align="center" name="" type="" alt=""><![CDATA[ | ||||
0 1 2 3 | 0 1 2 3 | |||
0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 | 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 | |||
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
| Type = 34 | Length = 4 | | | Type = 34 | Length = 4 | | |||
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
| Reserved |Hash Algorithm | | | Reserved |Hash Algorithm | | |||
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
]]></artwork> | ||||
Figure 3: PIM DR Load Balancing Capability Hello Option | </figure> | |||
]]></artwork> | <dl newline="false" spacing="normal"> | |||
<postamble></postamble> | <dt>Type:</dt> | |||
</figure> | <dd>34</dd> | |||
<t> | <dt>Length:</dt> | |||
<list style="empty"> | <dd>4</dd> | |||
<t>Type: 34 | <dt>Reserved:</dt> | |||
</t> | <dd>Transmitted as zero, ignored on receipt.</dd> | |||
<t>Length: 4 | <dt>Hash Algorithm:</dt> | |||
</t> | <dd>Hash algorithm type. A value listed in the | |||
<t>Reserved: Transmitted as zero, ignored on receipt. | IANA "PIM Designated Router Load-Balancing Hash Algorithms" | |||
</t> | registry. 0 is used for the hash algorithm defined in this | |||
<t>Hash Algorithm: Hash Algorithm type. A value listed in the | ||||
IANA Designated Router Load Balancing Hash Algorithms | ||||
registry. 0 is used for the Modulo algorithm defined in this | ||||
document. | document. | |||
</t> | </dd> | |||
</list> | </dl> | |||
</t> | <t>This DRLB-Cap Hello Option <bcp14>MUST</bcp14> be advertised by rou | |||
<t>This DRLB-Cap Hello Option MUST be advertised by routers on | ters on | |||
all interfaces where DR Load Balancing is enabled. Note that the | all interfaces where DR Load Balancing is enabled. Note that the | |||
option is included at most once. | option is included, at most, once. | |||
</t> | </t> | |||
</section> | </section> | |||
<section title = "PIM DR Load Balancing List (DRLB-List) Hello Option"> | <section numbered="true" toc="default"> | |||
<figure align="center"> | <name>PIM DR Load-Balancing List (DRLB-List) Hello Option</name> | |||
<artwork align="center"><![CDATA[ | <figure anchor="PIM-LIST"> | |||
0 1 2 3 | <name>PIM DR Load-Balancing List Hello Option</name> | |||
0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 | <artwork align="center" name="" type="" alt=""><![CDATA[ | |||
0 1 2 3 | ||||
0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 | ||||
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
| Type = 35 | Length | | | Type = 35 | Length | | |||
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
| Group Mask | | | Group Mask | | |||
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
| Source Mask | | | Source Mask | | |||
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
| RP Mask | | | RP Mask | | |||
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
| GDR Candidate Address(es) | | | GDR Candidate Address(es) | | |||
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
Figure 4: PIM DR Load Balancing List Hello Option | ||||
]]></artwork> | ]]></artwork> | |||
<postamble></postamble> | ||||
</figure> | </figure> | |||
<t> | <dl newline="false" spacing="normal"> | |||
<list style="empty"> | <dt>Type:</dt> | |||
<t>Type: 35</t> | <dd>35</dd> | |||
<t>Length: (3 + n) x (4 or 16) bytes, where n is the number | <dt>Length:</dt> | |||
of GDR candidates.</t> | <dd>(3 + n) x (4 or 16) bytes, where n is the number | |||
<t>Group Mask (32/128 bits): Mask applied to group addresses | of GDR Candidates.</dd> | |||
as part of hash computation.</t> | <dt>Group Mask (32/128 bits):</dt> | |||
<t> Source Mask (32/128 bits): Mask applied to source addresses | <dd>Mask applied to group addresses | |||
as part of hash computation.</t> | as part of hash computation.</dd> | |||
<t>RP Mask (32/128 bits): Mask applied to RP addresses | <dt> Source Mask (32/128 bits):</dt> | |||
as part of hash computation.</t> | <dd>Mask applied to source addresses | |||
<t> | as part of hash computation.</dd> | |||
<list style="empty"> | <dt>RP Mask (32/128 bits):</dt> | |||
<t>All masks MUST have the same number of bits as the IP | <dd>Mask applied to RP addresses | |||
source address in the PIM Hello IP header. | as part of hash computation.</dd> | |||
</t> | </dl> | |||
</list> | <t>All masks <bcp14>MUST</bcp14> have the same number of bits as th | |||
</t> | e IP | |||
<t>GDR Candidate Address(es) (32/128 bits): List of GDR | source address in the PIM Hello IP header. | |||
Candidate(s) | </t> | |||
<list style="empty"> | <dl newline="false" spacing="normal"> | |||
<t>All addresses MUST be in the same address family as the | <dt>GDR Candidate Address(es) (32/128 bits):</dt> | |||
<dd><t>List of GDR Candidate(s)</t> | ||||
<t>All addresses <bcp14>MUST</bcp14> be in the same address fami | ||||
ly as the | ||||
PIM Hello IP header. It is recommended that the addresses are | PIM Hello IP header. It is recommended that the addresses are | |||
sorted in descending order. | sorted in descending order. | |||
</t> | </t> | |||
<t>If the "Interface ID" option, as specified in | <t>If the "Interface ID" option, as specified in | |||
<xref target="RFC6395"/>, is present in a GDR Candidate's | <xref target="RFC6395" format="default"/>, is present in a GDR Ca | |||
PIM Hello message, and the "Router Identifier" portion is | ndidate's | |||
PIM Hello message and the "Router Identifier" portion is | ||||
non-zero: | non-zero: | |||
<list style="symbols"> | </t> | |||
<t>For IPv4, the "GDR Candidate Address" will be set directly | <ul spacing="normal"> | |||
<li>For IPv4, the "GDR Candidate Address" will be set direct | ||||
ly | ||||
to the "Router Identifier". | to the "Router Identifier". | |||
</t> | </li> | |||
<t>For IPv6, the "GDR Candidate Address" will be 96 bits of | <li>For IPv6, the "GDR Candidate Address" will be 96 bits of | |||
zeroes followed by the 32-bit Router Identifier. | zeroes, followed by the 32-bit Router Identifier. | |||
</t> | </li> | |||
</list> | </ul> | |||
</t> | <t>If the "Interface ID" option is not present in a GDR | |||
<t>If the "Interface ID" option is not present in a GDR | Candidate's PIM Hello message or if the "Interface ID" | |||
Candidate' PIM Hello message, or if the "Interface ID" | ||||
option is present but the "Router Identifier" field is zero, | option is present but the "Router Identifier" field is zero, | |||
the "GDR Candidate Address" will be the IPv4 or IPv6 source | the "GDR Candidate Address" will be the IPv4 or IPv6 source | |||
address of the PIM Hello message. | address of the PIM Hello message. | |||
</t> | </t> | |||
<t>This DRLB-List Hello Option MUST only be advertised by the | <t>This DRLB-List Hello Option <bcp14>MUST</bcp14> only be adver | |||
elected PIM DR. It MUST be ignored if received from a non-DR. | tised by the | |||
The option MUST also be ignored if the hash masks are not | elected PIM DR. It <bcp14>MUST</bcp14> be ignored if received fro | |||
the correct number of bits, or GDR Candidate addresses are in | m a non-DR. | |||
The option <bcp14>MUST</bcp14> also be ignored if the hash masks | ||||
are not | ||||
the correct number of bits or GDR Candidate addresses are in | ||||
the wrong address family. | the wrong address family. | |||
</t> | </t> | |||
</list> | </dd></dl> | |||
</t> | </section> | |||
</list> | ||||
</t> | ||||
</section> | ||||
</section> | </section> | |||
<section numbered="true" toc="default"> | ||||
<section title="PIM DR Operation"> | <name>PIM DR Operation</name> | |||
<t>The DR election process is still the same as defined in | <t>The DR election process is still the same as defined in | |||
<xref target="RFC7761"/>. The DR advertises the new DRLB-List Hello | <xref target="RFC7761" format="default"/>. The DR advertises the new DRLB -List Hello | |||
Option, which contains mask values from user configuration (or default | Option, which contains mask values from user configuration (or default | |||
values), followed by a list of GDR Candidate Addresses. Note that | values), followed by a list of GDR Candidate addresses. Note that | |||
if a router included the "Interface ID" option in the hello message, | if a router included the "Interface ID" option in the hello message | |||
and the Router ID is non-zero, the Router ID will be used to form the | and the Router ID is non-zero, the Router ID will be used to form the | |||
GDR Candidate address of the router, as discussed in the previous | GDR Candidate address of the router, as discussed in the previous | |||
section. It is recommended that the list be sorted, from the highest | section. It is recommended that the list be sorted from the highest | |||
value to the lowest value. The reason for sorting the list is to | value to the lowest value. The reason for sorting the list is to | |||
make the behavior deterministic, regardless of the order in which the | make the behavior deterministic, regardless of the order in which the | |||
DR learns of new candidates. Note that, as for non-DR routers, the DR | DR learns of new candidates. Note that, as for non-DR routers, the DR | |||
also advertises the DRLB-Cap Hello Option to indicate its ability to | also advertises the DRLB-Cap Hello Option to indicate its ability to | |||
support the new functionality and the type of GDR election Hash | support the new functionality and the type of GDR election hash | |||
Algorithm it uses. | algorithm it uses. | |||
</t> | </t> | |||
<t>If a PIM DR receives a neighbor DRLB-Cap Hello Option, which | <t>If a PIM DR receives a neighbor DRLB-Cap Hello Option that | |||
contains the same Hash Algorithm as the DR, and the neighbor has the | contains the same hash algorithm as the DR and the neighbor has the | |||
same DR priority as the DR, PIM DR SHOULD consider the neighbor as a | same DR priority as the DR, PIM DR <bcp14>SHOULD</bcp14> consider the nei | |||
GDR Candidate and insert the GDR Candidate' Address into the | ghbor as a | |||
GDR Candidate and insert the GDR Candidate's Address into the | ||||
list of the DRLB-List Option. However, the DR may have policies | list of the DRLB-List Option. However, the DR may have policies | |||
limiting which GDR Candidates, or the number of GDR Candidates to | limiting which or the number of GDR Candidates to | |||
include. Likewise, the DR SHOULD include itself in the list of GDR | include. Likewise, the DR <bcp14>SHOULD</bcp14> include itself in the lis | |||
Candidates, but it is permissible not to do so, if for instance there | t of GDR | |||
Candidates, but it is permissible not to do so, for instance, if there | ||||
is some policy restricting the candidate set. | is some policy restricting the candidate set. | |||
</t> | </t> | |||
<t>If a PIM neighbor included in the list expires, stops announcing | <t>If a PIM neighbor included in the list expires, stops announcing | |||
the DRLB-Cap Hello Option, changes DR priority, changes Hash Algorithm | the DRLB-Cap Hello Option, changes DR priority, changes hash algorithm, | |||
or otherwise becomes ineligible as a candidate, the DR SHOULD | or otherwise becomes ineligible as a candidate, the DR <bcp14>SHOULD</bcp | |||
14> | ||||
immediately send a triggered hello with a new list in the DRLB-List | immediately send a triggered hello with a new list in the DRLB-List | |||
option, excluding the neighbor. | option, excluding the neighbor. | |||
</t> | </t> | |||
<t>If a new router becomes eligible as a candidate, there is no | <t>If a new router becomes eligible as a candidate, there is no | |||
urgency in sending out an updated list. An updated list SHOULD be | urgency in sending out an updated list. An updated list <bcp14>SHOULD</bc | |||
p14> be | ||||
included in the next hello. | included in the next hello. | |||
</t> | </t> | |||
</section> | </section> | |||
<section title="PIM GDR Candidate Operation"> | <section numbered="true" toc="default"> | |||
<t>When an IGMP/MLD report is received, a Hash Algorithm is used by | <name>PIM GDR Candidate Operation</name> | |||
<t>When an IGMP/MLD report is received, a hash algorithm is used by | ||||
the GDR Candidates to determine which router is going to be responsible | the GDR Candidates to determine which router is going to be responsible | |||
for building forwarding trees on behalf of the host. | for building forwarding trees on behalf of the host. | |||
</t> | </t> | |||
<t>The router MUST include the DRLB-Cap Hello Option in all PIM Hello | <t>The router <bcp14>MUST</bcp14> include the DRLB-Cap Hello Option in a ll PIM Hello | |||
messages sent on the interface. Note that the presence of the | messages sent on the interface. Note that the presence of the | |||
DRLB-Cap Option in the PIM Hello does not guarantee that the router | DRLB-Cap Option in the PIM Hello does not guarantee that the router | |||
will be considered as a GDR candidate. Once the DR election is done, | will be considered as a GDR Candidate. Once the DR election is done, | |||
the DRLB-List Hello Option is received from the current PIM DR | the DRLB-List Hello Option is received from the current PIM DR | |||
containing a list of the selected GDRs Candidates. | containing a list of the selected GDR Candidates. | |||
</t> | </t> | |||
<t>A router only acts as a GDR Candidate if it is included in the GDR | <t>A router only acts as a GDR Candidate if it is included in the GDR | |||
Candidate list of the DRLB-List Hello Option. See the next section for | Candidate list of the DRLB-List Hello Option. See the next section for | |||
details. | details. | |||
</t> | </t> | |||
</section> | </section> | |||
<section title="DRLB-List Hello Option Processing"> | <section numbered="true" toc="default"> | |||
<t> | <name>DRLB-List Hello Option Processing</name> | |||
<t> | ||||
This section discusses processing of the DRLB-List Hello Option, | This section discusses processing of the DRLB-List Hello Option, | |||
including the case where it was received in the previous hello, | including the case where it was received in the previous hello | |||
but not in the current hello. | but not in the current hello. | |||
All routers MUST ignore the DRLB-List Hello Option if it is | All routers <bcp14>MUST</bcp14> ignore the DRLB-List Hello Option if it | |||
received from a PIM router which is not the DR. The option MUST | is | |||
only be processed by routers that are announcing the DRLB-Cap Option, | received from a PIM router that is not the DR. The option <bcp14>MUST</ | |||
and only if the Hash Algorithm announced by the DR is the same as | bcp14> | |||
only be processed by routers that are announcing the DRLB-Cap Option | ||||
and only if the hash algorithm announced by the DR is the same as | ||||
the local announcement. | the local announcement. | |||
All GDR Candidates MUST use the Hash Masks advertised in the Option, | All GDR Candidates <bcp14>MUST</bcp14> use the hash masks advertised | |||
in the Option, | ||||
even if they differ from those the candidate was configured with. | even if they differ from those the candidate was configured with. | |||
The DR MUST also process its own DRLB-List Hello Option. | The DR <bcp14>MUST</bcp14> also process its own DRLB-List Hello Option. | |||
</t> | </t> | |||
<t>A router stores the latest option contents that was announced, | <t>A router stores the latest option contents that were announced, | |||
if any, and deletes the previous contents. The router MUST also | if any, and deletes the previous contents. The router <bcp14>MUST</bcp14> | |||
compare the new contents with any previous contents, and if there | also | |||
compare the new contents with any previous contents and, if there | ||||
are any changes, continue processing as below. Note that if the | are any changes, continue processing as below. Note that if the | |||
option does not pass the above checks, the below processing MUST be | option does not pass the above checks, the below processing <bcp14>MUST</ bcp14> be | |||
done as if the option was not announced. | done as if the option was not announced. | |||
</t> | </t> | |||
<t> | <t> | |||
If the contents of the DRLB-List Option, the masks or the candidate | If the contents of the DRLB-List Option, the masks, or the candidate | |||
list, differs from the previously saved copy, it is received for the | list differ from the previously saved copy, it is received for the | |||
first time, or it is no longer being received or accepted, the | first time, or it is no longer being received or accepted, the | |||
option MUST be processed as below. | option <bcp14>MUST</bcp14> be processed as below. | |||
<list style="numbers"> | </t> | |||
<t>If the local router is included in the GDR Candidate Address(es) | <ol spacing="normal" type="1"> | |||
field (it will look for its own address, or its Router ID if it | <li> | |||
announces a non-zero Router ID), for each of the groups, or source | <t>If the local router is included in the "GDR Candidate | |||
and group pairs if the group is in SSM mode, with local receiver | Address(es)" field, it will look for its own address, or if it | |||
interest, the router MUST run the Hash Algorithm to determine | announces a non-zero Router ID, its own Router ID. For each of the | |||
which of them it is the GDR for. | groups or source and group pairs, if the group is in SSM mode | |||
<list style="symbol"> | with local receiver interest, the router <bcp14>MUST</bcp14> run | |||
<t>If there is no change in the GDR status, then no further | the hash algorithm to determine which of them it is the GDR for. | |||
</t> | ||||
<ul spacing="normal"> | ||||
<li>If there is no change in the GDR status, then no further | ||||
action is required. | action is required. | |||
</t> | </li> | |||
<t>If the router becomes the new GDR, then a multicast | <li>If the router becomes the new GDR, then a multicast | |||
forwarding tree MUST be built <xref target="RFC7761"/>. | forwarding tree <bcp14>MUST</bcp14> be built <xref target="RFC7761" | |||
</t> | format="default"/>. | |||
<t> | </li> | |||
<li> | ||||
If the router is no longer the GDR, then it uses an Assert as | If the router is no longer the GDR, then it uses an Assert as | |||
explained in [<xref target="assert"/>]. | explained in <xref target="assert" format="default"/>. | |||
</t> | </li> | |||
</list> | </ul> | |||
</t> | </li> | |||
<t>If the local router is not included in the GDR Candidate | ||||
Address(es) field, or if the DRLB-List Hello Option is no longer | <li> | |||
included in the DR's Hello, or if the DR's Neighbor Liveness Timer | <t>If one of the following occurs:</t> | |||
expires <xref target="RFC7761"/>, for each of the groups, or | <ul> | |||
source and group pairs if the group is in SSM mode, with local | <li>the local router is not included in the "GDR Candidate | |||
receiver interest, for which the router is the GDR, it | Address(es)" field,</li> | |||
uses an Assert as explained in [<xref target="assert"/>]. | <li>the DRLB-List Hello Option is no longer included in the DR's | |||
</t> | Hello, or</li> | |||
</list> | <li>the DR's Neighbor Liveness Timer expires <xref target="RFC7761" format="default"/>,</li> | |||
</t> | </ul> | |||
<t> | ||||
then for each group (or each source and group pair if the group | ||||
is in SSM mode) with local receiver interest, for which the | ||||
router is the GDR, the router uses an Assert as explained in | ||||
<xref target="assert"/>. | ||||
</t> | ||||
</li> | ||||
</ol> | ||||
</section> | </section> | |||
<section title="PIM Assert Modification" anchor="assert"> | <section anchor="assert" numbered="true" toc="default"> | |||
<t>GDR changes may occur due to configuration change, due to | <name>PIM Assert Modification</name> | |||
GDR candidates going down, and also new routers coming up and | <t>GDR changes may occur due to configuration change, | |||
becoming GDR candidates. This may occur while flows are being | GDR Candidates going down, and also new routers coming up and | |||
becoming GDR Candidates. This may occur while flows are being | ||||
forwarded. If the GDR for an active flow changes, there is likely | forwarded. If the GDR for an active flow changes, there is likely | |||
to be some disruption, such as packet loss or duplicates. | to be some disruption, such as packet loss or duplicates. | |||
By using asserts, packet loss is minimized, while allowing a small | By using asserts, packet loss is minimized while allowing a small | |||
amount of duplicates. | amount of duplicates. | |||
</t> | </t> | |||
<t>When a router stops acting as the GDR for a group, or source and | <t>When a router stops acting as the GDR for a group, or source and | |||
group pair if SSM, it MUST set the Assert metric preference to maximum | group pair if SSM, it <bcp14>MUST</bcp14> set the Assert metric preferenc | |||
e to maximum | ||||
(0x7fffffff) and the Assert metric to one less than maximum | (0x7fffffff) and the Assert metric to one less than maximum | |||
(0xfffffffe). That is, whenever it sends or receives an Assert for the | (0xfffffffe). That is, whenever it sends or receives an Assert for the | |||
group, it must use these values as the metric preference and metric | group, it must use these values as the metric preference and metric | |||
rather than the values provided by the unicast routing protocol. | rather than the values provided by the unicast routing protocol. | |||
</t> | </t> | |||
<t>The rest of this section is just for illustration purposes and | <t>The rest of this section is just for illustration purposes and | |||
is not part of the protocol definition. | is not part of the protocol definition. | |||
</t> | </t> | |||
<t>To illustrate the behavior when there is a GDR change, consider | <t>To illustrate the behavior when there is a GDR change, consider | |||
the following scenario where there are two flows | the following scenario where there are two flows: | |||
G1 and G2. R1 is the GDR for G1, and R2 is the GDR for G2. | G1 and G2. R1 is the GDR for G1, and R2 is the GDR for G2. | |||
When R3 comes up, it is possible that R3 becomes GDR for both | When R3 comes up, it is possible that R3 becomes GDR for both | |||
G1 and G2, hence R3 starts to build the forwarding tree for G1 and | G1 and G2; hence, R3 starts to build the forwarding tree for G1 and | |||
G2. If R1 and R2 stop forwarding before R3 completes the process, | G2. If R1 and R2 stop forwarding before R3 completes the process, | |||
packet loss might occur. On the other hand, if R1 and R2 continue | packet loss might occur. On the other hand, if R1 and R2 continue | |||
forwarding while R3 is building the forwarding trees, duplicates | forwarding while R3 is building the forwarding trees, duplicates | |||
might occur. | might occur. | |||
</t> | </t> | |||
<t>When the role of GDR changes as above, instead of immediately | <t>When the role of GDR changes as above, instead of immediately | |||
stopping forwarding, R1 and R2 continue forwarding to G1 and G2 | stopping forwarding, R1 and R2 continue forwarding to G1 and G2 | |||
respectively, while, at the same time, R3 builds forwarding trees for | respectively, while, at the same time, R3 builds forwarding trees for | |||
G1 and G2. This will lead to PIM Asserts. | G1 and G2. This will lead to PIM Asserts. | |||
</t> | </t> | |||
<t>For G1, using the functionality described in this document, R1 | <t>For G1, using the functionality described in this document, R1 | |||
and R3 determine the new GDR, which is R3. With the modified Assert | and R3 determine the new GDR, which is R3. With the modified Assert | |||
behavior, R1 sets its Assert metric to the near maximum value discussed | behavior, R1 sets its Assert metric to the near maximum value, as discuss | |||
above. That will make R3, which has normal metric in its Assert as | ed | |||
above. That will make R3, which has normal metric in its Assert, | ||||
the Assert winner. | the Assert winner. | |||
</t> | </t> | |||
</section> | </section> | |||
<section numbered="true" toc="default"> | ||||
<section title="Backward Compatibility"> | <name>Backward Compatibility</name> | |||
<t>In the case of a hybrid Ethernet shared LAN (where some PIM routers | <t>In the case of a hybrid Ethernet shared LAN (where some PIM routers | |||
support the functionality defined in this document, and some do not); | support the functionality defined in this document and some do not): | |||
<list style="symbols"> | ||||
<t>If the DR does not support the new functionality, then there | ||||
will be no load-balancing. | ||||
</t> | ||||
<t>If non-DR routers do not support the new functionality, they | ||||
will not be considered as Candidate GDRs and it will not take part | ||||
in load-balancing. Load-balancing may still happen on the link. | ||||
</t> | ||||
</list> | ||||
</t> | </t> | |||
<ul spacing="normal"> | ||||
<li>If the DR does not support the new functionality, then there | ||||
will be no load balancing. | ||||
</li> | ||||
<li>If non-DR routers do not support the new functionality, they | ||||
will not be considered as GDR Candidates and will not take part | ||||
in load balancing. Load balancing may still happen on the link. | ||||
</li> | ||||
</ul> | ||||
</section> | </section> | |||
</section> | </section> | |||
<section title="Operational Considerations"> | <section numbered="true" toc="default"> | |||
<name>Operational Considerations</name> | ||||
<t> | <t> | |||
An administrator needs to consider what the total bandwidth | An administrator needs to consider what the total bandwidth | |||
requirements are and find a set of routers that together has | requirements are and find a set of routers that together have | |||
enough available capacity, while making sure that each of the routers | enough available capacity while making sure that each of the routers | |||
can handle its part, assuming that the traffic is distributed | can handle its part, assuming that the traffic is distributed | |||
roughly equally among the routers. Ideally, one should also have | roughly equally among the routers. Ideally, one should also have | |||
enough bandwidth to handle the case where at least one router fails. | enough bandwidth to handle the case where at least one router fails. | |||
All routers should have reachability to the sources, and | All routers should have reachability to the sources and | |||
RPs if applicable, that is not via the LAN. | RPs, if applicable, that are not via the LAN. | |||
</t> | </t> | |||
<t>Care must be taken when choosing what hash masks to configure. One | <t>Care must be taken when choosing what hash masks to configure. One | |||
would typically configure the same masks on all the routers, so that | would typically configure the same masks on all the routers so that | |||
they are the same, regardless of which router is elected as DR. The | they are the same, regardless of which router is elected as DR. The | |||
default masks are likely suitable for most deployments. The RP Hash | default masks are likely suitable for most deployments. The RP Hash | |||
Mask must be configured (the default is no bits set) if one wishes to | Mask must be configured (the default is no bits set) if one wishes to | |||
hash based on the RP address rather than the group address for ASM. | hash based on the RP address rather than the group address for ASM. | |||
The default masks will use the entire group addresses, and source | The default masks will use the entire group addresses, and source | |||
addresses if SSM, as part of the hash. An administrator may set other | addresses if SSM, as part of the hash. An administrator may set other | |||
masks that masks out part of the addresses to ensure that certain | masks that mask out part of the addresses to ensure that certain | |||
flows always get hashed to the same router. How this is achieved depends | flows always get hashed to the same router. How this is achieved depends | |||
on how the group addresses are allocated. | on how the group addresses are allocated. | |||
</t> | </t> | |||
<t> | <t> | |||
Only the routers announcing the same Hash Algorithm as the DR | Only the routers announcing the same hash algorithm as the DR | |||
would be considered as GDR candidates. Network administrators | would be considered as GDR Candidates. Network administrators | |||
need to make sure that the desired set of routers announce the | need to make sure that the desired set of routers announce the | |||
same algorithm. Migration between different algorithms is | same algorithm. Migration between different algorithms is | |||
not considered in this document. | not considered in this document. | |||
</t> | </t> | |||
</section> | </section> | |||
<section title="IANA Considerations"> | <section numbered="true" toc="default"> | |||
<t>IANA has temporarily assigned type 34 for the PIM DR Load Balancing | <name>IANA Considerations</name> | |||
Capability (DRLB-Cap) Hello Option, and type 35 for the | <t>IANA has made these assignments in the "PIM-Hello Options" registry: | |||
PIM DR Load Balancing List (DRLB-List) Hello Option in the | value 34 for the PIM DR Load-Balancing Capability (DRLB-Cap) Hello | |||
PIM-Hello Options registry. IANA is requested | Option (with Length of 4), and value 35 for the PIM DR Load-Balancing | |||
to make these assignments permanent when this document is published | List (DRLB-List) Hello Option (with variable Length). | |||
as an RFC. Note that the option names have changed slightly since | </t> | |||
the temporary assignments were made. Also, the length of option 34 | <t> | |||
is always 4, the registry currently says it is variable. | Per this document, IANA has created a registry called | |||
</t><t> | "PIM Designated Router Load-Balancing Hash Algorithms" in the | |||
This document requests IANA to create a registry called | ||||
"Designated Router Load Balancing Hash Algorithms" in the | ||||
"Protocol Independent Multicast (PIM)" branch of the registry tree. | "Protocol Independent Multicast (PIM)" branch of the registry tree. | |||
The registry lists Hash Algorithms for use by PIM Designated Router | The registry lists hash algorithms for use by PIM Designated Router | |||
Load Balancing. | Load Balancing. | |||
</t> | </t> | |||
<section title="Initial registry"> | <section numbered="true" toc="default"> | |||
<name>Initial Registry</name> | ||||
<t> | <t> | |||
The initial content of the registry should be as follows. | The initial content of the registry is as follows. | |||
<figure> | ||||
<artwork> | ||||
<![CDATA[ | ||||
Type Name Reference | ||||
------ ---------------------------------------- -------------------- | ||||
0 Modulo This document | ||||
1-255 Unassigned | ||||
]]> | ||||
</artwork> | ||||
</figure> | ||||
</t> | </t> | |||
<table anchor="initial-reg" align="center"> | ||||
<thead> | ||||
<tr> | ||||
<th>Type</th> | ||||
<th>Name</th> | ||||
<th>Reference</th> | ||||
</tr> | ||||
</thead> | ||||
<tbody> | ||||
<tr> | ||||
<td>0</td> | ||||
<td>Modulo</td> | ||||
<td>RFC 8775</td> | ||||
</tr> | ||||
<tr> | ||||
<td>1-255</td> | ||||
<td>Unassigned</td> | ||||
<td></td> | ||||
</tr> | ||||
</tbody> | ||||
</table> | ||||
</section> | </section> | |||
<section title="Assignment of new Hash Algorithms"> | <section numbered="true" toc="default"> | |||
<t>Assignment of new Hash Algorithms is done according to the "IETF | <name>Assignment of New Hash Algorithms</name> | |||
Review" model, see <xref target="RFC8126"/>. | <t>Assignment of new hash algorithms is done according to the "IETF | |||
Review" procedure; see <xref target="RFC8126" format="default"/>. | ||||
</t> | </t> | |||
</section> | </section> | |||
</section> | </section> | |||
<section numbered="true" toc="default"> | ||||
<section title="Security Considerations"> | <name>Security Considerations</name> | |||
<t>Security of the new DR Load Balancing PIM Hello Options is only | <t>Security of the new DR Load-Balancing PIM Hello Options is only | |||
guaranteed by the security of PIM Hello messages, so the security | guaranteed by the security of PIM Hello messages, so the security | |||
considerations for PIM Hello messages as described in PIM-SM | considerations for PIM Hello messages, as described in PIM-SM | |||
<xref target="RFC7761"/> apply here. | <xref target="RFC7761" format="default"/>, apply here. | |||
</t> | </t> | |||
<t>If the DR is subverted it could omit or add certain GDRs or | <t>If the DR is subverted, it could omit or add certain GDRs or | |||
announce an unsupported algorithm. If another router is subverted, it | announce an unsupported algorithm. If another router is subverted, it | |||
could be made DR and cause similar issues. While these issues are | could be made DR and cause similar issues. While these issues are | |||
specific to this specification, they are not that different from existing | specific to this specification, they are not that different from existing | |||
attacks such as subverting a DR and lowering the DR priority, causing a | attacks, such as subverting a DR and lowering the DR priority, causing a | |||
different router to become the DR. | different router to become the DR. | |||
</t> | </t> | |||
<t>If for any reason, the DR includes a GDR in the announced list which | <t>If, for any reason, the DR includes a GDR in the announced list that | |||
announces a different algorithm from what the DR announces, the GDR | announces a different algorithm from what the DR announces, the GDR | |||
is required to ignore the announcement, and there will be no router | is required to ignore the announcement, and there will be no router | |||
acting as the DR for the flows that hash to that GDR. | acting as the DR for the flows that hash to that GDR. | |||
</t> | </t> | |||
<t>If a GDR is subverted, it could potentially be made to stop forwarding | <t>If a GDR is subverted, it could potentially be made to stop forwarding | |||
all the traffic it is expected to forward. This is similar to what can | all the traffic it is expected to forward. This is similar to what can | |||
happen today if a DR is subverted. | happen today if a DR is subverted. | |||
</t> | </t> | |||
<t>An administrator may be able to achieve the desired load-balancing | <t>An administrator may be able to achieve the desired load balancing | |||
of known flows, but an attacker may send a single high-rate flow which | of known flows, but an attacker may send a single high-rate flow that | |||
is served by a single GDR, or send multiple flows that are expected to | is served by a single GDR or send multiple flows that are expected to | |||
be hashed to the same GDR.</t> | be hashed to the same GDR.</t> | |||
</section> | </section> | |||
<section title="Acknowledgement"> | ||||
<t> | ||||
The authors would like to thank Steve Simlo and Taki Millonis for | ||||
helping with the original idea; Alia Atlas, Bill Atwood, Joe Clarke, | ||||
Alissa Cooper, Jake Holland, Bharat Joshi, Anish Kachinthaya, | ||||
Anvitha Kachinthaya, Benjamin Kaduk, Mirja Kuhlewind, Barry Leiba, | ||||
Ben Niven-Jenkins, Alvaro Retana, Adam Roach, | ||||
Michael Scharf, Eric Vyncke and Carl Wallace | ||||
for reviews and comments; and Toerless Eckert and Rishabh | ||||
Parekh for helpful conversation on the document. | ||||
</t> | ||||
</section> | ||||
</middle> | </middle> | |||
<!-- *****BACK MATTER ***** --> | <!-- *****BACK MATTER ***** --> | |||
<back> | <back> | |||
<references title='Normative References'> | <references> | |||
<?rfc include='reference.RFC.2119' ?> | <name>References</name> | |||
<?rfc include='reference.RFC.6395' ?> | <references> | |||
<?rfc include='reference.RFC.7761' ?> | <name>Normative References</name> | |||
<?rfc include='reference.RFC.8126' ?> | <xi:include href="https://xml2rfc.tools.ietf.org/public/rfc/bibxml/refer | |||
<?rfc include='reference.RFC.8174' ?> | ence.RFC.2119.xml"/> | |||
<xi:include href="https://xml2rfc.tools.ietf.org/public/rfc/bibxml/refer | ||||
ence.RFC.6395.xml"/> | ||||
<xi:include href="https://xml2rfc.tools.ietf.org/public/rfc/bibxml/refer | ||||
ence.RFC.7761.xml"/> | ||||
<xi:include href="https://xml2rfc.tools.ietf.org/public/rfc/bibxml/refer | ||||
ence.RFC.8126.xml"/> | ||||
<xi:include href="https://xml2rfc.tools.ietf.org/public/rfc/bibxml/refer | ||||
ence.RFC.8174.xml"/> | ||||
</references> | </references> | |||
<references title="Informative References"> | <references> | |||
<?rfc include='reference.RFC.3376' ?> | <name>Informative References</name> | |||
<?rfc include='reference.RFC.3810' ?> | <xi:include href="https://xml2rfc.tools.ietf.org/public/rfc/bibxml/refer | |||
<?rfc include='reference.RFC.4541' ?> | ence.RFC.3376.xml"/> | |||
<?rfc include='reference.RFC.4607' ?> | <xi:include href="https://xml2rfc.tools.ietf.org/public/rfc/bibxml/refer | |||
ence.RFC.3810.xml"/> | ||||
<xi:include href="https://xml2rfc.tools.ietf.org/public/rfc/bibxml/refer | ||||
ence.RFC.4541.xml"/> | ||||
<xi:include href="https://xml2rfc.tools.ietf.org/public/rfc/bibxml/refer | ||||
ence.RFC.4607.xml"/> | ||||
</references> | </references> | |||
</references> | ||||
<section numbered="false" toc="default"> | ||||
<name>Acknowledgements</name> | ||||
<t> | ||||
The authors would like to thank <contact fullname="Steve Simlo"/> and | ||||
<contact fullname="Taki Millonis"/> for | ||||
helping with the original idea; <contact fullname="Alia Atlas"/>, | ||||
<contact fullname="Bill Atwood"/>, <contact fullname="Joe Clarke"/>, | ||||
<contact fullname="Alissa Cooper"/>, <contact fullname="Jake | ||||
Holland"/>, <contact fullname="Bharat Joshi"/>, <contact | ||||
fullname="Anish Kachinthaya"/>, | ||||
<contact fullname="Anvitha Kachinthaya"/>, <contact fullname="Benjamin | ||||
Kaduk"/>, <contact fullname="Mirja Kühlewind"/>, <contact | ||||
fullname="Barry Leiba"/>, | ||||
<contact fullname="Ben Niven-Jenkins"/>, <contact fullname="Alvaro | ||||
Retana"/>, <contact fullname="Adam Roach"/>, | ||||
<contact fullname="Michael Scharf"/>, <contact fullname="Éric | ||||
Vyncke"/>, and <contact fullname="Carl Wallace"/> | ||||
for reviews and comments; and <contact fullname="Toerless Eckert"/> | ||||
and <contact fullname="Rishabh Parekh"/> for helpful conversation on | ||||
the document. | ||||
</t> | ||||
</section> | ||||
</back> | </back> | |||
</rfc> | </rfc> | |||
End of changes. 171 change blocks. | ||||
565 lines changed or deleted | 659 lines changed or added | |||
This html diff was produced by rfcdiff 1.45. The latest version is available from http://tools.ietf.org/tools/rfcdiff/ |