rfc9332.original.xml | rfc9332.xml | |||
---|---|---|---|---|
<?xml version='1.0' encoding='utf-8'?> | <?xml version="1.0" encoding="UTF-8"?> | |||
<!DOCTYPE rfc [ | <!DOCTYPE rfc [ | |||
<!ENTITY nbsp " "> | <!ENTITY nbsp " "> | |||
<!ENTITY zwsp "​"> | <!ENTITY zwsp "​"> | |||
<!ENTITY nbhy "‑"> | <!ENTITY nbhy "‑"> | |||
<!ENTITY wj "⁠"> | <!ENTITY wj "⁠"> | |||
]> | ]> | |||
<!-- This template is for creating an Internet Draft using xml2rfc, | ||||
which is available here: http://xml.resource.org. --> | ||||
<?xml-stylesheet type='text/xsl' href='rfc2629.xslt' ?> | ||||
<!-- used by XSLT processors --> | ||||
<!-- For a complete list and description of processing instructions (PIs), | ||||
please see http://xml.resource.org/authoring/README.html. --> | ||||
<!-- Below are generally applicable Processing Instructions (PIs) that most I-Ds | ||||
might want to use. | ||||
(Here they are set differently than their defaults in xml2rfc v1.32) --> | ||||
<?rfc strict="yes" ?> | ||||
<!-- give errors regarding ID-nits and DTD validation --> | ||||
<!-- control the table of contents (ToC) --> | ||||
<?rfc toc="yes"?> | ||||
<!-- generate a ToC --> | ||||
<?rfc tocdepth="4"?> | ||||
<!-- the number of levels of subsections in ToC. default: 3 --> | ||||
<!-- control references --> | ||||
<?rfc symrefs="yes"?> | ||||
<!-- use symbolic references tags, i.e, [RFC2119] instead of [1] --> | ||||
<?rfc sortrefs="yes" ?> | ||||
<!-- sort the reference entries alphabetically --> | ||||
<!-- control vertical white space | ||||
(using these PIs as follows is recommended by the RFC Editor) --> | ||||
<?rfc compact="yes" ?> | ||||
<!-- do not start each main section on a new page --> | ||||
<?rfc subcompact="no" ?> | ||||
<!-- keep one blank line between list items --> | ||||
<!-- end of list of popular I-D processing instructions --> | ||||
<rfc xmlns:xi="http://www.w3.org/2001/XInclude" category="exp" docName="draft-ie | ||||
tf-tsvwg-aqm-dualq-coupled-25" ipr="trust200902" updates="" obsoletes="" submiss | ||||
ionType="IETF" xml:lang="en" tocInclude="true" tocDepth="4" symRefs="true" sortR | ||||
efs="true" version="3"> | ||||
<!-- xml2rfc v2v3 conversion 3.14.1 --> | ||||
<!-- category values: std, bcp, info, exp, and historic | ||||
ipr values: trust200902, noModificationTrust200902, noDerivativesTrust200902 | ||||
, | ||||
or pre5378Trust200902 | ||||
you can add the attributes updates="NNNN" and obsoletes="NNNN" | ||||
they will automatically be output with "(if approved)" --> | ||||
<!-- ***** FRONT MATTER ***** --> | <rfc xmlns:xi="http://www.w3.org/2001/XInclude" submissionType="IETF" category=" | |||
exp" consensus="true" docName="draft-ietf-tsvwg-aqm-dualq-coupled-25" number="93 | ||||
32" ipr="trust200902" updates="" obsoletes="" xml:lang="en" tocInclude="true" to | ||||
cDepth="4" | ||||
symRefs="true" sortRefs="true" version="3"> | ||||
<!-- xml2rfc v2v3 conversion 3.14.1 --> | ||||
<front> | <front> | |||
<!-- The abbreviated title is used in the page header - it is only necessary | <title abbrev="DualQ Coupled AQMs">Dual-Queue Coupled Active Queue Managemen | |||
if the | t (AQM) for Low Latency, Low Loss, and Scalable Throughput (L4S)</title> | |||
full title is longer than 39 characters --> | <seriesInfo name="RFC" value="9332"/> | |||
<title abbrev="DualQ Coupled AQMs">DualQ Coupled AQMs for Low Latency, Low | ||||
Loss and Scalable Throughput (L4S)</title> | ||||
<seriesInfo name="Internet-Draft" value="draft-ietf-tsvwg-aqm-dualq-coupled- | ||||
25"/> | ||||
<author fullname="Koen De Schepper" initials="K." surname="De Schepper"> | <author fullname="Koen De Schepper" initials="K." surname="De Schepper"> | |||
<organization>Nokia Bell Labs</organization> | <organization>Nokia Bell Labs</organization> | |||
<address> | <address> | |||
<postal> | <postal> | |||
<street/> | <city>Antwerp</city> | |||
<city>Antwerp</city> | ||||
<country>Belgium</country> | <country>Belgium</country> | |||
</postal> | </postal> | |||
<email>koen.de_schepper@nokia.com</email> | <email>koen.de_schepper@nokia.com</email> | |||
<uri>https://www.bell-labs.com/about/researcher-profiles/koende_schepper/</uri> | <uri>https://www.bell-labs.com/about/researcher-profiles/koende_schepper/</uri> | |||
</address> | </address> | |||
</author> | </author> | |||
<author fullname="Bob Briscoe" initials="B." role="editor" surname="Briscoe"> | <author fullname="Bob Briscoe" initials="B." role="editor" surname="Briscoe"> | |||
<organization>Independent</organization> | <organization>Independent</organization> | |||
<address> | <address> | |||
<postal> | <postal> | |||
<street/> | <country>United Kingdom</country> | |||
<country>UK</country> | ||||
</postal> | </postal> | |||
<email>ietf@bobbriscoe.net</email> | <email>ietf@bobbriscoe.net</email> | |||
<uri>https://bobbriscoe.net/</uri> | <uri>https://bobbriscoe.net/</uri> | |||
</address> | </address> | |||
</author> | </author> | |||
<author fullname="Greg White" initials="G." surname="White"> | <author fullname="Greg White" initials="G." surname="White"> | |||
<organization>CableLabs</organization> | <organization>CableLabs</organization> | |||
<address> | <address> | |||
<postal> | <postal> | |||
<street/> | <city>Louisville</city> | |||
<city>Louisville, CO</city> | <region>CO</region> | |||
<country>US</country> | <country>United States of America</country> | |||
</postal> | </postal> | |||
<email>G.White@CableLabs.com</email> | <email>G.White@CableLabs.com</email> | |||
</address> | </address> | |||
</author> | </author> | |||
<!-- <author fullname="Olga Albisser" initials="O." surname="Albisser"> | <date year="2023" month="January" /> | |||
<organization>Simula Research Lab</organization> | ||||
<address> | ||||
<postal> | ||||
<street/> | ||||
<city>Lysaker</city> | ||||
<country>Norway</country> | ||||
</postal> | ||||
<email>olga@albisser.org</email> | ||||
<uri>https://www.simula.no/people/olgabo</uri> | ||||
</address> | ||||
</author> | ||||
<author fullname="Ing Jyh Tsang" initials="I." surname="Tsang"> | ||||
<organization>Nokia</organization> | ||||
<address> | ||||
<postal> | ||||
<street/> | ||||
<city>Antwerp</city> | ||||
<country>Belgium</country> | <area>tsv</area> | |||
</postal> | <workgroup>tsvwg</workgroup> | |||
<email>ing-jyh.tsang@nokia.com</email> | <keyword>Performance</keyword> | |||
</address> | <keyword>Queuing Delay</keyword> | |||
</author> | <keyword>One Way Delay</keyword> | |||
<keyword>Round-Trip Time</keyword> | ||||
<keyword>RTT</keyword> | ||||
<keyword>Jitter</keyword> | ||||
<keyword>Congestion Control</keyword> | ||||
<keyword>Congestion Avoidance</keyword> | ||||
<keyword>Quality of Service</keyword> | ||||
<keyword>QoS</keyword> | ||||
<keyword>Quality of Experience</keyword> | ||||
<keyword>QoE</keyword> | ||||
<keyword>Active Queue Management</keyword> | ||||
<keyword>AQM</keyword> | ||||
<keyword>Explicit Congestion Notification</keyword> | ||||
<keyword>ECN</keyword> | ||||
<keyword>Pacing</keyword> | ||||
<keyword>Burstiness</keyword> | ||||
<date month="" year=""/> | ||||
<area>Transport</area> | ||||
<workgroup>Transport Area working group (tsvwg)</workgroup> | ||||
<keyword>Internet-Draft</keyword> | ||||
<keyword>I-D</keyword> | ||||
<abstract> | <abstract> | |||
<t>This specification defines a framework for coupling the Active Queue | <t>This specification defines a framework for coupling the Active Queue | |||
Management (AQM) algorithms in two queues intended for flows with | Management (AQM) algorithms in two queues intended for flows with | |||
different responses to congestion. This provides a way for the Internet | different responses to congestion. This provides a way for the Internet | |||
to transition from the scaling problems of standard TCP Reno-friendly | to transition from the scaling problems of standard TCP-Reno-friendly | |||
('Classic') congestion controls to the family of 'Scalable' congestion | ('Classic') congestion controls to the family of 'Scalable' congestion | |||
controls. These are designed for consistently very Low queuing Latency, | controls. These are designed for consistently very low queuing latency, | |||
very Low congestion Loss and Scaling of per-flow throughput (L4S) by | very low congestion loss, and scaling of per-flow throughput by | |||
using Explicit Congestion Notification (ECN) in a modified way. Until | using Explicit Congestion Notification (ECN) in a modified way. Until | |||
the Coupled DualQ, these scalable L4S congestion controls could only be | the Coupled Dual Queue (DualQ), these Scalable L4S congestion controls could only be | |||
deployed where a clean-slate environment could be arranged, such as in | deployed where a clean-slate environment could be arranged, such as in | |||
private data centres.</t> | private data centres.</t> | |||
<t>The specification first explains how a Coupled DualQ works. It then | ||||
<t>This specification first explains how a Coupled DualQ works. It then | ||||
gives the normative requirements that are necessary for it to work well. | gives the normative requirements that are necessary for it to work well. | |||
All this is independent of which two AQMs are used, but pseudocode | All this is independent of which two AQMs are used, but pseudocode | |||
examples of specific AQMs are given in appendices.</t> | examples of specific AQMs are given in appendices.</t> | |||
</abstract> | </abstract> | |||
</front> | </front> | |||
<middle> | <middle> | |||
<section anchor="dualq_intro" numbered="true" toc="default"> | <section anchor="dualq_intro" numbered="true" toc="default"> | |||
<name>Introduction</name> | <name>Introduction</name> | |||
<t>This document specifies a framework for DualQ Coupled AQMs, which can | <t>This document specifies a framework for DualQ Coupled AQMs, which can | |||
serve as the network part of the L4S architecture <xref target="I-D.ietf-t | serve as the network part of the L4S architecture <xref target="RFC9330" f | |||
svwg-l4s-arch" format="default"/>. A Coupled DualQ AQM consists of two | ormat="default"/>. A DualQ Coupled AQM consists of two | |||
queues; L4S and Classic. The L4S queue is intended for Scalable | queues: L4S and Classic. The L4S queue is intended for Scalable | |||
congestion controls that can maintain very low queuing latency | congestion controls that can maintain very low queuing latency | |||
(sub-millisecond on average) and high throughput at the same time. The | (sub-millisecond on average) and high throughput at the same time. The | |||
Coupled DualQ acts like a semi-permeable membrane: the L4S queue | Coupled DualQ acts like a semi-permeable membrane: the L4S queue | |||
isolates the sub-millisecond average queuing delay of L4S from Classic | isolates the sub-millisecond average queuing delay of L4S from Classic | |||
latency; while the coupling between the queues pools the capacity | latency, while the coupling between the queues pools the capacity | |||
between both queues so that ad hoc numbers of capacity-seeking | between both queues so that ad hoc numbers of capacity-seeking | |||
applications all sharing the same capacity can have roughly equivalent | applications all sharing the same capacity can have roughly equivalent | |||
throughput per flow, whichever queue they use. The DualQ achieves this | throughput per flow, whichever queue they use. The DualQ achieves this | |||
indirectly, without having to inspect transport layer flow identifiers | indirectly, without having to inspect transport-layer flow identifiers | |||
and without compromising the performance of the Classic traffic, | and without compromising the performance of the Classic traffic, | |||
relative to a single queue. The DualQ design has low complexity and | relative to a single queue. The DualQ design has low complexity and | |||
requires no configuration for the public Internet.</t> | requires no configuration for the public Internet.</t> | |||
<section anchor="dualq_problem" numbered="true" toc="default"> | <section anchor="dualq_problem" numbered="true" toc="default"> | |||
<name>Outline of the Problem</name> | <name>Outline of the Problem</name> | |||
<t>Latency is becoming the critical performance factor for many | <t>Latency is becoming the critical performance factor for many | |||
(most?) applications on the public Internet, e.g. interactive | (perhaps most) applications on the public Internet, e.g., interactive | |||
Web, Web services, voice, conversational video, interactive video, | web, web services, voice, conversational video, interactive video, | |||
interactive remote presence, instant messaging, online gaming, remote | interactive remote presence, instant messaging, online gaming, remote | |||
desktop, cloud-based applications, and video-assisted remote control | desktop, cloud-based applications, and video-assisted remote control | |||
of machinery and industrial processes. Once access network bit rates | of machinery and industrial processes. Once access network bitrates | |||
reach levels now common in the developed world, further increases | reach levels now common in the developed world, further increases | |||
offer diminishing returns unless latency is also addressed <xref target="Dukkipati06" format="default"/>. In the last decade or so, much has been done | offer diminishing returns unless latency is also addressed <xref target="Dukkipati06" format="default"/>. In the last decade or so, much has been done | |||
to reduce propagation time by placing caches or servers closer to | to reduce propagation time by placing caches or servers closer to | |||
users. However, queuing remains a major intermittent component of | users. However, queuing remains a major intermittent component of | |||
latency.</t> | latency.</t> | |||
<t>Traditionally very low latency has only been available for a few | <t>Previously, very low latency has only been available for a few | |||
selected low rate applications, that confine their sending rate within | selected low-rate applications, that confine their sending rate within | |||
a specially carved-off portion of capacity, which is prioritized over | a specially carved-off portion of capacity, which is prioritized over | |||
other traffic, e.g. Diffserv EF <xref target="RFC3246" format="default"/ | other traffic, e.g., Diffserv Expedited Forwarding (EF) <xref target="RF | |||
>. Up | C3246" format="default"/>. Up | |||
to now it has not been possible to allow any number of low latency, | to now, it has not been possible to allow any number of low-latency, | |||
high throughput applications to seek to fully utilize available | high throughput applications to seek to fully utilize available | |||
capacity, because the capacity-seeking process itself causes too much | capacity, because the capacity-seeking process itself causes too much | |||
queuing delay.</t> | queuing delay.</t> | |||
<t>To reduce this queuing delay caused by the capacity seeking | ||||
process, changes either to the network alone or to end-systems alone | <t>To reduce this queuing delay caused by the capacity-seeking | |||
process, changes either to the network alone or to end systems alone | ||||
are in progress. L4S involves a recognition that both approaches are | are in progress. L4S involves a recognition that both approaches are | |||
yielding diminishing returns:</t> | yielding diminishing returns:</t> | |||
<ul spacing="normal"> | <ul spacing="normal"> | |||
<li>Recent state-of-the-art active queue management (AQM) in the | <li>Recent state-of-the-art AQM in the | |||
network, e.g. FQ-CoDel <xref target="RFC8290" format="default"/>, | network, e.g., Flow Queue CoDel <xref target="RFC8290" format="defau | |||
PIE <xref target="RFC8033" format="default"/>, Adaptive RED <xref ta | lt"/>, | |||
rget="ARED01" format="default"/> ) has reduced queuing delay for all traffic, no | Proportional Integral controller Enhanced (PIE) <xref target="RFC803 | |||
t | 3" format="default"/>, and Adaptive Random Early Detection (ARED) <xref target=" | |||
ARED01" format="default"/>), has reduced queuing delay for all traffic, not | ||||
just a select few applications. However, no matter how good the | just a select few applications. However, no matter how good the | |||
AQM, the capacity-seeking (sawtoothing) rate of TCP-like | AQM, the capacity-seeking (sawtoothing) rate of TCP-like | |||
congestion controls represents a lower limit that will either | congestion controls represents a lower limit that will cause either | |||
cause queuing delay to vary or cause the link to be | the queuing delay to vary or the link to be | |||
under-utilized. These AQMs are tuned to allow a typical | underutilized. | |||
capacity-seeking Reno-friendly flow to induce an average queue | These AQMs are tuned to allow a typical | |||
that roughly doubles the base RTT, adding 5-15 ms of queuing on | capacity-seeking TCP-Reno-friendly flow to induce an average queue | |||
average (cf. 500 microseconds with L4S for the same mix of | that roughly doubles the base round-trip time (RTT), adding 5-15 ms | |||
long-running and web traffic). However, for many applications low | of queuing on | |||
average for a mix of long-running flows and web traffic (cf. 500 mic | ||||
roseconds with L4S for the same traffic mix <xref target="L4Seval22" format="def | ||||
ault"/>). However, for many applications, low | ||||
delay is not useful unless it is consistently low. With these | delay is not useful unless it is consistently low. With these | |||
AQMs, 99th percentile queuing delay is 20-30 ms (cf. 2 ms with the | AQMs, 99th percentile queuing delay is 20-30 ms (cf. 2 ms with the | |||
same traffic over L4S).</li> | same traffic over L4S).</li> | |||
<li>Similarly, recent research into using e2e congestion control | ||||
without needing an AQM in the network (e.g. BBR <xref target="I-D.ca | <li>Similarly, recent research into using end-to-end congestion contro | |||
rdwell-iccrg-bbr-congestion-control" format="default"/>) seems to | l | |||
have hit a similar lower limit to queuing delay of about 20ms on | without needing an AQM in the network (e.g., Bottleneck Bandwidth an | |||
average, but there are also regular 25ms delay spikes due to | d Round-trip propagation time (BBR) <xref target="I-D.cardwell-iccrg-bbr-conges | |||
bandwidth probes and 60ms spikes due to flow-starts.</li> | tion-control" format="default"/>) seems to | |||
have hit a similar queuing delay floor of about 20 ms on | ||||
average, but there are also regular 25 ms delay spikes due to | ||||
bandwidth probes and 60 ms spikes due to flow-starts.</li> | ||||
</ul> | </ul> | |||
<t>L4S learns from the experience of Data Center TCP <xref target="RFC82 | <t>L4S learns from the experience of Data Center TCP (DCTCP) <xref targe | |||
57" format="default"/>, which shows the power of complementary changes | t="RFC8257" format="default"/>, which shows the power of complementary changes | |||
both in the network and on end-systems. DCTCP teaches us that two | both in the network and on end systems. DCTCP teaches us that two | |||
small but radical changes to congestion control are needed to cut the | small but radical changes to congestion control are needed to cut the | |||
two major outstanding causes of queuing delay variability:</t> | two major outstanding causes of queuing delay variability:</t> | |||
<ol spacing="normal" type="1"><li>Far smaller rate variations (sawteeth) than Reno-friendly | <ol spacing="normal" type="1"><li>Far smaller rate variations (sawteeth) than Reno-friendly | |||
congestion controls;</li> | congestion controls.</li> | |||
<li>A shift of smoothing and hence smoothing delay from network to | <li>A shift of smoothing and hence smoothing delay from network to | |||
sender.</li> | sender.</li> | |||
</ol> | </ol> | |||
<t>Without the former, a 'Classic' (e.g. Reno-friendly) | <t>Without the former, a 'Classic' (e.g., Reno-friendly) | |||
flow's round trip time (RTT) varies between roughly 1 and 2 times the | flow's RTT varies between roughly 1 and 2 times the | |||
base RTT between the machines in question. Without the latter a | base RTT between the machines in question. Without the latter, a | |||
'Classic' flow's response to changing events is delayed by a | 'Classic' flow's response to changing events is delayed by a | |||
worst-case (transcontinental) RTT, which could be hundreds of times | worst-case (transcontinental) RTT, which could be hundreds of times | |||
the actual smoothing delay needed for the RTT of typical traffic from | the actual smoothing delay needed for the RTT of typical traffic from | |||
localized CDNs.</t> | localized Content Delivery Networks (CDNs).</t> | |||
<t>These changes are the two main features of the family of so-called | <t>These changes are the two main features of the family of so-called | |||
'Scalable' congestion controls (which includes DCTCP, TCP Prague and | 'Scalable' congestion controls (which include DCTCP, Prague, and | |||
SCReAM). Both these changes only reduce delay in combination with a | Self-Clocked Rate Adaptation for Multimedia (SCReAM)). Both of these cha | |||
complementary change in the network and they are both only feasible | nges only reduce delay in combination with a | |||
complementary change in the network, and they are both only feasible | ||||
with ECN, not drop, for the signalling:</t> | with ECN, not drop, for the signalling:</t> | |||
<ol spacing="normal" type="1"><li>The smaller sawteeth allow an extremel | <ol spacing="normal"> | |||
y shallow ECN | <li>The smaller sawteeth allow an extremely shallow ECN | |||
packet-marking threshold in the queue.</li> | packet-marking threshold in the queue.</li> | |||
<li>And no smoothing in the network means that every fluctuation of | <li>No smoothing in the network means that every fluctuation of | |||
the queue is signalled immediately.</li> | the queue is signalled immediately.</li> | |||
</ol> | </ol> | |||
<t>Without ECN, either of these would lead to very high loss | <t>Without ECN, either of these would lead to very high loss | |||
levels. But, with ECN, the resulting high marking levels are just | levels. In contrast, with ECN, the resulting high marking levels are jus | |||
signals, not impairments. (Note that BBRv2 <xref target="BBRv2" format=" | t | |||
default"/> | signals, not impairments. | |||
combines the best of both worlds - it works as a scalable congestion | (Note that BBRv2 <xref target="BBRv2" format="default"/> | |||
control when ECN is available, but also aims to minimize delay when it | combines the best of both worlds -- it works as a Scalable congestion | |||
isn't.)</t> | control when ECN is available, but it also aims to minimize delay when E | |||
CN | ||||
is absent.)</t> | ||||
<t>However, until now, Scalable congestion controls (like DCTCP) did | <t>However, until now, Scalable congestion controls (like DCTCP) did | |||
not co-exist well in a shared ECN-capable queue with existing Classic | not coexist well in a shared ECN-capable queue with existing Classic | |||
(e.g. Reno <xref target="RFC5681" format="default"/> or Cubic <xref targ | (e.g., Reno <xref target="RFC5681" format="default"/> or CUBIC <xref tar | |||
et="RFC8312" format="default"/>) congestion controls -- Scalable controls are | get="RFC8312" format="default"/>) congestion controls -- Scalable controls are | |||
so aggressive that these 'Classic' algorithms would drive themselves | so aggressive that these 'Classic' algorithms would drive themselves | |||
to a small capacity share. Therefore, until now, L4S controls could | to a small capacity share. Therefore, until now, L4S controls could | |||
only be deployed where a clean-slate environment could be arranged, | only be deployed where a clean-slate environment could be arranged, | |||
such as in private data centres (hence the name DCTCP).</t> | such as in private data centres (hence the name DCTCP).</t> | |||
<t>One way to solve the problem of coexistence between Scalable and | <t>One way to solve the problem of coexistence between Scalable and | |||
Classic flows is to use a per-flow-queuing approach such as | Classic flows is to use a per-flow-queuing (FQ) approach such as | |||
FQ-CoDel <xref target="RFC8290" format="default"/>. It classifies packet | FQ-CoDel <xref target="RFC8290" format="default"/>. It classifies packet | |||
s by flow | s by flow | |||
identifier into separate queues in order to isolate sparse flows from | identifier into separate queues in order to isolate sparse flows from | |||
the higher latency in the queues assigned to heavier flows. However, | the higher latency in the queues assigned to heavier flows. However, | |||
if a Classic flow needs both low delay and high throughput, having a | if a Classic flow needs both low delay and high throughput, having a | |||
queue to itself does not isolate it from the harm it causes to itself. | queue to itself does not isolate it from the harm it causes to itself. | |||
Also FQ approaches need to inspect flow identifiers, which is not | Also FQ approaches need to inspect flow identifiers, which is not | |||
always practical.</t> | always practical.</t> | |||
<t>In summary, Scalable congestion controls address the root cause of | <t>In summary, Scalable congestion controls address the root cause of | |||
the latency, loss and scaling problems with Classic congestion | the latency, loss and scaling problems with Classic congestion | |||
controls. Both FQ and DualQ AQMs can be enablers for this smooth low | controls. Both FQ and DualQ AQMs can be enablers for this smooth low-lat | |||
latency scalable behaviour. The DualQ approach is particularly useful | ency | |||
scalable behaviour. The DualQ approach is particularly useful | ||||
because identifying flows is sometimes not practical or desirable.</t> | because identifying flows is sometimes not practical or desirable.</t> | |||
</section> | </section> | |||
<section anchor="dualq_scope" numbered="true" toc="default"> | <section anchor="dualq_scope" numbered="true" toc="default"> | |||
<name>Context, Scope & Applicability</name> | <name>Context, Scope, and Applicability</name> | |||
<t>L4S involves complementary changes in the network and on | <t>L4S involves complementary changes in the network and on | |||
end-systems:</t> | end systems:</t> | |||
<dl newline="false" spacing="normal"> | <dl newline="true" spacing="normal"> | |||
<dt>Network:</dt> | <dt>Network:</dt> | |||
<dd>A DualQ Coupled AQM (defined in the present | <dd>A DualQ Coupled AQM (defined in the present | |||
document) or a modification to flow-queue AQMs (described in | document) or a modification to flow queue AQMs (described in paragra | |||
section 4.2.b of the L4S architecture <xref target="I-D.ietf-tsvwg-l | ph "b" in | |||
4s-arch" format="default"/>);</dd> | Section <xref target="RFC9330" sectionFormat="bare" section="4.2"/> o | |||
<dt>End-system:</dt> | f the L4S architecture <xref target="RFC9330" format="default"/>).</dd> | |||
<dd>A Scalable congestion control (defined | <dt>End system:</dt> | |||
in section 4 of the L4S ECN protocol <xref target="I-D.ietf-tsvwg-ec | <dd>A Scalable congestion control (defined in Section <xref target="RF | |||
n-l4s-id" format="default"/>).</dd> | C9331" sectionFormat="bare" section="4"/> of the L4S ECN protocol spec <xref tar | |||
get="RFC9331" format="default"/>).</dd> | ||||
<dt>Packet identifier:</dt> | <dt>Packet identifier:</dt> | |||
<dd>The network and end-system parts | <dd>The network and end-system parts | |||
of L4S can be deployed incrementally, because they both identify | of L4S can be deployed incrementally, because they both identify | |||
L4S packets using the experimentally assigned explicit congestion | L4S packets using the experimentally assigned ECN codepoints in the | |||
notification (ECN) codepoints in the IP header: ECT(1) and | IP header: ECT(1) and | |||
CE <xref target="RFC8311" format="default"/> <xref target="I-D.ietf- | CE <xref target="RFC8311" format="default"/> <xref target="RFC9331" | |||
tsvwg-ecn-l4s-id" format="default"/>.</dd> | format="default"/>.</dd> | |||
</dl> | </dl> | |||
<t>Data Center TCP (DCTCP <xref target="RFC8257" format="default"/>) is an example | <t>DCTCP <xref target="RFC8257" format="default"/> is an example | |||
of a Scalable congestion control for controlled environments that has | of a Scalable congestion control for controlled environments that has | |||
been deployed for some time in Linux, Windows and FreeBSD operating | been deployed for some time in Linux, Windows, and FreeBSD operating | |||
systems. During the progress of this document through the IETF a | systems. During the progress of this document through the IETF, a | |||
number of other Scalable congestion controls were implemented, | number of other Scalable congestion controls were implemented, | |||
e.g. TCP Prague <xref target="I-D.briscoe-iccrg-prague-congestion-contro | e.g., Prague over TCP and QUIC <xref target="I-D.briscoe-iccrg-prague-co | |||
l" format="default"/> <xref target="PragueLinux" format="default"/>, BBRv2 <xref | ngestion-control" format="default"/> <xref target="PragueLinux" format="default" | |||
target="BBRv2" format="default"/>, <xref target="I-D.cardwell-iccrg-bbr-congest | />, BBRv2 <xref target="BBRv2" format="default"/> <xref target="I-D.cardwell-icc | |||
ion-control" format="default"/>, QUIC Prague and | rg-bbr-congestion-control" format="default"/>, and | |||
the L4S variant of SCREAM for real-time media <xref target="RFC8298" for | the L4S variant of SCReAM for real-time media <xref target="SCReAM-L4S" | |||
mat="default"/>.</t> | format="default"/> <xref target="RFC8298" format="default"/>.</t> | |||
<t>The focus of this specification is to enable deployment of the | ||||
<t>The focus of this specification is to enable deployment of the | ||||
network part of the L4S service. Then, without any management | network part of the L4S service. Then, without any management | |||
intervention, applications can exploit this new network capability as | intervention, applications can exploit this new network capability as | |||
their operating systems migrate to Scalable congestion controls, which | the applications or their operating systems migrate to Scalable congestion controls, which | |||
can then evolve <em>while</em> their benefits are | can then evolve <em>while</em> their benefits are | |||
being enjoyed by everyone on the Internet.</t> | being enjoyed by everyone on the Internet.</t> | |||
<t>The DualQ Coupled AQM framework can incorporate any AQM designed | <t>The DualQ Coupled AQM framework can incorporate any AQM designed | |||
for a single queue that generates a statistical or deterministic | for a single queue that generates a statistical or deterministic | |||
mark/drop probability driven by the queue dynamics. Pseudocode | mark/drop probability driven by the queue dynamics. Pseudocode | |||
examples of two different DualQ Coupled AQMs are given in the | examples of two different DualQ Coupled AQMs are given in the | |||
appendices. In many cases the framework simplifies the basic control | appendices. | |||
algorithm, and requires little extra processing. Therefore, it is | In many cases the framework simplifies the basic control | |||
algorithm and requires little extra processing. | ||||
Therefore, it is | ||||
believed the Coupled AQM would be applicable and easy to deploy in all | believed the Coupled AQM would be applicable and easy to deploy in all | |||
types of buffers; buffers in cost-reduced mass-market residential | types of buffers such as buffers in cost-reduced mass-market residential | |||
equipment; buffers in end-system stacks; buffers in carrier-scale | equipment; buffers in end-system stacks; buffers in carrier-scale | |||
equipment including remote access servers, routers, firewalls and | equipment including remote access servers, routers, firewalls, and | |||
Ethernet switches; buffers in network interface cards, buffers in | Ethernet switches; buffers in network interface cards; buffers in | |||
virtualized network appliances, hypervisors, and so on.</t> | virtualized network appliances, hypervisors; and so on.</t> | |||
<t>For the public Internet, nearly all the benefit will typically be | <t>For the public Internet, nearly all the benefit will typically be | |||
achieved by deploying the Coupled AQM into either end of the access | achieved by deploying the Coupled AQM into either end of the access | |||
link between a 'site' and the Internet, which is invariably the | link between a 'site' and the Internet, which is invariably the | |||
bottleneck (see section 6.4 of<xref target="I-D.ietf-tsvwg-l4s-arch" format="default"/> | bottleneck (see <xref target="RFC9330" sectionFormat="of" section="6.4"/> | |||
about deployment, which also defines the term 'site' to mean a home, | about deployment, which also defines the term 'site' to mean a home, | |||
an office, a campus or mobile user equipment).</t> | an office, a campus, or mobile user equipment).</t> | |||
<t>Latency is not the only concern of L4S:</t> | <t>Latency is not the only concern of L4S:</t> | |||
<ul spacing="normal"> | <ul spacing="normal"> | |||
<li>The "Low Loss" part of the name denotes that L4S generally | <li>The 'Low Loss' part of the name denotes that L4S generally | |||
achieves zero congestion loss (which would otherwise cause | achieves zero congestion loss (which would otherwise cause | |||
retransmission delays), due to its use of ECN.</li> | retransmission delays), due to its use of ECN.</li> | |||
<li>The "Scalable throughput" part of the name denotes that the | <li>The 'Scalable throughput' part of the name denotes that the | |||
per-flow throughput of Scalable congestion controls should scale | per-flow throughput of Scalable congestion controls should scale | |||
indefinitely, avoiding the imminent scaling problems with | indefinitely, avoiding the imminent scaling problems with | |||
'TCP-Friendly' congestion control algorithms <xref target="RFC3649" format="default"/>.</li> | 'TCP-Friendly' congestion control algorithms <xref target="RFC3649" format="default"/>.</li> | |||
</ul> | </ul> | |||
<t>The former is clearly in scope of this AQM document. However, | <t>The former is clearly in scope of this AQM document. However, | |||
the latter is an outcome of the end-system behaviour, and therefore | the latter is an outcome of the end-system behaviour and is therefore | |||
outside the scope of this AQM document, even though the AQM is an | outside the scope of this AQM document, even though the AQM is an | |||
enabler.</t> | enabler.</t> | |||
<t>The overall L4S architecture <xref target="I-D.ietf-tsvwg-l4s-arch" format="default"/> gives more detail, including on | <t>The overall L4S architecture <xref target="RFC9330" format="default"/> gives more detail, including on | |||
wider deployment aspects such as backwards compatibility of Scalable | wider deployment aspects such as backwards compatibility of Scalable | |||
congestion controls in bottlenecks where a DualQ Coupled AQM has not | congestion controls in bottlenecks where a DualQ Coupled AQM has not | |||
been deployed. The supporting papers <xref target="DualPI2Linux" format= | been deployed. The supporting papers <xref target="L4Seval22"/>, <xref t | |||
"default"/>, | arget="DualPI2Linux" format="default"/>, | |||
<xref target="PI2" format="default"/>, <xref target="DCttH19" format="de | <xref target="PI2" format="default"/>, and <xref target="PI2param" forma | |||
fault"/> and <xref target="PI2param" format="default"/> give the full rationale | t="default"/> give the full rationale for the AQM design, both | |||
for the AQM's design, both | ||||
discursively and in more precise mathematical form, as well as the | discursively and in more precise mathematical form, as well as the | |||
results of performance evaluations. The main results have been | results of performance evaluations. The main results have been | |||
validated independently when using the Prague congestion control <xref target="Boru20" format="default"/> (experiments are run using Prague and DCTCP, but | validated independently when using the Prague congestion control <xref target="Boru20" format="default"/> (experiments are run using Prague and DCTCP, but | |||
only the former are relevant for validation, because Prague fixes a | only the former is relevant for validation, because Prague fixes a | |||
number of problems with the Linux DCTCP code that make it unsuitable | number of problems with the Linux DCTCP code that make it unsuitable | |||
for the public Internet).</t> | for the public Internet).</t> | |||
</section> | </section> | |||
<section anchor="dualq_Terminology" numbered="true" toc="default"> | <section anchor="dualq_Terminology" numbered="true" toc="default"> | |||
<name>Terminology</name> | <name>Terminology</name> | |||
<t>The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", | <t> | |||
"SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this | The key words "<bcp14>MUST</bcp14>", "<bcp14>MUST NOT</bcp14>", "<bcp14>REQU | |||
document are to be interpreted as described in <xref target="RFC2119" fo | IRED</bcp14>", "<bcp14>SHALL</bcp14>", "<bcp14>SHALL | |||
rmat="default"/> <xref target="RFC8174" format="default"/> when, and only when, | NOT</bcp14>", "<bcp14>SHOULD</bcp14>", "<bcp14>SHOULD NOT</bcp14>", "<bcp14> | |||
they | RECOMMENDED</bcp14>", "<bcp14>NOT RECOMMENDED</bcp14>", | |||
appear in all capitals, as shown here.</t> | "<bcp14>MAY</bcp14>", and "<bcp14>OPTIONAL</bcp14>" in this document are to | |||
<t>The DualQ Coupled AQM uses two queues for two services. Each of the | be interpreted as | |||
following terms identifies both the service and the queue that | described in BCP 14 <xref target="RFC2119"/> <xref target="RFC8174"/> | |||
provides the service:</t> | when, and only when, they appear in all capitals, as shown here. | |||
</t> | ||||
<t>The DualQ Coupled AQM uses two queues for two services:</t> | ||||
<dl newline="false" spacing="normal"> | <dl newline="false" spacing="normal"> | |||
<dt>Classic service/queue:</dt> | <dt>Classic Service/Queue:</dt> | |||
<dd>The Classic service is | <dd>The Classic service is | |||
intended for all the congestion control behaviours that co-exist | intended for all the congestion control behaviours that coexist | |||
with Reno <xref target="RFC5681" format="default"/> (e.g. Reno itsel | with Reno <xref target="RFC5681" format="default"/> (e.g., Reno itse | |||
f, | lf, | |||
Cubic <xref target="RFC8312" format="default"/>, TFRC <xref target=" | CUBIC <xref target="RFC8312" format="default"/>, and TFRC <xref targ | |||
RFC5348" format="default"/>).</dd> | et="RFC5348" format="default"/>). The term 'Classic queue' means a queue providi | |||
<dt>Low-Latency, Low-Loss Scalable throughput (L4S) service/queue:</dt | ng the Classic service.</dd> | |||
> | ||||
<dt>Low Latency, Low Loss, and Scalable throughput (L4S) Service/Queue | ||||
:</dt> | ||||
<dd>The | <dd>The | |||
'L4S' service is intended for traffic from scalable congestion | 'L4S' service is intended for traffic from Scalable congestion | |||
control algorithms, such as TCP Prague <xref target="I-D.briscoe-icc | control algorithms, such as the Prague congestion control <xref targ | |||
rg-prague-congestion-control" format="default"/>, which was | et="I-D.briscoe-iccrg-prague-congestion-control" format="default"/>, which was | |||
derived from Data Center TCP <xref target="RFC8257" format="default" | derived from Data Center TCP <xref target="RFC8257" format="default" | |||
/>. The | />. The | |||
L4S service is for more general traffic than just TCP Prague | L4S service is for more general traffic than just Prague | |||
-- it allows the set of congestion controls with similar | -- it allows the set of congestion controls with similar | |||
scaling properties to Prague to evolve, such as the examples of | scaling properties to Prague to evolve, such as the examples listed | |||
Scalable congestion controls listed below (Relentless, SCReAM, | below (Relentless, SCReAM, etc.). The term 'L4S queue' means a queue providing t | |||
etc.).</dd> | he L4S service.</dd> | |||
<dt>Classic Congestion Control:</dt> | <dt>Classic Congestion Control:</dt> | |||
<dd>A congestion control | <dd>A congestion control | |||
behaviour that can co-exist with standard TCP Reno <xref target="RFC | behaviour that can coexist with standard Reno <xref target="RFC5681" | |||
5681" format="default"/> without causing significantly negative impact | format="default"/> without causing significantly negative impact | |||
on its flow rate <xref target="RFC5033" format="default"/>. With Cla | on its flow rate <xref target="RFC5033" format="default"/>. With Cla | |||
ssic | ssic | |||
congestion controls, such as Reno or Cubic, because flow rate has | congestion controls, such as Reno or CUBIC, because flow rate has | |||
scaled since TCP congestion control was first designed in 1988, it | scaled since TCP congestion control was first designed in 1988, it | |||
now takes hundreds of round trips (and growing) to recover after a | now takes hundreds of round trips (and growing) to recover after a | |||
congestion signal (whether a loss or an ECN mark) as shown in the | congestion signal (whether a loss or an ECN mark) as shown in the | |||
examples in section 5.1 of the L4S architecture <xref target="I-D.ie | examples in Section <xref target="RFC9330" sectionFormat="bare" sect | |||
tf-tsvwg-l4s-arch" format="default"/> and in <xref target="RFC3649" format="defa | ion="5.1"/> of the L4S architecture <xref target="RFC9330"/> and in <xref target | |||
ult"/>. Therefore, control of queuing and utilization | ="RFC3649" format="default"/>. Therefore, control of queuing and utilization | |||
becomes very slack, and the slightest disturbances (e.g. from | becomes very slack, and the slightest disturbances (e.g., from | |||
new flows starting) prevent a high rate from being attained.</dd> | new flows starting) prevent a high rate from being attained.</dd> | |||
<dt>Scalable Congestion Control:</dt> | <dt>Scalable Congestion Control:</dt> | |||
<dd>A congestion control | <dd>A congestion control | |||
where the average time from one congestion signal to the next (the | where the average time from one congestion signal to the next (the | |||
recovery time) remains invariant as the flow rate scales, all | recovery time) remains invariant as flow rate scales, all | |||
other factors being equal. This maintains the same degree of | other factors being equal. This maintains the same degree of | |||
control over queueing and utilization whatever the flow rate, as | control over queuing and utilization whatever the flow rate, as | |||
well as ensuring that high throughput is robust to disturbances. | well as ensuring that high throughput is robust to disturbances. | |||
For instance, DCTCP averages 2 congestion signals per round-trip | For instance, DCTCP averages 2 congestion signals per round trip, | |||
whatever the flow rate, as do other recently developed scalable | whatever the flow rate, as do other recently developed Scalable | |||
congestion controls, e.g. Relentless TCP <xref target="I-D.mathis-ic | congestion controls, e.g., Relentless TCP <xref target="I-D.mathis-i | |||
crg-relentless-tcp" format="default"/>, TCP Prague <xref target="I-D.briscoe-icc | ccrg-relentless-tcp" format="default"/>, Prague <xref target="I-D.briscoe-iccrg- | |||
rg-prague-congestion-control" format="default"/>, <xref target="PragueLinux" for | prague-congestion-control" format="default"/> <xref target="PragueLinux" format= | |||
mat="default"/>, BBRv2 <xref target="BBRv2" format="default"/>, <xref target="I- | "default"/>, BBRv2 <xref target="BBRv2" format="default"/> <xref target="I-D.car | |||
D.cardwell-iccrg-bbr-congestion-control" format="default"/> and the L4S | dwell-iccrg-bbr-congestion-control" format="default"/>, and the L4S | |||
variant of SCREAM for real-time media <xref target="SCReAM" format=" | variant of SCReAM for real-time media <xref target="SCReAM-L4S" form | |||
default"/>, <xref target="RFC8298" format="default"/>). For the public | at="default"/> <xref target="RFC8298" format="default"/>. For the public | |||
Internet a Scalable transport has to comply with the requirements | Internet, a Scalable transport has to comply with the requirements | |||
in Section 4 of <xref target="I-D.ietf-tsvwg-ecn-l4s-id" format="def | in <xref target="RFC9331" sectionFormat="of" section="4"/> (a.k.a. t | |||
ault"/> | he 'Prague L4S requirements').</dd> | |||
(aka. the 'Prague L4S requirements').</dd> | ||||
<dt>C:</dt> | <dt>C:</dt> | |||
<dd>Abbreviation for Classic, e.g. when used as | <dd>Abbreviation for Classic, e.g., when used as | |||
a subscript.</dd> | a subscript.</dd> | |||
<dt>L:</dt> | <dt>L:</dt> | |||
<dd> | <dd> | |||
<t>Abbreviation for L4S, e.g. when used as a | <t>Abbreviation for L4S, e.g., when used as a | |||
subscript.</t> | subscript.</t> | |||
<t>The terms Classic or L4S can | <t>The terms Classic or L4S can | |||
also qualify other nouns, such as 'codepoint', 'identifier', | also qualify other nouns, such as 'codepoint', 'identifier', | |||
'classification', 'packet', 'flow'. For example: an L4S packet | 'classification', 'packet', and 'flow'. For example, an L4S packet | |||
means a packet with an L4S identifier sent from an L4S congestion | means a packet with an L4S identifier sent from an L4S congestion | |||
control.</t> | control.</t> | |||
<t>Both Classic and L4S services can | <t>Both Classic and L4S services can | |||
cope with a proportion of unresponsive or less-responsive traffic | cope with a proportion of unresponsive or less-responsive traffic | |||
as well, but in the L4S case its rate has to be smooth enough or | as well but, in the L4S case, its rate has to be smooth enough or | |||
low enough not to build a queue (e.g. DNS, VoIP, game sync | low enough to not build a queue (e.g., DNS, Voice over IP (VoIP), ga | |||
me sync | ||||
datagrams, etc.). The DualQ Coupled AQM behaviour is defined to be | datagrams, etc.). The DualQ Coupled AQM behaviour is defined to be | |||
similar to a single FIFO queue with respect to unresponsive and | similar to a single First-In, First-Out (FIFO) queue with respect to unresponsive and | |||
overload traffic.</t> | overload traffic.</t> | |||
</dd> | </dd> | |||
<dt>Reno-friendly:</dt> | <dt>Reno-friendly:</dt> | |||
<dd>The subset of Classic traffic that is | <dd>The subset of Classic traffic that is | |||
friendly to the standard Reno congestion control defined for TCP | friendly to the standard Reno congestion control defined for TCP | |||
in <xref target="RFC5681" format="default"/>. Reno-friendly is used | in <xref target="RFC5681" format="default"/>. | |||
in place of | The TFRC spec <xref target="RFC5348"/> indirectly implies that 'friendly' is | |||
defined as "generally within a factor of two of the sending rate | ||||
of a TCP flow under the same conditions". 'Reno-friendly' is used here in place | ||||
of | ||||
'TCP-friendly', given the latter has become imprecise, because the | 'TCP-friendly', given the latter has become imprecise, because the | |||
TCP protocol is now used with so many different congestion control | TCP protocol is now used with so many different congestion control | |||
behaviours, and Reno is used in non-TCP transports such as | behaviours, and Reno is used in non-TCP transports, such as | |||
QUIC.</dd> | QUIC <xref target="RFC9000"/>.</dd> | |||
<dt>DualQ or DualQ AQM:</dt> | ||||
<dd>Used loosely as shorthand for a Dual-Queue Coupled AQM, where the | ||||
context | ||||
makes 'Coupled AQM' obvious.</dd> | ||||
<dt>Classic ECN:</dt> | <dt>Classic ECN:</dt> | |||
<dd> | <dd> | |||
<t>The original Explicit Congestion | <t>The original Explicit Congestion | |||
Notification (ECN) protocol <xref target="RFC3168" format="default"/ | Notification (ECN) protocol <xref target="RFC3168" format="default"/ | |||
>, which | > that | |||
requires ECN signals to be treated the same as drops, both when | requires ECN signals to be treated as equivalent to drops, both when | |||
generated in the network and when responded to by the | generated in the network and when responded to by the | |||
sender.</t> | sender.</t> | |||
<t>For L4S, the names used for the | <t>For L4S, the names used for the four codepoints of the 2-bit IP-E | |||
four codepoints of the 2-bit IP-ECN field are unchanged from those | CN field are unchanged from those | |||
defined in <xref target="RFC3168" format="default"/>: Not ECT, ECT(0 | defined in the ECN spec <xref target="RFC3168" format="default"/>, i | |||
), ECT(1) and | .e., Not-ECT, ECT(0), ECT(1), and | |||
CE, where ECT stands for ECN-Capable Transport and CE stands for | CE, where ECT stands for ECN-Capable Transport and CE stands for | |||
Congestion Experienced. A packet marked with the CE codepoint is | Congestion Experienced. A packet marked with the CE codepoint is | |||
termed 'ECN-marked' or sometimes just 'marked' where the context | termed 'ECN-marked' or sometimes just 'marked' where the context | |||
makes ECN obvious.</t> | makes ECN obvious.</t> | |||
</dd> | </dd> | |||
</dl> | </dl> | |||
</section> | </section> | |||
<section numbered="true" toc="default"> | <section numbered="true" toc="default"> | |||
<name>Features</name> | <name>Features</name> | |||
<t>The AQM couples marking and/or dropping from the Classic queue to | <t>The AQM couples marking and/or dropping from the Classic queue to | |||
the L4S queue in such a way that a flow will get roughly the same | the L4S queue in such a way that a flow will get roughly the same | |||
throughput whichever it uses. Therefore, both queues can feed into the | throughput whichever it uses. Therefore, both queues can feed into the | |||
full capacity of a link and no rates need to be configured for the | full capacity of a link, and no rates need to be configured for the | |||
queues. The L4S queue enables Scalable congestion controls like DCTCP | queues. | |||
or TCP Prague to give very low and predictably low latency, without | The L4S queue enables Scalable congestion controls like DCTCP | |||
or Prague to give very low and consistently low latency, without | ||||
compromising the performance of competing 'Classic' Internet | compromising the performance of competing 'Classic' Internet | |||
traffic.</t> | traffic.</t> | |||
<t>Thousands of tests have been conducted in a typical fixed | <t>Thousands of tests have been conducted in a typical fixed | |||
residential broadband setting. Experiments used a range of base round | residential broadband setting. Experiments used a range of base round-tr | |||
trip delays up to 100ms and link rates up to 200 Mb/s between the data | ip | |||
delays up to 100 ms and link rates up to 200 Mb/s between the data | ||||
centre and home network, with varying amounts of background traffic in | centre and home network, with varying amounts of background traffic in | |||
both queues. For every L4S packet, the AQM kept the average queuing | both queues. For every L4S packet, the AQM kept the average queuing | |||
delay below 1ms (or 2 packets where serialization delay exceeded 1ms | delay below 1 ms (or 2 packets where serialization delay exceeded 1 ms | |||
on slower links), with 99th percentile no worse than 2ms. No losses at | on slower links), with the 99th percentile being no worse than 2 ms. No | |||
losses at | ||||
all were introduced by the L4S AQM. Details of the extensive | all were introduced by the L4S AQM. Details of the extensive | |||
experiments are available <xref target="DualPI2Linux" format="default"/> | experiments are available in <xref target="L4Seval22" format="default"/> | |||
, <xref target="PI2" format="default"/>, <xref target="DCttH19" format="default" | and <xref target="DualPI2Linux" format="default"/>. | |||
/>. Subjective testing using | Subjective testing using | |||
very demanding high bandwidth low latency applications over a single | very demanding high-bandwidth low-latency applications over a single | |||
shared access link is also described in <xref target="L4Sdemo16" format= | shared access link is also described in <xref target="L4Sdemo16" format= | |||
"default"/> and summarized in the section about applications | "default"/> and summarized in Section <xref | |||
in the L4S architecture <xref target="I-D.ietf-tsvwg-l4s-arch" format="d | target="RFC9330" sectionFormat="bare" section="6.1"/> of the L4S archite | |||
efault"/> | cture <xref target="RFC9330" format="default"/>. | |||
.</t> | </t> | |||
<t>In all these experiments, the host was connected to the home | <t>In all these experiments, the host was connected to the home | |||
network by fixed Ethernet, in order to quantify the queuing delay that | network by fixed Ethernet, in order to quantify the queuing delay that | |||
can be achieved by a user who cares about delay. It should be | can be achieved by a user who cares about delay. It should be | |||
emphasized that L4S support at the bottleneck link cannot 'undelay' | emphasized that L4S support at the bottleneck link cannot 'undelay' | |||
bursts introduced by another link on the path, for instance by legacy | bursts introduced by another link on the path, for instance by legacy | |||
Wi-Fi equipment. However, if L4S support is added to the queue feeding | Wi-Fi equipment. However, if L4S support is added to the queue feeding | |||
the <em>outgoing</em> WAN link of a home gateway, | the <em>outgoing</em> WAN link of a home gateway, | |||
it would be counterproductive not to also reduce the burstiness of the | it would be counterproductive not to also reduce the burstiness of the | |||
<em>incoming</em> Wi-Fi. Also, trials of Wi-Fi | <em>incoming</em> Wi-Fi. Also, trials of Wi-Fi | |||
equipment with an L4S DualQ Coupled AQM on the <em>outgoing</em> | equipment with an L4S DualQ Coupled AQM on the <em>outgoing</em> | |||
Wi-Fi interface are in progress, and early results of an L4S DualQ | Wi-Fi interface are in progress, and early results of an L4S DualQ | |||
Coupled AQM in a 5G radio access network testbed with emulated outdoor | Coupled AQM in a 5G radio access network testbed with emulated outdoor | |||
cell edge radio fading are given in <xref target="L4S_5G" format="default"/>.</t> | cell edge radio fading are given in <xref target="L4S_5G" format="default"/>.</t> | |||
<t>Unlike Diffserv Expedited Forwarding, the L4S queue does not have | <t>Unlike Diffserv EF, the L4S queue does not have | |||
to be limited to a small proportion of the link capacity in order to | to be limited to a small proportion of the link capacity in order to | |||
achieve low delay. The L4S queue can be filled with a heavy load of | achieve low delay. The L4S queue can be filled with a heavy load of | |||
capacity-seeking flows (TCP Prague etc.) and still achieve low delay. | capacity-seeking flows (Prague, BBRv2, etc.) and still achieve low delay. | |||
The L4S queue does not rely on the presence of other traffic in the | The L4S queue does not rely on the presence of other traffic in the | |||
Classic queue that can be 'overtaken'. It gives low latency to L4S | Classic queue that can be 'overtaken'. | |||
It gives low latency to L4S | ||||
traffic whether or not there is Classic traffic. The tail latency of | traffic whether or not there is Classic traffic. The tail latency of | |||
traffic served by the Classic AQM is sometimes a little better | traffic served by the Classic AQM is sometimes a little better, | |||
sometimes a little worse, when a proportion of the traffic is L4S.</t> | sometimes a little worse, when a proportion of the traffic is L4S.</t> | |||
<t>The two queues are only necessary because:</t> | <t>The two queues are only necessary because:</t> | |||
<ul spacing="normal"> | <ul spacing="normal"> | |||
<li>the large variations (sawteeth) of Classic flows need roughly a | <li>The large variations (sawteeth) of Classic flows need roughly a | |||
base RTT of queuing delay to ensure full utilization</li> | base RTT of queuing delay to ensure full utilization.</li> | |||
<li>Scalable flows do not need a queue to keep utilization high, | <li>Scalable flows do not need a queue to keep utilization high, | |||
but they cannot keep latency predictably low if they are mixed | but they cannot keep latency consistently low if they are mixed | |||
with Classic traffic,</li> | with Classic traffic.</li> | |||
</ul> | </ul> | |||
<t>The L4S queue has latency priority within sub-round trip | <t>The L4S queue has latency priority within sub-round-trip | |||
timescales, but over longer periods the coupling from the Classic to | timescales, but over longer periods the coupling from the Classic to | |||
the L4S AQM (explained below) ensures that it does not have bandwidth | the L4S AQM (explained below) ensures that it does not have bandwidth | |||
priority over the Classic queue.</t> | priority over the Classic queue.</t> | |||
</section> | </section> | |||
</section> | </section> | |||
<section anchor="dualq_algo" numbered="true" toc="default"> | <section anchor="dualq_algo" numbered="true" toc="default"> | |||
<name>DualQ Coupled AQM</name> | <name>DualQ Coupled AQM</name> | |||
<t>There are two main aspects to the approach:</t> | <t>There are two main aspects to the DualQ Coupled AQM approach:</t> | |||
<ul spacing="normal"> | <ol spacing="normal"> | |||
<li>The Coupled AQM that addresses throughput equivalence between | <li>The Coupled AQM that addresses throughput equivalence between | |||
Classic (e.g. Reno, Cubic) flows and L4S flows (that satisfy | Classic (e.g., Reno or CUBIC) flows and L4S flows (that satisfy | |||
the Prague L4S requirements).</li> | the Prague L4S requirements).</li> | |||
<li>The Dual Queue structure that provides latency separation for L4S | <li>The Dual-Queue structure that provides latency separation for L4S | |||
flows to isolate them from the typically large Classic queue.</li> | flows to isolate them from the typically large Classic queue.</li> | |||
</ul> | </ol> | |||
<!--<t>The following subsections describe these two aspects, and how | ||||
packets are classified between the two queues, then a likely overall | ||||
structure of a DualQ Coupled AQM is given. The present document applies | ||||
irrespective of which particular AQMs are used for each queue. So, | ||||
although the structure is intended to be generic, it might not fit well | ||||
around types of AQM yet to be considered. Finally normative requirements | ||||
are given that apply to any specific DualQ Coupled AQM implementation, | ||||
irrespective of which AQMs it uses. Pseudocode of specific examples are | ||||
given in non-normative appendices.</t>--> | ||||
<section anchor="dualq_coupled" numbered="true" toc="default"> | <section anchor="dualq_coupled" numbered="true" toc="default"> | |||
<name>Coupled AQM</name> | <name>Coupled AQM</name> | |||
<t>In the 1990s, the `TCP formula' was derived for the relationship | <t>In the 1990s, the 'TCP formula' was derived for the relationship | |||
between the steady-state congestion window, cwnd, and the drop | between the steady-state congestion window, cwnd, and the drop | |||
probability, p of standard Reno congestion control <xref target="RFC5681" format="default"/>. To a first order approximation, the steady-state | probability, p of standard Reno congestion control <xref target="RFC5681" format="default"/>. To a first-order approximation, the steady-state | |||
cwnd of Reno is inversely proportional to the square root of p.</t> | cwnd of Reno is inversely proportional to the square root of p.</t> | |||
<t>The design focuses on Reno as the worst case, because if it does no | <t>The design focuses on Reno as the worst case, because if it does no | |||
harm to Reno, it will not harm Cubic or any traffic designed to be | harm to Reno, it will not harm CUBIC or any traffic designed to be | |||
friendly to Reno. TCP Cubic implements a Reno-compatibility mode, | friendly to Reno. TCP CUBIC implements a Reno-friendly mode, | |||
which is relevant for typical RTTs under 20ms as long as the | which is relevant for typical RTTs under 20 ms as long as the | |||
throughput of a single flow is less than about 350Mb/s. In such cases | throughput of a single flow is less than about 350 Mb/s. In such cases, | |||
it can be assumed that Cubic traffic behaves similarly to Reno. The | it can be assumed that CUBIC traffic behaves similarly to Reno. The | |||
term 'Classic' will be used for the collection of Reno-friendly | term 'Classic' will be used for the collection of Reno-friendly | |||
traffic including Cubic and potentially other experimental congestion | traffic including CUBIC and potentially other experimental congestion | |||
controls intended not to significantly impact the flow rate of | controls intended not to significantly impact the flow rate of | |||
Reno.</t> | Reno.</t> | |||
<t>A supporting paper <xref target="PI2" format="default"/> includes the | <t>A supporting paper <xref target="PI2" format="default"/> includes the | |||
derivation of the equivalent rate equation for DCTCP, for which cwnd | derivation of the equivalent rate equation for DCTCP, for which cwnd | |||
is inversely proportional to p (not the square root), where in this | is inversely proportional to p (not the square root), where in this | |||
case p is the ECN marking probability. DCTCP is not the only | case p is the ECN-marking probability. DCTCP is not the only | |||
congestion control that behaves like this, so the term 'Scalable' will | congestion control that behaves like this, so the term 'Scalable' will | |||
be used for all similar congestion control behaviours (see examples in | be used for all similar congestion control behaviours (see examples in | |||
<xref target="dualq_scope" format="default"/>). The term 'L4S' is used for traffic | <xref target="dualq_scope" format="default"/>). The term 'L4S' is used for traffic | |||
driven by a Scalable congestion control that also complies with the | driven by a Scalable congestion control that also complies with the | |||
additional 'Prague L4S' requirements <xref target="I-D.ietf-tsvwg-ecn-l4 | additional 'Prague L4S requirements' <xref target="RFC9331" format="defa | |||
s-id" format="default"/>.</t> | ult"/>.</t> | |||
<t>For safe co-existence, under stationary conditions, a Scalable flow | <t>For safe coexistence, under stationary conditions, a Scalable flow | |||
has to run at roughly the same rate as a Reno TCP flow (all other | has to run at roughly the same rate as a Reno TCP flow (all other | |||
factors being equal). So the drop or marking probability for Classic | factors being equal). So the drop or marking probability for Classic | |||
traffic, p_C has to be distinct from the marking probability for L4S | traffic, p_C, has to be distinct from the marking probability for L4S | |||
traffic, p_L. The original ECN specification <xref target="RFC3168" form | traffic, p_L. The original ECN spec <xref target="RFC3168" format="defau | |||
at="default"/> required these probabilities to be the same, but | lt"/> required these probabilities to be the same, but | |||
<xref target="RFC8311" format="default"/> updates RFC 3168 to enable exp | <xref target="RFC8311" format="default"/> updates <xref target="RFC3168" | |||
eriments in | format="default"/> to enable experiments in | |||
which these probabilities are different.</t> | which these probabilities are different.</t> | |||
<t>Also, to remain stable, Classic sources need the network to smooth | <t>Also, to remain stable, Classic sources need the network to smooth | |||
p_C so it changes relatively slowly. It is hard for a network node to | p_C so it changes relatively slowly. It is hard for a network node to | |||
know the RTTs of all the flows, so a Classic AQM adds a <em>worst-case</em> RTT of smoothing delay (about 100-200 | know the RTTs of all the flows, so a Classic AQM adds a <em>worst-case</em> RTT of smoothing delay (about 100-200 | |||
ms). In contrast, L4S shifts responsibility for smoothing ECN feedback | ms). In contrast, L4S shifts responsibility for smoothing ECN feedback | |||
to the sender, which only delays its response by its <em>own</em> RTT, a s well as allowing a more immediate | to the sender, which only delays its response by its <em>own</em> RTT, a s well as allowing a more immediate | |||
response if necessary.</t> | response if necessary.</t> | |||
<t>The Coupled AQM achieves safe coexistence by making the Classic | <t>The Coupled AQM achieves safe coexistence by making the Classic | |||
drop probability p_C proportional to the square of the coupled L4S | drop probability p_C proportional to the square of the coupled L4S | |||
probability p_CL. p_CL is an input to the instantaneous L4S marking | probability p_CL. p_CL is an input to the instantaneous L4S marking | |||
probability p_L but it changes as slowly as p_C. This makes the Reno | probability p_L, but it changes as slowly as p_C. This makes the Reno | |||
flow rate roughly equal the DCTCP flow rate, because the squaring of | flow rate roughly equal the DCTCP flow rate, because the squaring of | |||
p_CL counterbalances the square root of p_C in the 'TCP formula' of | p_CL counterbalances the square root of p_C in the 'TCP formula' of | |||
Classic Reno congestion control.</t> | Classic Reno congestion control.</t> | |||
<t>Stating this as a formula, the relation between Classic drop | <t>Stating this as a formula, the relation between Classic drop | |||
probability, p_C, and the coupled L4S probability p_CL needs to take | probability, p_C, and the coupled L4S probability p_CL needs to take | |||
the form:</t> | the following form:</t> | |||
<artwork name="" type="" align="left" alt=""><![CDATA[ p_C = ( p_CL / | ||||
k )^2 (1)]]></artwork> | <sourcecode><![CDATA[ | |||
p_C = ( p_CL / k )^2, (1)]]></sourcecode> | ||||
<t>where k is the constant of proportionality, which is termed the | <t>where k is the constant of proportionality, which is termed the | |||
coupling factor.</t> | 'coupling factor'.</t> | |||
</section> | </section> | |||
<section anchor="dualq" numbered="true" toc="default"> | <section anchor="dualq" numbered="true" toc="default"> | |||
<name>Dual Queue</name> | <name>Dual Queue</name> | |||
<t>Classic traffic needs to build a large queue to prevent | <t>Classic traffic needs to build a large queue to prevent | |||
under-utilization. Therefore, a separate queue is provided for L4S | underutilization. Therefore, a separate queue is provided for L4S | |||
traffic, and it is scheduled with priority over the Classic queue. | traffic, and it is scheduled with priority over the Classic queue. | |||
Priority is conditional to prevent starvation of Classic traffic in | Priority is conditional to prevent starvation of Classic traffic in | |||
certain conditions (see <xref target="dualq_coupled_structure" format="default"/>).</t> | certain conditions (see <xref target="dualq_coupled_structure" format="default"/>).</t> | |||
<t>Nonetheless, coupled marking ensures that giving priority to L4S | <t>Nonetheless, coupled marking ensures that giving priority to L4S | |||
traffic still leaves the right amount of spare scheduling time for | traffic still leaves the right amount of spare scheduling time for | |||
Classic flows to each get equivalent throughput to DCTCP flows (all | Classic flows to each get equivalent throughput to DCTCP flows (all | |||
other factors such as RTT being equal).</t> | other factors, such as RTT, being equal).</t> | |||
</section> | </section> | |||
<section anchor="dualq_classification" numbered="true" toc="default"> | <section anchor="dualq_classification" numbered="true" toc="default"> | |||
<name>Traffic Classification</name> | <name>Traffic Classification</name> | |||
<t>Both the Coupled AQM and DualQ mechanisms need an identifier to | <t>Both the Coupled AQM and DualQ mechanisms need an identifier to | |||
distinguish L4S (L) and Classic (C) packets. Then the coupling | distinguish L4S (L) and Classic (C) packets. | |||
Then the coupling | ||||
algorithm can achieve coexistence without having to inspect flow | algorithm can achieve coexistence without having to inspect flow | |||
identifiers, because it can apply the appropriate marking or dropping | identifiers, because it can apply the appropriate marking or dropping | |||
probability to all flows of each type. A separate | probability to all flows of each type. A separate | |||
specification <xref target="I-D.ietf-tsvwg-ecn-l4s-id" format="default"/> requires | specification <xref target="RFC9331" format="default"/> requires | |||
the network to treat the ECT(1) and CE codepoints of the ECN field as | the network to treat the ECT(1) and CE codepoints of the ECN field as | |||
this identifier. An additional process document has proved necessary | this identifier. An additional process document has proved necessary | |||
to make the ECT(1) codepoint available for experimentation <xref target="RFC8311" format="default"/>.</t> | to make the ECT(1) codepoint available for experimentation <xref target="RFC8311" format="default"/>.</t> | |||
<t>For policy reasons, an operator might choose to steer certain | <t>For policy reasons, an operator might choose to steer certain | |||
packets (e.g. from certain flows or with certain addresses) out | packets (e.g., from certain flows or with certain addresses) out | |||
of the L queue, even though they identify themselves as L4S by their | of the L queue, even though they identify themselves as L4S by their | |||
ECN codepoints. In such cases, the L4S ECN protocol <xref target="I-D.ie | ECN codepoints. In such cases, the L4S ECN protocol <xref target="RFC933 | |||
tf-tsvwg-ecn-l4s-id" format="default"/> says that the device "MUST NOT | 1" format="default"/> states that the device "<bcp14>MUST NOT</bcp14> | |||
alter the end-to-end L4S ECN identifier", so that it is preserved | alter the end-to-end L4S ECN identifier" so that it is preserved | |||
end-to-end. The aim is that each operator can choose how it treats L4S | end to end. The aim is that each operator can choose how it treats L4S | |||
traffic locally, but an individual operator does not alter the | traffic locally, but an individual operator does not alter the | |||
identification of L4S packets, which would prevent other operators | identification of L4S packets, which would prevent other operators | |||
downstream from making their own choices on how to treat L4S | downstream from making their own choices on how to treat L4S | |||
traffic.</t> | traffic.</t> | |||
<t>In addition, an operator could use other identifiers to classify | <t>In addition, an operator could use other identifiers to classify | |||
certain additional packet types into the L queue that it deems will | certain additional packet types into the L queue that it deems will | |||
not risk harm to the L4S service. For instance addresses of specific | not risk harm to the L4S service, for instance, addresses of specific | |||
applications or hosts; specific Diffserv codepoints such as EF | applications or hosts; specific Diffserv codepoints such as EF, Voice-Ad | |||
(Expedited Forwarding), Voice-Admit or the Non-Queue-Building (NQB) | mit, or the Non-Queue-Building (NQB) | |||
per-hop behaviour; or certain protocols (e.g. ARP, DNS) (see | per-hop behaviour; or certain protocols (e.g., ARP and DNS) (see <xref t | |||
Section 5.4.1 of <xref target="I-D.ietf-tsvwg-ecn-l4s-id" format="defaul | arget="RFC9331" sectionFormat="of" section="5.4.1"/>). Note | |||
t"/>). Note | that | |||
that the mechanism only reads these identifiers. <xref target="I-D.ietf- | <xref target="RFC9331" format="default"/> states that "a network node <bc | |||
tsvwg-ecn-l4s-id" format="default"/> says it "MUST NOT alter these | p14>MUST NOT</bcp14> | |||
non-ECN identifiers". Thus, the L queue is not solely an L4S queue, it | change Not-ECT or ECT(0) in the IP-ECN field into an L4S identifier." | |||
can be considered more generally as a low latency queue.</t> | Thus, the L queue is not solely an L4S queue; it | |||
can be considered more generally as a low-latency queue.</t> | ||||
</section> | </section> | |||
<section anchor="dualq_coupled_structure" numbered="true" toc="default"> | <section anchor="dualq_coupled_structure" numbered="true" toc="default"> | |||
<name>Overall DualQ Coupled AQM Structure</name> | <name>Overall DualQ Coupled AQM Structure</name> | |||
<t><xref target="dualq_fig_structure" format="default"/> shows the overall structure | <t><xref target="dualq_fig_structure" format="default"/> shows the overall structure | |||
that any DualQ Coupled AQM is likely to have. This schematic is | that any DualQ Coupled AQM is likely to have. This schematic is | |||
intended to aid understanding of the current designs of DualQ Coupled | intended to aid understanding of the current designs of DualQ Coupled | |||
AQMs. However, it is not intended to preclude other innovative ways of | AQMs. However, it is not intended to preclude other innovative ways of | |||
satisfying the normative requirements in <xref target="dualq_norm_reqs" format="default"/> that minimally define a DualQ Coupled AQM. | satisfying the normative requirements in <xref target="dualq_norm_reqs" format="default"/> that minimally define a DualQ Coupled AQM. | |||
Also, the schematic only illustrates operation under normally expected | Also, the schematic only illustrates operation under normally expected | |||
circumstances; behaviour under overload or with operator-specific | circumstances; behaviour under overload or with operator-specific | |||
classifiers is deferred to <xref target="dualq_unexpected" format="default"/>.</t> | classifiers is deferred to <xref target="dualq_unexpected" format="default"/>.</t> | |||
<t>The classifier on the left separates incoming traffic between the | <t>The classifier on the left separates incoming traffic between the | |||
two queues (L and C). Each queue has its own AQM that determines the | two queues (L and C). Each queue has its own AQM that determines the | |||
likelihood of marking or dropping (p_L and p_C). It has been | likelihood of marking or dropping (p_L and p_C). | |||
proved <xref target="PI2" format="default"/> that it is preferable to co | In <xref target="PI2" format="default"/>, it has been | |||
ntrol load | proved that it is preferable to control load | |||
with a linear controller, then square the output before applying it as | with a linear controller, then square the output before applying it as | |||
a drop probability to Reno-friendly traffic (because Reno congestion | a drop probability to Reno-friendly traffic (because Reno congestion | |||
control decreases its load proportional to the square-root of the | control decreases its load proportional to the square root of the | |||
increase in drop). So, the AQM for Classic traffic needs to be | increase in drop). So, the AQM for Classic traffic needs to be | |||
implemented in two stages: i) a base stage that outputs an internal | implemented in two stages: i) a base stage that outputs an internal | |||
probability p' (pronounced p-prime); and ii) a squaring stage that | probability p' (pronounced 'p-prime') and ii) a squaring stage that | |||
outputs p_C, where</t> | outputs p_C, where</t> | |||
<artwork name="" type="" align="left" alt=""><![CDATA[ p_C = (p')^2. | ||||
(2)]]></artwork> | <sourcecode><![CDATA[ | |||
<t>Substituting for p_C in Eqn (1) gives:</t> | p_C = (p')^2. (2)]]></sourcecode> | |||
<artwork name="" type="" align="left" alt=""><![CDATA[ p' = p_CL / k] | <t>Substituting for p_C in equation (1) gives</t> | |||
]></artwork> | <sourcecode><![CDATA[ | |||
p' = p_CL / k.]]></sourcecode> | ||||
<t>So the slow-moving input to ECN marking in the L queue (the | <t>So the slow-moving input to ECN marking in the L queue (the | |||
coupled L4S probability) is:</t> | coupled L4S probability) is</t> | |||
<artwork name="" type="" align="left" alt=""><![CDATA[ p_CL = k*p'. | <sourcecode><![CDATA[ | |||
(3)]]></artwork> | p_CL = k*p'. (3)]]></sourcecode> | |||
<t>The actual ECN marking probability p_L that is applied to the L | <t>The actual ECN-marking probability p_L that is applied to the L | |||
queue needs to track the immediate L queue delay under L-only | queue needs to track the immediate L queue delay under L-only | |||
congestion conditions, as well as track p_CL under coupled congestion | congestion conditions, as well as track p_CL under coupled congestion | |||
conditions. So the L queue uses a native AQM that calculates a | conditions. So the L queue uses a 'Native AQM' that calculates a | |||
probability p'_L as a function of the instantaneous L queue delay. | probability p'_L as a function of the instantaneous L queue delay. | |||
And, given the L queue has conditional priority over the C queue, | And given the L queue has conditional priority over the C queue, | |||
whenever the L queue grows, the AQM ought to apply marking probability | whenever the L queue grows, the AQM ought to apply marking probability | |||
p'_L, but p_L ought not to fall below p_CL. This suggests:</t> | p'_L, but p_L ought to not fall below p_CL. This suggests</t> | |||
<artwork name="" type="" align="left" alt=""><![CDATA[ p_L = max(p'_L | <sourcecode><![CDATA[ | |||
, p_CL), (4)]]></artwork> | p_L = max(p'_L, p_CL), (4)]]></sourcecode> | |||
<t>which has also been found to work very well in | <t>which has also been found to work very well in | |||
practice.</t> | practice.</t> | |||
<t>The two transformations of p' in equations (2) and (3) implement | <t>The two transformations of p' in equations (2) and (3) implement | |||
the required coupling given in equation (1) earlier.</t> | the required coupling given in equation (1) earlier.</t> | |||
<t>The constant of proportionality or coupling factor, k, in equation | <t>The constant of proportionality or coupling factor, k, in equation | |||
(1) determines the ratio between the congestion probabilities (loss or | (1) determines the ratio between the congestion probabilities (loss or | |||
marking) experienced by L4S and Classic traffic. Thus, k indirectly | marking) experienced by L4S and Classic traffic. Thus, k indirectly | |||
determines the ratio between L4S and Classic flow rates, because flows | determines the ratio between L4S and Classic flow rates, because flows | |||
(assuming they are responsive) adjust their rate in response to | (assuming they are responsive) adjust their rate in response to | |||
congestion probability. <xref target="dualq_Choosing_k" format="default" /> gives | congestion probability. <xref target="dualq_Choosing_k" format="default" /> gives | |||
skipping to change at line 680 ¶ | skipping to change at line 645 ¶ | |||
==>|Classifier| ,-------. (k*p') [ priority]==> | ==>|Classifier| ,-------. (k*p') [ priority]==> | |||
| |\ | Base | | \scheduler/ | | |\ | Base | | \scheduler/ | |||
`----------'\\ | AQM |---->: ,'|`-.___.-' | `----------'\\ | AQM |---->: ,'|`-.___.-' | |||
\\ | |p' | <' | | \\ | |p' | <' | | |||
\\ `-------' (p'^2) //`' | \\ `-------' (p'^2) //`' | |||
\\ ^ | // | \\ ^ | // | |||
\\,. | v p_C // | \\,. | v p_C // | |||
< | _________ .------.// | < | _________ .------.// | |||
`\| | | | Drop |/ | `\| | | | Drop |/ | |||
Classic (C) |queue |===>|/mark | | Classic (C) |queue |===>|/mark | | |||
__|______| `------' | __|______| `------']]> | |||
]]></artwork> | Legend: ===> traffic flow | |||
---> control dependency | ||||
</artwork> | ||||
</figure> | </figure> | |||
<t keepWithPrevious="true">Legend: ===> traffic flow; ---> control | ||||
dependency.</t> | ||||
<t>After the AQMs have applied their dropping or marking, the | <t>After the AQMs have applied their dropping or marking, the | |||
scheduler forwards their packets to the link. Even though the | scheduler forwards their packets to the link. Even though the | |||
scheduler gives priority to the L queue, it is not as strong as the | scheduler gives priority to the L queue, it is not as strong as the | |||
coupling from the C queue. This is because, as the C queue grows, the | coupling from the C queue. This is because, as the C queue grows, the | |||
base AQM applies more congestion signals to L traffic (as well as C). | 'Base AQM' applies more congestion signals to L traffic (as well as to C). | |||
As L flows reduce their rate in response, they use less than the | As L flows reduce their rate in response, they use less than the | |||
scheduling share for L traffic. So, because the scheduler is | scheduling share for L traffic. So, because the scheduler is | |||
work-conserving, it schedules any C traffic in the gaps.</t> | work-conserving, it schedules any C traffic in the gaps.</t> | |||
<t>Giving priority to the L queue has the benefit of very low L queue | <t>Giving priority to the L queue has the benefit of very low L queue | |||
delay, because the L queue is kept empty whenever L traffic is | delay, because the L queue is kept empty whenever L traffic is | |||
controlled by the coupling. Also, there only has to be a coupling in | controlled by the coupling. Also, there only has to be a coupling in | |||
one direction - from Classic to L4S. Priority has to be conditional in | one direction -- from Classic to L4S. Priority has to be conditional in | |||
some way to prevent the C queue being starved in the short-term (see | some way to prevent the C queue from being starved in the short term (se | |||
e | ||||
<xref target="dualq_Overload_Starvation" format="default"/>) to give C traffic a means | <xref target="dualq_Overload_Starvation" format="default"/>) to give C traffic a means | |||
to push in, as explained next. With normal responsive L traffic, the | to push in, as explained next. With normal responsive L traffic, the | |||
coupled ECN marking gives C traffic the ability to push back against | coupled ECN marking gives C traffic the ability to push back against | |||
even strict priority, by congestion marking the L traffic to make it | even strict priority, by congestion marking the L traffic to make it | |||
yield some space. However, if there is just a small finite set of C | yield some space. However, if there is just a small finite set of C | |||
packets (e.g. a DNS request or an initial window of data) some | packets (e.g., a DNS request or an initial window of data), some | |||
Classic AQMs will not induce enough ECN marking in the L queue, no | Classic AQMs will not induce enough ECN marking in the L queue, no | |||
matter how long the small set of C packets waits. Then, if the L queue | matter how long the small set of C packets waits. Then, if the L queue | |||
happens to remain busy, the C traffic would never get a scheduling | happens to remain busy, the C traffic would never get a scheduling | |||
opportunity from a strict priority scheduler. Ideally the Classic AQM | opportunity from a strict priority scheduler. Ideally, the Classic AQM | |||
would be designed to increase the coupled marking the longer that C | would be designed to increase the coupled marking the longer that C | |||
packets have been waiting, but this is not always practical - hence | packets have been waiting, but this is not always practical -- hence | |||
the need for L priority to be conditional. Giving a small weight or | the need for L priority to be conditional. Giving a small weight or | |||
limited waiting time for C traffic improves response times for short | limited waiting time for C traffic improves response times for short | |||
Classic messages, such as DNS requests, and improves Classic flow | Classic messages, such as DNS requests, and improves Classic flow | |||
startup because immediate capacity is available.</t> | startup because immediate capacity is available.</t> | |||
<t>Example DualQ Coupled AQM algorithms called DualPI2 and Curvy RED | <t>Example DualQ Coupled AQM algorithms called 'DualPI2' and 'Curvy RED' | |||
are given in <xref target="dualq_Ex_algo_pi2" format="default"/> and <xr | are given in Appendices <xref target="dualq_Ex_algo_pi2" format="counter | |||
ef target="dualq_Ex_algo" format="default"/>. Either example AQM can be used to | "/> and <xref target="dualq_Ex_algo" format="counter"/>. Either example AQM can | |||
couple | be used to couple | |||
packet marking and dropping across a dual Q.</t> | packet marking and dropping across a DualQ:</t> | |||
<t>DualPI2 uses a Proportional-Integral (PI) controller as the Base | <ul spacing="normal"> | |||
<li><t>DualPI2 uses a Proportional Integral (PI) controller as the Base | ||||
AQM. Indeed, this Base AQM with just the squared output and no L4S | AQM. Indeed, this Base AQM with just the squared output and no L4S | |||
queue can be used as a drop-in replacement for PIE <xref target="RFC8033 | queue can be used as a drop-in replacement for PIE <xref target="RFC8033 | |||
" format="default"/>, in which case it is just called PI2 <xref target="PI2" for | " format="default"/>, in which case it is just called PI2 <xref target="PI2" for | |||
mat="default"/>. PI2 is a principled simplification of PIE that is both | mat="default"/>. | |||
PI2 is a principled simplification of PIE that is both | ||||
more responsive and more stable in the face of dynamically varying | more responsive and more stable in the face of dynamically varying | |||
load.</t> | load.</t></li> | |||
<t>Curvy RED is derived from RED <xref target="RFC2309" format="default" | <li><t>Curvy RED is derived from RED <xref target="RED" format="default" | |||
/>, except | />, except | |||
its configuration parameters are delay-based to make them insensitive | its configuration parameters are delay-based to make them insensitive | |||
to link rate and it requires fewer operations per packet than RED. | to link rate, and it requires fewer operations per packet than RED. | |||
However, DualPI2 is more responsive and stable over a wider range of | However, DualPI2 is more responsive and stable over a wider range of | |||
RTTs than Curvy RED. As a consequence, at the time of writing, DualPI2 | RTTs than Curvy RED. As a consequence, at the time of writing, DualPI2 | |||
has attracted more development and evaluation attention than Curvy | has attracted more development and evaluation attention than Curvy | |||
RED, leaving the Curvy RED design not so fully evaluated.</t> | RED, leaving the Curvy RED design not so fully evaluated.</t></li> | |||
</ul> | ||||
<t>Both AQMs regulate their queue against targets configured in units | <t>Both AQMs regulate their queue against targets configured in units | |||
of time rather than bytes. As already explained, this ensures | of time rather than bytes. As already explained, this ensures | |||
configuration can be invariant for different drain rates. With AQMs in | configuration can be invariant for different drain rates. With AQMs in | |||
a dualQ structure this is particularly important because the drain | a DualQ structure this is particularly important because the drain | |||
rate of each queue can vary rapidly as flows for the two queues arrive | rate of each queue can vary rapidly as flows for the two queues arrive | |||
and depart, even if the combined link rate is constant.</t> | and depart, even if the combined link rate is constant.</t> | |||
<t>It would be possible to control the queues with other alternative | <t>It would be possible to control the queues with other alternative | |||
AQMs, as long as the normative requirements (those expressed in | AQMs, as long as the normative requirements (those expressed in | |||
capitals) in <xref target="dualq_norm_reqs" format="default"/> are observed.</t> | capitals) in <xref target="dualq_norm_reqs" format="default"/> are observed.</t> | |||
<t>The two queues could optionally be part of a larger queuing | <t>The two queues could optionally be part of a larger queuing | |||
hierarchy, such as the initial example ideas in <xref target="I-D.briscoe-tsvwg-l4s-diffserv" format="default"/>.</t> | hierarchy, such as the initial example ideas in <xref target="I-D.briscoe-tsvwg-l4s-diffserv" format="default"/>.</t> | |||
</section> | </section> | |||
<section anchor="dualq_norm_reqs" numbered="true" toc="default"> | <section anchor="dualq_norm_reqs" numbered="true" toc="default"> | |||
<name>Normative Requirements for a DualQ Coupled AQM</name> | <name>Normative Requirements for a DualQ Coupled AQM</name> | |||
<t>The following requirements are intended to capture only the | <t>The following requirements are intended to capture only the | |||
essential aspects of a DualQ Coupled AQM. They are intended to be | essential aspects of a DualQ Coupled AQM. They are intended to be | |||
independent of the particular AQMs implemented for each queue, but to | independent of the particular AQMs implemented for each queue but to | |||
still define the DualQ framework built around those AQMs.</t> | still define the DualQ framework built around those AQMs.</t> | |||
<section anchor="dualq_functional_reqs" numbered="true" toc="default"> | <section anchor="dualq_functional_reqs" numbered="true" toc="default"> | |||
<name>Functional Requirements</name> | <name>Functional Requirements</name> | |||
<t>A Dual Queue Coupled AQM implementation MUST comply with the | <t>A DualQ Coupled AQM implementation <bcp14>MUST</bcp14> comply with the | |||
prerequisite L4S behaviours for any L4S network node (not just a | prerequisite L4S behaviours for any L4S network node (not just a | |||
DualQ) as specified in section 5 of <xref target="I-D.ietf-tsvwg-ecn-l | DualQ) as specified in <xref | |||
4s-id" format="default"/>. These primarily concern | target="RFC9331" sectionFormat="of" section="5"/>. These primarily concern | |||
classification and remarking as briefly summarized in <xref target="du | classification and re-marking as briefly summarized earlier in <xref t | |||
alq_classification" format="default"/> earlier. But there is also a | arget="dualq_classification" format="default"/>. But | |||
subsection (5.5) giving guidance on reducing the burstiness of the | <xref target="RFC9331" sectionFormat="of" section="5.5"/> also gives | |||
guidance on reducing the burstiness of the | ||||
link technology underlying any L4S AQM.</t> | link technology underlying any L4S AQM.</t> | |||
<t>A Dual Queue Coupled AQM implementation MUST utilize two queues, | <t>A DualQ Coupled AQM implementation <bcp14>MUST</bcp14> utilize two queues, | |||
each with an AQM algorithm.</t> | each with an AQM algorithm.</t> | |||
<t>The AQM algorithm for the low latency (L) queue MUST be able to | <t>The AQM algorithm for the low-latency (L) queue <bcp14>MUST</bcp14> be able to | |||
apply ECN marking to ECN-capable packets.</t> | apply ECN marking to ECN-capable packets.</t> | |||
<t>The scheduler draining the two queues MUST give L4S packets | <t>The scheduler draining the two queues <bcp14>MUST</bcp14> give L4S | |||
priority over Classic, although priority MUST be bounded in order | packets | |||
not to starve Classic traffic (see <xref target="dualq_Overload_Starva | priority over Classic, although priority <bcp14>MUST</bcp14> be bounde | |||
tion" format="default"/>). The scheduler SHOULD be | d in order | |||
not to starve Classic traffic (see <xref target="dualq_Overload_Starva | ||||
tion" format="default"/>). The scheduler <bcp14>SHOULD</bcp14> be | ||||
work-conserving, or otherwise close to work-conserving. This is | work-conserving, or otherwise close to work-conserving. This is | |||
because Classic traffic needs to be able to efficiently fill any | because Classic traffic needs to be able to efficiently fill any | |||
space left by L4S traffic even though the scheduler would otherwise | space left by L4S traffic even though the scheduler would otherwise | |||
allocate it to L4S.</t> | allocate it to L4S.</t> | |||
<t><xref target="I-D.ietf-tsvwg-ecn-l4s-id" format="default"/> defines the meaning of | <t><xref target="RFC9331" format="default"/> defines the meaning of | |||
an ECN marking on L4S traffic, relative to drop of Classic traffic. | an ECN marking on L4S traffic, relative to drop of Classic traffic. | |||
In order to ensure coexistence of Classic and Scalable L4S traffic, | In order to ensure coexistence of Classic and Scalable L4S traffic, | |||
it says, "The likelihood that an AQM drops a Not-ECT Classic packet | it says, | |||
(p_C) MUST be roughly proportional to the square of the likelihood | "the likelihood that the AQM drops a Not-ECT Classic packet | |||
(p_C) <bcp14>MUST</bcp14> be roughly proportional to the square of the | ||||
likelihood | ||||
that it would have marked it if it had been an L4S packet (p_L)." | that it would have marked it if it had been an L4S packet (p_L)." | |||
The term 'likelihood' is used to allow for marking and dropping to | The term 'likelihood' is used to allow for marking and dropping to | |||
be either probabilistic or deterministic.</t> | be either probabilistic or deterministic.</t> | |||
<t>For the current specification, this translates into the following | <t>For the current specification, this translates into the following | |||
requirement. A DualQ Coupled AQM MUST apply ECN marking to traffic | requirement. A DualQ Coupled AQM <bcp14>MUST</bcp14> apply ECN marking to traffic | |||
in the L queue that is no lower than that derived from the | in the L queue that is no lower than that derived from the | |||
likelihood of drop (or ECN marking) in the Classic queue using Eqn. | likelihood of drop (or ECN marking) in the Classic queue using equation | |||
(1).</t> | (1).</t> | |||
<t>The constant of proportionality, k, in Eqn (1) determines the | <t>The constant of proportionality, k, in equation (1) determines the | |||
relative flow rates of Classic and L4S flows when the AQM concerned | relative flow rates of Classic and L4S flows when the AQM concerned | |||
is the bottleneck (all other factors being equal). The L4S ECN | is the bottleneck (all other factors being equal). The L4S ECN | |||
protocol <xref target="I-D.ietf-tsvwg-ecn-l4s-id" format="default"/> s | protocol <xref target="RFC9331" format="default"/> says, | |||
ays, "The | ||||
"The | ||||
constant of proportionality (k) does not have to be standardised for | constant of proportionality (k) does not have to be standardised for | |||
interoperability, but a value of 2 is RECOMMENDED."</t> | interoperability, but a value of 2 is <bcp14>RECOMMENDED</bcp14>." | |||
</t> | ||||
<t>Assuming Scalable congestion controls for the Internet will be as | <t>Assuming Scalable congestion controls for the Internet will be as | |||
aggressive as DCTCP, this will ensure their congestion window will | aggressive as DCTCP, this will ensure their congestion window will | |||
be roughly the same as that of a standards track TCP Reno congestion | be roughly the same as that of a Standards Track TCP Reno congestion | |||
control (Reno) <xref target="RFC5681" format="default"/> and other Ren | control (Reno) <xref target="RFC5681" format="default"/> and other Ren | |||
o-friendly | o-friendly | |||
controls, such as TCP Cubic in its Reno-compatibility mode.</t> | controls, such as TCP CUBIC in its Reno-friendly mode.</t> | |||
<!--{ToDo: The TCP Prague requirements are not necessarily final. | ||||
If the aggressiveness of DCTCP is not defined as the benchmark for Scalable cont | ||||
rols on | ||||
the Internet, the recommended value of k will also be subject to change.}--> | ||||
<t>The choice of k is a matter of operator policy, and operators MAY | <t>The choice of k is a matter of operator policy, and operators <bcp14>MAY</bcp14> | |||
choose a different value using the guidelines in <xref target="dualq_Choosing_k" format="default"/>.</t> | choose a different value using the guidelines in <xref target="dualq_Choosing_k" format="default"/>.</t> | |||
<t>If multiple customers or users share capacity at a bottleneck | <t>If multiple customers or users share capacity at a bottleneck | |||
(e.g. in the Internet access link of a campus network), the | (e.g., in the Internet access link of a campus network), the | |||
operator's choice of k will determine capacity sharing between the | operator's choice of k will determine capacity sharing between the | |||
flows of different customers. However, on the public Internet, | flows of different customers. However, on the public Internet, | |||
access network operators typically isolate customers from each other | access network operators typically isolate customers from each other | |||
with some form of layer-2 multiplexing (OFDM(A) in DOCSIS3.1, CDMA | with some form of Layer 2 multiplexing | |||
in 3G, SC-FDMA in LTE) or L3 scheduling (WRR in DSL), rather than | (OFDM(A) in DOCSIS 3.1, | |||
CDMA in 3G, and SC-FDMA in LTE) or Layer 3 scheduling (Weighted Round Robin (WR | ||||
R) for DSL) rather than | ||||
relying on host congestion controls to share capacity between | relying on host congestion controls to share capacity between | |||
customers <xref target="RFC0970" format="default"/>. In such cases, the choice | customers <xref target="RFC0970" format="default"/>. In such cases, the choice | |||
of k will solely affect relative flow rates within each customer's | of k will solely affect relative flow rates within each customer's | |||
access capacity, not between customers. Also, k will not affect | access capacity, not between customers. Also, k will not affect | |||
relative flow rates at any times when all flows are Classic or all | relative flow rates at any times when all flows are Classic or all | |||
flows are L4S, and it will not affect the relative throughput of | flows are L4S, and it will not affect the relative throughput of | |||
small flows.</t> | small flows.</t> | |||
<t/> | <t/> | |||
<section anchor="dualq_unexpected" numbered="true" toc="default"> | <section anchor="dualq_unexpected" numbered="true" toc="default"> | |||
<name>Requirements in Unexpected Cases</name> | <name>Requirements in Unexpected Cases</name> | |||
<t>The flexibility to allow operator-specific classifiers (<xref target="dualq_classification" format="default"/>) leads to the need to specify what | <t>The flexibility to allow operator-specific classifiers (<xref target="dualq_classification" format="default"/>) leads to the need to specify what | |||
the AQM in each queue ought to do with packets that do not carry | the AQM in each queue ought to do with packets that do not carry | |||
the ECN field expected for that queue. It is expected that the AQM | the ECN field expected for that queue. It is expected that the AQM | |||
in each queue will inspect the ECN field to determine what sort of | in each queue will inspect the ECN field to determine what sort of | |||
congestion notification to signal, then it will decide whether to | congestion notification to signal, then it will decide whether to | |||
apply congestion notification to this particular packet, as | apply congestion notification to this particular packet, as | |||
follows:</t> | follows:</t> | |||
<ul spacing="normal"> | <ul spacing="normal"> | |||
<li> | <li> | |||
<t>If a packet that does not carry an ECT(1) or CE codepoint | <t>If a packet that does not carry an ECT(1) or a CE codepoint | |||
is classified into the L queue:</t> | is classified into the L queue, then:</t> | |||
<ul spacing="normal"> | <ul spacing="normal"> | |||
<li>if the packet is ECT(0), the L AQM SHOULD apply | <li>if the packet is ECT(0), the L AQM <bcp14>SHOULD</bcp14> a | |||
CE-marking using a probability appropriate to Classic | pply | |||
CE marking using a probability appropriate to Classic | ||||
congestion control and appropriate to the target delay in | congestion control and appropriate to the target delay in | |||
the L queue</li> | the L queue</li> | |||
<li> | <li> | |||
<t>if the packet is Not-ECT, the appropriate action | <t>if the packet is Not-ECT, the appropriate action | |||
depends on whether some other function is protecting the L | depends on whether some other function is protecting the L | |||
queue from misbehaving flows (e.g. per-flow queue | queue from misbehaving flows (e.g., per-flow queue | |||
protection <xref target="I-D.briscoe-docsis-q-protection" format="default"/> or latency | protection <xref target="I-D.briscoe-docsis-q-protection" format="default"/> or latency | |||
policing):</t> | policing):</t> | |||
<ul spacing="normal"> | <ul spacing="normal"> | |||
<li>If separate queue protection is provided, the L AQM | <li>if separate queue protection is provided, the L AQM | |||
SHOULD ignore the packet and forward it unchanged, | <bcp14>SHOULD</bcp14> ignore the packet and forward it u | |||
nchanged, | ||||
meaning it should not calculate whether to apply | meaning it should not calculate whether to apply | |||
congestion notification and it should neither drop nor | congestion notification, and it should neither drop nor | |||
CE-mark the packet (for instance, the operator might | CE mark the packet (for instance, the operator might | |||
classify EF traffic that is unresponsive to drop into | classify EF traffic that is unresponsive to drop into | |||
the L queue, alongside responsive L4S-ECN traffic)</li> | the L queue, alongside responsive L4S-ECN traffic)</li> | |||
<li>if separate queue protection is not provided, the L | <li>if separate queue protection is not provided, the L | |||
AQM SHOULD apply drop using a drop probability | AQM <bcp14>SHOULD</bcp14> apply drop using a drop probability | |||
appropriate to Classic congestion control and | appropriate to Classic congestion control and | |||
appropriate to the target delay in the L queue</li> | to the target delay in the L queue</li> | |||
</ul> | </ul> | |||
</li> | </li> | |||
</ul> | </ul> | |||
</li> | </li> | |||
<li> | <li> | |||
<t>If a packet that carries an ECT(1) codepoint is classified | <t>If a packet that carries an ECT(1) codepoint is classified | |||
into the C queue:</t> | into the C queue:</t> | |||
<ul spacing="normal"> | <ul spacing="normal"> | |||
<li>the C AQM SHOULD apply CE-marking using the coupled AQM | <li>the C AQM <bcp14>SHOULD</bcp14> apply CE marking using the Coupled AQM | |||
probability p_CL (= k*p').</li> | probability p_CL (= k*p').</li> | |||
</ul> | </ul> | |||
</li> | </li> | |||
</ul> | </ul> | |||
<t>The above requirements are worded as "SHOULDs", because | <t>The above requirements are worded as "<bcp14>SHOULD</bcp14>"s, because | |||
operator-specific classifiers are for flexibility, by definition. | operator-specific classifiers are for flexibility, by definition. | |||
Therefore, alternative actions might be appropriate in the | Therefore, alternative actions might be appropriate in the | |||
operator's specific circumstances. An example would be where the | operator's specific circumstances. | |||
operator knows that certain legacy traffic marked with one | An example would be where the | |||
operator knows that certain legacy traffic set to one | ||||
codepoint actually has a congestion response associated with | codepoint actually has a congestion response associated with | |||
another codepoint.</t> | another codepoint.</t> | |||
<t>If the DualQ Coupled AQM has detected overload, it MUST | <t>If the DualQ Coupled AQM has detected overload, it <bcp14>MUST</bcp14> | |||
introduce Classic drop to both types of ECN-capable traffic until | introduce Classic drop to both types of ECN-capable traffic until | |||
the overload episode has subsided. Introducing drop if ECN marking | the overload episode has subsided. Introducing drop if ECN marking | |||
is persistently high is recommended by Section 7 of the ECN | is persistently high is recommended in | |||
specification <xref target="RFC3168" format="default"/> and Section | ||||
4.2.1 of | Section <xref target="RFC3168" sectionFormat="bare" section="7"/> of | |||
the AQM Recommendations <xref target="RFC7567" format="default"/>.</ | the ECN spec <xref target="RFC3168"/> | |||
t> | and in Section <xref target="RFC7567" sectionFormat="bare" section=" | |||
4.2.1"/> of | ||||
the AQM Recommendations <xref target="RFC7567"/>.</t> | ||||
</section> | </section> | |||
</section> | </section> | |||
<section numbered="true" toc="default"> | <section numbered="true" toc="default"> | |||
<name>Management Requirements</name> | <name>Management Requirements</name> | |||
<t/> | <t/> | |||
<section anchor="dualq_config" numbered="true" toc="default"> | <section anchor="dualq_config" numbered="true" toc="default"> | |||
<name>Configuration</name> | <name>Configuration</name> | |||
<t>By default, a DualQ Coupled AQM SHOULD NOT need any | <t>By default, a DualQ Coupled AQM <bcp14>SHOULD NOT</bcp14> need any | |||
configuration for use at a bottleneck on the public | configuration for use at a bottleneck on the public | |||
Internet <xref target="RFC7567" format="default"/>. The following pa | Internet <xref target="RFC7567" format="default"/>. The following pa | |||
rameters | rameters | |||
MAY be operator-configurable, e.g. to tune for non-Internet | <bcp14>MAY</bcp14> be operator-configurable, e.g., to tune for non-I | |||
nternet | ||||
settings:</t> | settings:</t> | |||
<ul spacing="normal"> | <ul spacing="normal"> | |||
<li>Optional packet classifier(s) to use in addition to the ECN | <li>Optional packet classifier(s) to use in addition to the ECN | |||
field (see <xref target="dualq_classification" format="default"/>);</li> | field (see <xref target="dualq_classification" format="default"/>).</li> | |||
<li> | <li> | |||
<t>Expected typical RTT, which can be used to determine the | <t>Expected typical RTT, which can be used to determine the | |||
queuing delay of the Classic AQM at its operating point, in | queuing delay of the Classic AQM at its operating point, in | |||
order to prevent typical lone flows from under-utilizing | order to prevent typical lone flows from underutilizing | |||
capacity. For example:</t> | capacity. For example:</t> | |||
<ul spacing="normal"> | <ul spacing="normal"> | |||
<li>for the PI2 algorithm (<xref target="dualq_Ex_algo_pi2" fo | <li>for the PI2 algorithm (<xref target="dualq_Ex_algo_pi2" fo | |||
rmat="default"/>) the queuing delay target is | rmat="default"/>), the queuing delay target is | |||
dependent on the typical RTT;</li> | dependent on the typical RTT.</li> | |||
<li>for the Curvy RED algorithm (<xref target="dualq_Ex_algo" | <li>for the Curvy RED algorithm (<xref target="dualq_Ex_algo" | |||
format="default"/>) the queuing delay at the desired | format="default"/>), the queuing delay at the desired | |||
operating point of the curvy ramp is configured to | operating point of the curvy ramp is configured to | |||
encompass a typical RTT;</li> | encompass a typical RTT.</li> | |||
<li>if another Classic AQM was used, it would be likely to | <li>if another Classic AQM was used, it would be likely to | |||
need an operating point for the queue based on the typical | need an operating point for the queue based on the typical | |||
RTT, and if so it SHOULD be expressed in units of | RTT, and if so, it <bcp14>SHOULD</bcp14> be expressed in units of | |||
time.</li> | time.</li> | |||
</ul> | </ul> | |||
<t>An operating point that is manually calculated might | <t>An operating point that is manually calculated might | |||
be directly configurable instead, e.g. for links with | be directly configurable instead, e.g., for links with | |||
large numbers of flows where under-utilization by a single | large numbers of flows where underutilization by a single | |||
flow would be unlikely.</t> | flow would be unlikely.</t> | |||
</li> | </li> | |||
<li> | <li> | |||
<t>Expected maximum RTT, which can be used to set the | <t>Expected maximum RTT, which can be used to set the | |||
stability parameter(s) of the Classic AQM. For example:</t> | stability parameter(s) of the Classic AQM. For example:</t> | |||
<ul spacing="normal"> | <ul spacing="normal"> | |||
<li>for the PI2 algorithm (<xref target="dualq_Ex_algo_pi2" format="default"/>), the gain parameters of the | <li>for the PI2 algorithm (<xref target="dualq_Ex_algo_pi2" format="default"/>), the gain parameters of the | |||
PI algorithm depend on the maximum RTT.</li> | PI algorithm depend on the maximum RTT.</li> | |||
<li>for the Curvy RED algorithm (<xref target="dualq_Ex_algo" format="default"/>) the smoothing parameter is | <li>for the Curvy RED algorithm (<xref target="dualq_Ex_algo" format="default"/>), the smoothing parameter is | |||
chosen to filter out transients in the queue within a | chosen to filter out transients in the queue within a | |||
maximum RTT.</li> | maximum RTT.</li> | |||
</ul> | </ul> | |||
<t>Stability parameter(s) that are manually calculated | <t>Any stability parameter that is manually calculated | |||
assuming a maximum RTT might be directly configurable | assuming a maximum RTT might be directly configurable | |||
instead.</t> | instead.</t> | |||
</li> | </li> | |||
<li>Coupling factor, k (see <xref target="dualq_Choosing_k" format ="default"/>);</li> | <li>Coupling factor, k (see <xref target="dualq_Choosing_k" format ="default"/>).</li> | |||
<li> | <li> | |||
<t>A limit to the conditional priority of L4S. This is | <t>A limit to the conditional priority of L4S. This is | |||
scheduler-dependent, but it SHOULD be expressed as a relation | scheduler-dependent, but it <bcp14>SHOULD</bcp14> be expressed as a relation | |||
between the max delay of a C packet and an L packet. For | between the max delay of a C packet and an L packet. For | |||
example:</t> | example:</t> | |||
<ul spacing="normal"> | <ul spacing="normal"> | |||
<li>for a WRR scheduler a weight ratio between L and C of | <li>for a WRR scheduler, a weight ratio between L and C of | |||
w:1 means that the maximum delay to a C packet is w times | w:1 means that the maximum delay of a C packet is w times | |||
that of an L packet.</li> | that of an L packet.</li> | |||
<li>for a time-shifted FIFO (TS-FIFO) scheduler (see <xref target="dualq_Overload_Starvation" format="default"/>) a time-shift of | <li>for a time-shifted FIFO (TS-FIFO) scheduler (see <xref target="dualq_Overload_Starvation" format="default"/>), a time-shift of | |||
tshift means that the maximum delay to a C packet is | tshift means that the maximum delay of a C packet is | |||
tshift greater than that of an L packet. tshift could be | tshift greater than that of an L packet. tshift could be | |||
expressed as a multiple of the typical RTT rather than as | expressed as a multiple of the typical RTT rather than as | |||
an absolute delay.</li> | an absolute delay.</li> | |||
</ul> | </ul> | |||
</li> | </li> | |||
<li>The maximum Classic ECN marking probability, p_Cmax, before | <li>The maximum Classic ECN-marking probability, p_Cmax, before | |||
introducing drop.</li> | introducing drop.</li> | |||
</ul> | </ul> | |||
</section> | </section> | |||
<section numbered="true" toc="default"> | <section numbered="true" toc="default"> | |||
<name>Monitoring</name> | <name>Monitoring</name> | |||
<t>An experimental DualQ Coupled AQM SHOULD allow the operator to | <t>An experimental DualQ Coupled AQM <bcp14>SHOULD</bcp14> allow the operator to | |||
monitor each of the following operational statistics on demand, | monitor each of the following operational statistics on demand, | |||
per queue and per configurable sample interval, for performance | per queue and per configurable sample interval, for performance | |||
monitoring and perhaps also for accounting in some cases:</t> | monitoring and perhaps also for accounting in some cases:</t> | |||
<ul spacing="normal"> | <ul spacing="normal"> | |||
<li>Bits forwarded, from which utilization can be | <li>bits forwarded, from which utilization can be | |||
calculated;</li> | calculated;</li> | |||
<li>Total packets in the three categories: arrived, presented | <li>total packets in the three categories: arrived, presented | |||
to the AQM, and forwarded. The difference between the first | to the AQM, and forwarded. The difference between the first | |||
two will measure any non-AQM tail discard. The difference | two will measure any non-AQM tail discard. The difference | |||
between the last two will measure proactive AQM discard;</li> | between the last two will measure proactive AQM discard;</li> | |||
<li>ECN packets marked, non-ECN packets dropped, ECN packets | <li>ECN packets marked, non-ECN packets dropped, and ECN packets | |||
dropped, which can be combined with the three total packet | dropped, which can be combined with the three total packet | |||
counts above to calculate marking and dropping | counts above to calculate marking and dropping | |||
probabilities;</li> | probabilities; and</li> | |||
<li> | <li> | |||
<t>Queue delay (not including serialization delay of the head | <t>queue delay (not including serialization delay of the head | |||
packet or medium acquisition delay) - see further notes | packet or medium acquisition delay) -- see further notes | |||
below.</t> | below.</t> | |||
<t>Unlike the other statistics, | <t>Unlike the other statistics, | |||
queue delay cannot be captured in a simple accumulating | queue delay cannot be captured in a simple accumulating | |||
counter. Therefore, the type of queue delay statistics | counter. Therefore, the type of queue delay statistics | |||
produced (mean, percentiles, etc.) will depend on | produced (mean, percentiles, etc.) will depend on | |||
implementation constraints. To facilitate comparative | implementation constraints. To facilitate comparative | |||
evaluation of different implementations and approaches, an | evaluation of different implementations and approaches, an | |||
implementation SHOULD allow mean and 99th percentile queue | implementation <bcp14>SHOULD</bcp14> allow mean and 99th percentile queue | |||
delay to be derived (per queue per sample interval). A | delay to be derived (per queue per sample interval). A | |||
relatively simple way to do this would be to store a | relatively simple way to do this would be to store a | |||
coarse-grained histogram of queue delay. This could be done | coarse-grained histogram of queue delay. This could be done | |||
with a small number of bins with configurable edges that | with a small number of bins with configurable edges that | |||
represent contiguous ranges of queue delay. Then, over a | represent contiguous ranges of queue delay. Then, over a | |||
sample interval, each bin would accumulate a count of the | sample interval, each bin would accumulate a count of the | |||
number of packets that had fallen within each range. The | number of packets that had fallen within each range. The | |||
maximum queue delay per queue per interval MAY also be | maximum queue delay per queue per interval <bcp14>MAY</bcp14> also be | |||
recorded, to aid diagnosis of faults and anomalous events.</t> | recorded, to aid diagnosis of faults and anomalous events.</t> | |||
</li> | </li> | |||
</ul> | </ul> | |||
</section> | </section> | |||
<section numbered="true" toc="default"> | <section numbered="true" toc="default"> | |||
<name>Anomaly Detection</name> | <name>Anomaly Detection</name> | |||
<t>An experimental DualQ Coupled AQM SHOULD asynchronously report | <t>An experimental DualQ Coupled AQM <bcp14>SHOULD</bcp14> asynchronously report | |||
the following data about anomalous conditions:</t> | the following data about anomalous conditions:</t> | |||
<ul spacing="normal"> | <ul spacing="normal"> | |||
<li> | <li> | |||
<t>Start-time and duration of overload state.</t> | <t>Start time and duration of overload state.</t> | |||
<t>A hysteresis mechanism SHOULD be used to | <t>A hysteresis mechanism <bcp14>SHOULD</bcp14> be used to | |||
prevent flapping in and out of overload from causing an event | prevent flapping in and out of overload from causing an event | |||
storm. For instance, exit from overload state could trigger | storm. For instance, exiting from overload state could trigger | |||
one report, but also latch a timer. Then, during that time, if | one report but also latch a timer. Then, during that time, if | |||
the AQM enters and exits overload state any number of times, | the AQM enters and exits overload state any number of times, | |||
the duration in overload state is accumulated, but no new | the duration in overload state is accumulated, but no new | |||
report is generated until the first time the AQM is out of | report is generated until the first time the AQM is out of | |||
overload once the timer has expired.</t> | overload once the timer has expired.</t> | |||
</li> | </li> | |||
</ul> | </ul> | |||
</section> | </section> | |||
<section numbered="true" toc="default"> | <section numbered="true" toc="default"> | |||
<name>Deployment, Coexistence and Scaling</name> | <name>Deployment, Coexistence, and Scaling</name> | |||
<t><xref target="RFC5706" format="default"/> suggests that deploymen | <t><xref target="RFC5706" format="default"/> suggests that deploymen | |||
t, coexistence | t, coexistence, | |||
and scaling should also be covered as management requirements. The | and scaling should also be covered as management requirements. The | |||
raison d'être of the DualQ Coupled AQM is to enable | raison d'être of the DualQ Coupled AQM is to enable | |||
deployment and coexistence of Scalable congestion controls - as | deployment and coexistence of Scalable congestion controls (as | |||
incremental replacements for today's Reno-friendly controls that | incremental replacements for today's Reno-friendly controls that | |||
do not scale with bandwidth-delay product. Therefore, there is no | do not scale with bandwidth-delay product). Therefore, there is no | |||
need to repeat these motivating issues here given they are already | need to repeat these motivating issues here given they are already | |||
explained in the Introduction and detailed in the L4S | explained in the Introduction and detailed in the L4S | |||
architecture <xref target="I-D.ietf-tsvwg-l4s-arch" format="default" />.</t> | architecture <xref target="RFC9330" format="default"/>.</t> | |||
<t>The descriptions of specific DualQ Coupled AQM algorithms in | <t>The descriptions of specific DualQ Coupled AQM algorithms in | |||
the appendices cover scaling of their configuration parameters, | the appendices cover scaling of their configuration parameters, | |||
e.g. with respect to RTT and sampling frequency.</t> | e.g., with respect to RTT and sampling frequency.</t> | |||
</section> | </section> | |||
</section> | </section> | |||
</section> | </section> | |||
</section> | </section> | |||
<section anchor="dualq_IANA" numbered="true" toc="default"> | <section anchor="dualq_IANA" numbered="true" toc="default"> | |||
<name>IANA Considerations (to be removed by RFC Editor)</name> | <name>IANA Considerations</name> | |||
<t>This specification contains no IANA considerations.</t> | <t>This document has no IANA actions.</t> | |||
</section> | </section> | |||
<section anchor="dualq_Security_Considerations" numbered="true" toc="default"> | <section anchor="dualq_Security_Considerations" numbered="true" toc="default"> | |||
<name>Security Considerations</name> | <name>Security Considerations</name> | |||
<t/> | <t/> | |||
<section numbered="true" toc="default"> | <section numbered="true" toc="default"> | |||
<name>Low Delay without Requiring Per-Flow Processing</name> | <name>Low Delay without Requiring Per-flow Processing</name> | |||
<t>The L4S architecture <xref target="I-D.ietf-tsvwg-l4s-arch" format="d | <t>The L4S architecture <xref target="RFC9330" format="default"/> | |||
efault"/> | compares the DualQ and FQ approaches to L4S. The | |||
compares the DualQ and per-flow-queuing (FQ) approaches to L4S. The | ||||
privacy considerations section in that document motivates the DualQ on | privacy considerations section in that document motivates the DualQ on | |||
the grounds that users who want to encrypt application flow | the grounds that users who want to encrypt application flow | |||
identifiers, e.g. in IPSec or other encrypted VPN tunnels, don't | identifiers, e.g., in IPsec or other encrypted VPN tunnels, don't | |||
have to sacrifice low delay (<xref target="RFC8404" format="default"/> encourages | have to sacrifice low delay (<xref target="RFC8404" format="default"/> encourages | |||
avoidance of such privacy compromises).</t> | avoidance of such privacy compromises).</t> | |||
<t>The security considerations section of the L4S architecture also | <t>The security considerations section of the L4S architecture <xref tar | |||
includes subsections on policing of relative flow-rates (section 8.1) | get="RFC9330" format="default"/> also | |||
and on policing of flows that cause excessive queuing delay (section | includes subsections on policing of relative flow rates (Section <xref | |||
8.2). It explains that the interests of users do not collide in the | target="RFC9330" sectionFormat="bare" section="8.1"/>) and on | |||
same way for delay as they do for bandwidth. For someone to get more | policing of flows that cause excessive queuing delay (Section <xref | |||
of the bandwidth of a shared link, someone else necessarily gets less | target="RFC9330" sectionFormat="bare" section="8.2"/>). It explains | |||
(a 'zero-sum game'), whereas queuing delay can be reduced for | that the interests of users do not collide in the same way for delay | |||
everyone, without any need for someone else to lose out. It also | as they do for bandwidth. For someone to get more of the bandwidth of | |||
explains that, on the current Internet, scheduling usually enforces | a shared link, someone else necessarily gets less (a 'zero-sum game'), | |||
separation of bandwidth between 'sites' (e.g. households, | whereas queuing delay can be reduced for everyone, without any need | |||
businesses or mobile users), but it is not common to need to schedule | for someone else to lose out. It also explains that, on the current | |||
or police the bandwidth used by individual application flows.</t> | Internet, scheduling usually enforces separation of bandwidth between | |||
'sites' (e.g., households, businesses, or mobile users), but it is not | ||||
common to need to schedule or police the bandwidth used by individual | ||||
application flows.</t> | ||||
<t>By the above arguments, per-flow rate policing might not be | <t>By the above arguments, per-flow rate policing might not be | |||
necessary and in trusted environments (e.g. private data centres) | necessary, and in trusted environments (e.g., private data centres), | |||
it is certainly unlikely to be needed. Therefore, because it is hard | it is certainly unlikely to be needed. Therefore, because it is hard | |||
to avoid complexity and unintended side effects with per-flow rate | to avoid complexity and unintended side effects with per-flow rate | |||
policing, it needs to be separable from a basic AQM, as an option, | policing, it needs to be separable from a basic AQM, as an option, | |||
under policy control. On this basis, the DualQ Coupled AQM provides | under policy control. On this basis, the DualQ Coupled AQM provides | |||
low delay without prejudging the question of per-flow rate | low delay without prejudging the question of per-flow rate | |||
policing.</t> | policing.</t> | |||
<t>Nonetheless, the interests of users or flows might conflict, | <t>Nonetheless, the interests of users or flows might conflict, | |||
e.g. in case of accident or malice. Then per-flow rate control | e.g., in case of accident or malice. Then per-flow rate control | |||
could be necessary. If flow-rate control is needed, it can be provided | could be necessary. If per-flow rate control is needed, it can be provid | |||
ed | ||||
as a modular addition to a DualQ. And similarly, if protection against | as a modular addition to a DualQ. And similarly, if protection against | |||
excessive queue delay is needed, a per-flow queue protection option | excessive queue delay is needed, a per-flow queue protection option | |||
can be added to a DualQ (e.g. <xref target="I-D.briscoe-docsis-q-protection" format="default"/>).</t> | can be added to a DualQ (e.g., <xref target="I-D.briscoe-docsis-q-protection" format="default"/>).</t> | |||
</section> | </section> | |||
<section anchor="dualq_Overload" numbered="true" toc="default"> | <section anchor="dualq_Overload" numbered="true" toc="default"> | |||
<name>Handling Unresponsive Flows and Overload</name> | <name>Handling Unresponsive Flows and Overload</name> | |||
<t>In the absence of any per-flow control, it is important that the | <t>In the absence of any per-flow control, it is important that the | |||
basic DualQ Coupled AQM gives unresponsive flows no more throughput | basic DualQ Coupled AQM gives unresponsive flows no more throughput | |||
advantage than a single-queue AQM would, and that it at least handles | advantage than a single-queue AQM would, and that it at least handles | |||
overload situations. Overload means that incoming load significantly | overload situations. Overload means that incoming load significantly | |||
or persistently exceeds output capacity, but it is not intended to be | or persistently exceeds output capacity, but it is not intended to be | |||
a precise term -- significant and persistent are matters of | a precise term -- significant and persistent are matters of | |||
degree.</t> | degree.</t> | |||
<t>A trade-off needs to be made between complexity and the risk of | <t>A trade-off needs to be made between complexity and the risk of | |||
either traffic class harming the other. In overloaded conditions the | either traffic class harming the other. In overloaded conditions, the | |||
higher-priority L4S service will have to sacrifice some aspect of its | higher-priority L4S service will have to sacrifice some aspect of its | |||
performance. Depending on the degree of overload, alternative | performance. Depending on the degree of overload, alternative | |||
solutions may relax a different factor: e.g. throughput, delay, | solutions may relax a different factor: for example, throughput, delay, | |||
drop. These choices need to be made either by the developer or by | or drop. These choices need to be made either by the developer or by | |||
operator policy, rather than by the IETF. Subsequent subsections | operator policy, rather than by the IETF. | |||
discuss aspects relating to handling of different degrees of overload: | Subsequent subsections | |||
discuss handling different degrees of overload: | ||||
</t> | </t> | |||
<ul spacing="normal"> | <ul spacing="normal"> | |||
<li> | <li> | |||
<t>Unresponsive flows (L and/or C) but not overloaded, | <t>Unresponsive flows (L and/or C) but not overloaded, | |||
i.e. the sum of unresponsive load before adding any | i.e., the sum of unresponsive load before adding any | |||
responsive traffic is below capacity;</t> | responsive traffic is below capacity.</t> | |||
<ul empty="true" spacing="normal"> | <ul empty="true" spacing="normal"> | |||
<li>This case is handled by the regular Coupled DualQ (<xref target="dualq_coupled" format="default"/>) but not discussed there. So below, | <li>This case is handled by the regular Coupled DualQ (<xref target="dualq_coupled" format="default"/>) but not discussed there. So below, | |||
<xref target="dualq_unresponsive_wo_overload" format="default"/> explains the | <xref target="dualq_unresponsive_wo_overload" format="default"/> explains the | |||
design goal, and how it is achieved in practice;</li> | design goal and how it is achieved in practice.</li> | |||
</ul> | </ul> | |||
</li> | </li> | |||
<li> | <li> | |||
<t>Unresponsive flows (L and/or C) causing persistent overload, | <t>Unresponsive flows (L and/or C) causing persistent overload, | |||
i.e. the sum of unresponsive load even before adding any | i.e., the sum of unresponsive load even before adding any | |||
responsive traffic persistently exceeds capacity;</t> | responsive traffic persistently exceeds capacity.</t> | |||
<ul empty="true" spacing="normal"> | <ul empty="true" spacing="normal"> | |||
<li>This case is not covered by the regular Coupled DualQ | <li>This case is not covered by the regular Coupled DualQ | |||
mechanism (<xref target="dualq_coupled" format="default"/>) but the last para | mechanism (<xref target="dualq_coupled" format="default"/>), but the last paragraph | |||
in <xref target="dualq_unexpected" format="default"/> sets out a requirement to | in <xref target="dualq_unexpected" format="default"/> sets out a requirement to | |||
handle the case where ECN-capable traffic could starve | handle the case where ECN-capable traffic could starve | |||
non-ECN-capable traffic. <xref target="dualq_Overload_Saturation" format="default"/> below discusses the | non-ECN-capable traffic. <xref target="dualq_Overload_Saturation" format="default"/> below discusses the | |||
general options and gives specific examples.</li> | general options and gives specific examples.</li> | |||
</ul> | </ul> | |||
</li> | </li> | |||
<li> | <li> | |||
<t>Short-term overload that lies between the 'not overloaded' and | <t>Short-term overload that lies between the 'not overloaded' and | |||
'persistently overloaded' cases. </t> | 'persistently overloaded' cases.</t> | |||
<ul empty="true" spacing="normal"> | <ul empty="true" spacing="normal"> | |||
<li>For the period before overload is deemed persistent, <xref target="dualq_Overload_Starvation" format="default"/> discusses options for | <li>For the period before overload is deemed persistent, <xref target="dualq_Overload_Starvation" format="default"/> discusses options for | |||
more immediate mechanisms at the scheduler timescale. These | more immediate mechanisms at the scheduler timescale. These | |||
prevent short-term starvation of the C queue by making the | prevent short-term starvation of the C queue by making the | |||
priority of the L queue conditional, as required in <xref target="dualq_functional_reqs" format="default"/>.</li> | priority of the L queue conditional, as required in <xref target="dualq_functional_reqs" format="default"/>.</li> | |||
</ul> | </ul> | |||
</li> | </li> | |||
</ul> | </ul> | |||
<section anchor="dualq_unresponsive_wo_overload" numbered="true" toc="default"> | <section anchor="dualq_unresponsive_wo_overload" numbered="true" toc="default"> | |||
<name>Unresponsive Traffic without Overload</name> | <name>Unresponsive Traffic without Overload</name> | |||
<t>When one or more L flows and/or C flows are unresponsive, but | <t>When one or more L flows and/or C flows are unresponsive, but | |||
their total load is within the link capacity so that they do not | their total load is within the link capacity so that they do not | |||
saturate the coupled marking (below 100%), the goal of a DualQ AQM | saturate the coupled marking (below 100%), the goal of a DualQ AQM | |||
is to behave no worse than a single-queue AQM.</t> | is to behave no worse than a single-queue AQM.</t> | |||
<t>Tests have shown that this is indeed the case with no additional | <t>Tests have shown that this is indeed the case with no additional | |||
mechanism beyond the regular Coupled DualQ of <xref target="dualq_coupled" format="default"/> (see the results of 'overload experiments' | mechanism beyond the regular Coupled DualQ of <xref target="dualq_coupled" format="default"/> (see the results of 'overload experiments' | |||
in <xref target="DCttH19" format="default"/>). Perhaps counter-intuitively, whether | in <xref target="L4Seval22" format="default"/>). Perhaps counterintuitively, whether | |||
the unresponsive flow classifies itself into the L or the C queue, | the unresponsive flow classifies itself into the L or the C queue, | |||
the DualQ system behaves as if it has subtracted from the overall | the DualQ system behaves as if it has subtracted from the overall | |||
link capacity. Then, the coupling shares out the remaining capacity | link capacity. Then, the coupling shares out the remaining capacity | |||
between any competing responsive flows (in either queue). See also | between any competing responsive flows (in either queue). See also | |||
<xref target="dualq_Overload_Starvation" format="default"/>, which discusses | <xref target="dualq_Overload_Starvation" format="default"/>, which discusses | |||
scheduler-specific details.</t> | scheduler-specific details.</t> | |||
</section> | </section> | |||
<section anchor="dualq_Overload_Starvation" numbered="true" toc="default"> | <section anchor="dualq_Overload_Starvation" numbered="true" toc="default"> | |||
<name>Avoiding Short-Term Classic Starvation: Sacrifice L4S Throughput or Delay?</name> | <name>Avoiding Short-Term Classic Starvation: Sacrifice L4S Throughput or Delay?</name> | |||
<t>Priority of L4S is required to be conditional (see <xref target="dualq_coupled_structure" format="default"/> & <xref target="dualq_functional_reqs" format="default"/>) to avoid short-term starvation of | <t>Priority of L4S is required to be conditional (see Sections <xref target="dualq_coupled_structure" format="counter"/> and <xref target="dualq_functional_reqs" format="counter"/>) to avoid short-term starvation of | |||
Classic. Otherwise, as explained in <xref target="dualq_coupled_structure" format="default"/>, even a lone responsive L4S flow | Classic. Otherwise, as explained in <xref target="dualq_coupled_structure" format="default"/>, even a lone responsive L4S flow | |||
could temporarily block a small finite set of C packets | could temporarily block a small finite set of C packets | |||
(e.g. an initial window or DNS request). The blockage would | (e.g., an initial window or DNS request). The blockage would | |||
only be brief, but it could be longer for certain AQM | only be brief, but it could be longer for certain AQM | |||
implementations that can only increase the congestion signal coupled | implementations that can only increase the congestion signal coupled | |||
from the C queue when C packets are actually being dequeued. There | from the C queue when C packets are actually being dequeued. There | |||
is then the question of whether to sacrifice L4S throughput or L4S | is then the question of whether to sacrifice L4S throughput or L4S | |||
delay (or some other policy) to make the priority conditional:</t> | delay (or some other policy) to make the priority conditional:</t> | |||
<dl newline="false" spacing="normal"> | <dl newline="true" spacing="normal"> | |||
<dt>Sacrifice L4S throughput: </dt> | <dt>Sacrifice L4S throughput: </dt> | |||
<dd anchor="dualq_Minimum_Service"> | <dd anchor="dualq_Minimum_Service"> | |||
<t>By using weighted | <t>By using WRR as the conditional priority scheduler, the L4S | |||
round-robin as the conditional priority scheduler, the L4S | ||||
service can sacrifice some throughput during overload. This can | service can sacrifice some throughput during overload. This can | |||
either be thought of as guaranteeing a minimum throughput | be thought of as guaranteeing either a minimum throughput | |||
service for Classic traffic, or as guaranteeing a maximum delay | service for Classic traffic or a maximum delay | |||
for a packet at the head of the Classic queue.</t> | for a packet at the head of the Classic queue.</t> | |||
<t>Cautionary note: a WRR scheduler can only | <aside><t>Cautionary note: a WRR scheduler can only | |||
guarantee Classic throughput if Classic sources are sending | guarantee Classic throughput if Classic sources are sending | |||
enough to use it -- congestion signals can undermine | enough to use it -- congestion signals can undermine | |||
scheduling because they determine how much responsive traffic of | scheduling because they determine how much responsive traffic of | |||
each class arrives for scheduling in the first place. This is | each class arrives for scheduling in the first place. This is | |||
why scheduling is only relied on to handle short-term | why scheduling is only relied on to handle short-term | |||
starvation; until congestion signals build up and the sources | starvation, until congestion signals build up and the sources | |||
react. Even during long-term overload (discussed more fully in | react. Even during long-term overload (discussed more fully in | |||
<xref target="dualq_Overload_Saturation" format="default"/>), it's pragmatic to | <xref target="dualq_Overload_Saturation" format="default"/>), it's pragmatic to | |||
discard packets from both queues, which again thins the traffic | discard packets from both queues, which again thins the traffic | |||
before it reaches the scheduler. This is because a scheduler | before it reaches the scheduler. This is because a scheduler | |||
cannot be relied on to handle long-term overload since the right | cannot be relied on to handle long-term overload since the right | |||
scheduler weight cannot be known for every scenario.</t> | scheduler weight cannot be known for every scenario.</t></aside> | |||
<t>The scheduling weight of the Classic queue | <t>The scheduling weight of the Classic queue | |||
should be small (e.g. 1/16). In most traffic scenarios the | should be small (e.g., 1/16). In most traffic scenarios, the | |||
scheduler will not interfere and it will not need to, because | scheduler will not interfere and it will not need to, because | |||
the coupling mechanism and the end-systems will determine the | the coupling mechanism and the end systems will determine the | |||
share of capacity across both queues as if it were a single | share of capacity across both queues as if it were a single | |||
pool. However, if L4S traffic is over-aggressive or | pool. However, if L4S traffic is over-aggressive or | |||
unresponsive, the scheduler weight for Classic traffic will at | unresponsive, the scheduler weight for Classic traffic will at | |||
least be large enough to ensure it does not starve in the | least be large enough to ensure it does not starve in the | |||
short-term. </t> | short term. </t> | |||
<t>Although WRR scheduling is | <t>Although WRR scheduling is | |||
only expected to address short-term overload, there are | only expected to address short-term overload, there are | |||
(somewhat rare) cases when WRR has an effect on capacity shares | (somewhat rare) cases when WRR has an effect on capacity shares | |||
over longer time-scales. But its effect is minor, and it | over longer timescales. But its effect is minor, and it | |||
certainly does no harm. Specifically, in cases where the ratio | certainly does no harm. Specifically, in cases where the ratio | |||
of L4S to Classic flows (e.g. 19:1) is greater than the | of L4S to Classic flows (e.g., 19:1) is greater than the | |||
ratio of their scheduler weights (e.g. 15:1), the L4S flows | ratio of their scheduler weights (e.g., 15:1), the L4S flows | |||
will get less than an equal share of the capacity, but only | will get less than an equal share of the capacity, but only | |||
slightly. For instance, with the example numbers given, each L4S | slightly. For instance, with the example numbers given, each L4S | |||
flow will get (15/16)/19 = 4.9% when ideally each would get | flow will get (15/16)/19 = 4.9% when ideally each would get | |||
1/20=5%. In the rather specific case of an unresponsive flow | 1/20 = 5%. In the rather specific case of an unresponsive flow | |||
taking up just less than the capacity set aside for L4S | taking up just less than the capacity set aside for L4S | |||
(e.g. 14/16 in the above example), using WRR could | (e.g., 14/16 in the above example), using WRR could | |||
significantly reduce the capacity left for any responsive L4S | significantly reduce the capacity left for any responsive L4S | |||
flows.</t> | flows.</t> | |||
<t>The scheduling weight of the | <t>The scheduling weight of the | |||
Classic queue should not be too small, otherwise a C packet at | Classic queue should not be too small, otherwise a C packet at | |||
the head of the queue could be excessively delayed by a | the head of the queue could be excessively delayed by a | |||
continually busy L queue. For instance if the Classic weight is | continually busy L queue. For instance, if the Classic weight is | |||
1/16, the maximum that a Classic packet at the head of the queue | 1/16, the maximum that a Classic packet at the head of the queue | |||
can be delayed by L traffic is the serialization delay of 15 | can be delayed by L traffic is the serialization delay of 15 | |||
MTU-sized packets.</t> | MTU-sized packets.</t> | |||
</dd> | </dd> | |||
<dt>Sacrifice L4S Delay:</dt> | <dt>Sacrifice L4S delay:</dt> | |||
<dd anchor="dualq_Delay_Overload"> | <dd anchor="dualq_Delay_Overload"> | |||
<t>The operator could choose to | <t>The operator could choose to | |||
control overload of the Classic queue by allowing some delay to | control overload of the Classic queue by allowing some delay to | |||
'leak' across to the L4S queue. The scheduler can be made to | 'leak' across to the L4S queue. The scheduler can be made to | |||
behave like a single First-In First-Out (FIFO) queue with | behave like a single FIFO queue with | |||
different service times by implementing a very simple | different service times by implementing a very simple | |||
conditional priority scheduler that could be called a | conditional priority scheduler that could be called a | |||
"time-shifted FIFO" (see the Modifier Earliest Deadline First | "time-shifted FIFO" (TS-FIFO) (see the Modifier Earliest Deadline | |||
(MEDF) scheduler <xref target="MEDF" format="default"/>). This sch | First | |||
eduler | (MEDF) scheduler <xref target="MEDF" format="default"/>). This sch | |||
eduler | ||||
adds tshift to the queue delay of the next L4S packet, before | adds tshift to the queue delay of the next L4S packet, before | |||
comparing it with the queue delay of the next Classic packet, | comparing it with the queue delay of the next Classic packet, | |||
then it selects the packet with the greater adjusted queue | then it selects the packet with the greater adjusted queue | |||
delay.</t> | delay.</t> | |||
<t>Under regular conditions, this | <t>Under regular conditions, the | |||
time-shifted FIFO scheduler behaves just like a strict priority | TS-FIFO scheduler behaves just like a strict priority | |||
scheduler. But under moderate or high overload it prevents | scheduler. But under moderate or high overload, it prevents | |||
starvation of the Classic queue, because the time-shift (tshift) | starvation of the Classic queue, because the time-shift (tshift) | |||
defines the maximum extra queuing delay of Classic packets | defines the maximum extra queuing delay of Classic packets | |||
relative to L4S. This would control milder overload of | relative to L4S. | |||
This would control milder overload of | ||||
responsive traffic by introducing delay to defer invoking the | responsive traffic by introducing delay to defer invoking the | |||
overload mechanisms in <xref target="dualq_Overload_Saturation" format="default"/>, particularly when close to | overload mechanisms in <xref target="dualq_Overload_Saturation" format="default"/>, particularly when close to | |||
the maximum congestion signal.</t> | the maximum congestion signal.</t> | |||
</dd> | </dd> | |||
</dl> | </dl> | |||
<t>The example implementations in <xref target="dualq_Ex_algo_pi2" for | <t>The example implementations in Appendices <xref target="dualq_Ex_al | |||
mat="default"/> | go_pi2" format="counter"/> | |||
and <xref target="dualq_Ex_algo" format="default"/> could both be impl | and <xref target="dualq_Ex_algo" format="counter"/> could both be impl | |||
emented with | emented with | |||
either policy.</t> | either policy.</t> | |||
</section> | </section> | |||
<section anchor="dualq_Overload_Saturation" numbered="true" toc="default"> | <section anchor="dualq_Overload_Saturation" numbered="true" toc="default"> | |||
<name>L4S ECN Saturation: Introduce Drop or Delay?</name> | <name>L4S ECN Saturation: Introduce Drop or Delay?</name> | |||
<t>This section concerns persistent overload caused by unresponsive | <t>This section concerns persistent overload caused by unresponsive | |||
L and/or C flows. To keep the throughput of both L4S and Classic | L and/or C flows. To keep the throughput of both L4S and Classic | |||
flows roughly equal over the full load range, a different control | flows roughly equal over the full load range, a different control | |||
strategy needs to be defined above the point where the L4S AQM | strategy needs to be defined above the point where the L4S AQM | |||
persistently saturates to an ECN marking probability of 100% leaving | persistently saturates to an ECN marking probability of 100%, leaving | |||
no room to push back the load any harder. L4S ECN marking will | no room to push back the load any harder. L4S ECN marking will | |||
saturate first (assuming the coupling factor k>1), even though | saturate first (assuming the coupling factor k>1), even though | |||
saturation could be caused by the sum of unresponsive traffic in | saturation could be caused by the sum of unresponsive traffic in | |||
either or both queues exceeding the link capacity.</t> | either or both queues exceeding the link capacity.</t> | |||
<t>The term 'unresponsive' includes cases where a flow becomes | <t>The term 'unresponsive' includes cases where a flow becomes | |||
temporarily unresponsive, for instance, a real-time flow that takes | temporarily unresponsive, for instance, a real-time flow that takes | |||
a while to adapt its rate in response to congestion, or a standard | a while to adapt its rate in response to congestion, or a standard | |||
Reno flow that is normally responsive, but above a certain | Reno flow that is normally responsive, but above a certain | |||
congestion level it will not be able to reduce its congestion window | congestion level it will not be able to reduce its congestion window | |||
below the allowed minimum of 2 segments <xref target="RFC5681" format= | below the allowed minimum of 2 segments <xref target="RFC5681" format= | |||
"default"/>, effectively becoming unresponsive. (Note that | "default"/>, effectively becoming unresponsive. (Note that | |||
L4S traffic ought to remain responsive below a window of 2 segments | L4S traffic ought to remain responsive below a window of 2 segments. | |||
(see the L4S requirements <xref target="I-D.ietf-tsvwg-ecn-l4s-id" for | See the L4S requirements <xref target="RFC9331" format="default"/>.)</ | |||
mat="default"/>).</t> | t> | |||
<t>Saturation raises the question of whether to relieve congestion | <t>Saturation raises the question of whether to relieve congestion | |||
by introducing some drop into the L4S queue or by allowing delay to | by introducing some drop into the L4S queue or by allowing delay to | |||
grow in both queues (which could eventually lead to drop due to | grow in both queues (which could eventually lead to drop due to | |||
buffer exhaustion anyway):</t> | buffer exhaustion anyway):</t> | |||
<dl newline="false" spacing="normal"> | <dl newline="true" spacing="normal"> | |||
<dt>Drop on Saturation:</dt> | <dt>Drop on Saturation:</dt> | |||
<dd>Persistent saturation can be | <dd>Persistent saturation can be | |||
defined by a maximum threshold for coupled L4S ECN marking | defined by a maximum threshold for coupled L4S ECN marking | |||
(assuming k>1) before saturation starts to make the flow | (assuming k>1) before saturation starts to make the flow | |||
rates of the different traffic types diverge. Above that, the | rates of the different traffic types diverge. Above that, the | |||
drop probability of Classic traffic is applied to all packets of | drop probability of Classic traffic is applied to all packets of | |||
all traffic types. Then experiments have shown that queueing | all traffic types. Then experiments have shown that queuing | |||
delay can be kept at the target in any overload situation, | delay can be kept at the target in any overload situation, | |||
including with unresponsive traffic, and no further measures are | including with unresponsive traffic, and no further measures are | |||
required (<xref target="dualq_overload_unresp_ect" format="default"/>).</dd> | required (<xref target="dualq_overload_unresp_ect" format="default"/>).</dd> | |||
<dt>Delay on Saturation:</dt> | <dt>Delay on Saturation:</dt> | |||
<dd>When L4S marking saturates, | <dd>When L4S marking saturates, | |||
instead of introducing L4S drop, the drop and marking | instead of introducing L4S drop, the drop and marking | |||
probabilities of both queues could be capped. Beyond that, delay | probabilities of both queues could be capped. Beyond that, delay | |||
will grow either solely in the queue with unresponsive traffic | will grow either solely in the queue with unresponsive traffic | |||
(if WRR is used), or in both queues (if time-shifted FIFO is | (if WRR is used) or in both queues (if TS-FIFO is | |||
used). In either case, the higher delay ought to control | used). In either case, the higher delay ought to control | |||
temporary high congestion. If the overload is more persistent, | temporary high congestion. If the overload is more persistent, | |||
eventually the combined DualQ will overflow and tail drop will | eventually the combined DualQ will overflow and tail drop will | |||
control congestion.</dd> | control congestion.</dd> | |||
</dl> | </dl> | |||
<t>The example implementation in <xref target="dualq_Ex_algo_pi2" format="default"/> | <t>The example implementation in <xref target="dualq_Ex_algo_pi2" format="default"/> | |||
solely applies the "drop on saturation" policy. The DOCSIS | solely applies the "drop on saturation" policy. The DOCSIS | |||
specification of a DualQ Coupled AQM <xref target="DOCSIS3.1" format="default"/> | specification of a DualQ Coupled AQM <xref target="DOCSIS3.1" format="default"/> | |||
also implements the 'drop on saturation' policy with a very shallow | also implements the 'drop on saturation' policy with a very shallow | |||
L buffer. However, the addition of DOCSIS per-flow Queue Protection | L buffer. However, the addition of DOCSIS per-flow Queue Protection | |||
<xref target="I-D.briscoe-docsis-q-protection" format="default"/> turns this into | <xref target="I-D.briscoe-docsis-q-protection" format="default"/> turns this into | |||
'delay on saturation' by redirecting some packets of the flow(s) | 'delay on saturation' by redirecting some packets of the flow or flows | |||
most responsible for L queue overload into the C queue, which has a | that are most responsible for L queue overload into the C queue, which | |||
has a | ||||
higher delay target. If overload continues, this again becomes 'drop | higher delay target. If overload continues, this again becomes 'drop | |||
on saturation' as the level of drop in the C queue rises to maintain | on saturation' as the level of drop in the C queue rises to maintain | |||
the target delay of the C queue.</t> | the target delay of the C queue.</t> | |||
<section anchor="dualq_overload_unresp_ect" numbered="true" toc="default"> | <section anchor="dualq_overload_unresp_ect" numbered="true" toc="default"> | |||
<name>Protecting against Overload by Unresponsive ECN-Capable Traffic</name> | <name>Protecting against Overload by Unresponsive ECN-Capable Traffic</name> | |||
<t>Without a specific overload mechanism, unresponsive traffic | <t>Without a specific overload mechanism, unresponsive traffic | |||
would have a greater advantage if it were also ECN-capable. The | would have a greater advantage if it were also ECN-capable. The | |||
advantage is undetectable at normal low levels of marking. | advantage is undetectable at normal low levels of marking. | |||
However, it would become significant with the higher levels of | However, it would become significant with the higher levels of | |||
marking typical during overload, when it could evade a significant | marking typical during overload, when it could evade a significant | |||
degree of drop. This is an issue whether the ECN-capable traffic | degree of drop. This is an issue whether the ECN-capable traffic | |||
is L4S or Classic.</t> | is L4S or Classic.</t> | |||
<t>This raises the question of whether and when to introduce drop | <t>This raises the question of whether and when to introduce drop | |||
of ECN-capable traffic, as required by both Section 7 of the ECN | of ECN-capable traffic, as required by both Section <xref target="RF | |||
spec <xref target="RFC3168" format="default"/> and Section 4.2.1 of | C3168" sectionFormat="bare" section="7"/> of the ECN spec <xref target="RFC3168" | |||
the AQM | format="default"/> and Section <xref target="RFC7567" sectionFormat="bare" sect | |||
recommendations <xref target="RFC7567" format="default"/>.</t> | ion="4.2.1"/> of the AQM | |||
recommendations <xref target="RFC7567" format="default"/>.</t> | ||||
<t>As an example, experiments with the DualPI2 AQM (<xref target="dualq_Ex_algo_pi2" format="default"/>) have shown that introducing 'drop on | <t>As an example, experiments with the DualPI2 AQM (<xref target="dualq_Ex_algo_pi2" format="default"/>) have shown that introducing 'drop on | |||
saturation' at 100% coupled L4S marking addresses this problem | saturation' at 100% coupled L4S marking addresses this problem | |||
with unresponsive ECN as well as addressing the saturation | with unresponsive ECN, and it also addresses the saturation | |||
problem. At saturation, DualPI2 switches into overload mode, where | problem. At saturation, DualPI2 switches into overload mode, where | |||
the base AQM is driven by the max delay of both queues and it | the Base AQM is driven by the max delay of both queues, and it | |||
introduces probabilistic drop to both queues equally. It leaves | introduces probabilistic drop to both queues equally. | |||
It leaves | ||||
only a small range of congestion levels just below saturation | only a small range of congestion levels just below saturation | |||
where unresponsive traffic gains any advantage from using the ECN | where unresponsive traffic gains any advantage from using the ECN | |||
capability (relative to being unresponsive without ECN), and the | capability (relative to being unresponsive without ECN), and the | |||
advantage is hardly detectable (see <xref target="DualQ-Test" format="default"/> | advantage is hardly detectable (see <xref target="DualQ-Test" format="default"/> | |||
and section IV-E of <xref target="DCttH19" format="default"/>. Also overload with | and section IV-G of <xref target="L4Seval22" format="default"/>). Al so, overload with | |||
an unresponsive ECT(1) flow gets no more bandwidth advantage than | an unresponsive ECT(1) flow gets no more bandwidth advantage than | |||
with ECT(0).</t> | with ECT(0).</t> | |||
</section> | </section> | |||
</section> | </section> | |||
</section> | </section> | |||
</section> | </section> | |||
</middle> | </middle> | |||
<!-- *****BACK MATTER ***** --> | ||||
<back> | <back> | |||
<displayreference target="I-D.briscoe-tsvwg-l4s-diffserv" to="L4S-DIFFSERV"/> | ||||
<displayreference target="I-D.briscoe-docsis-q-protection" to="DOCSIS-Q-PROT"/> | ||||
<displayreference target="I-D.cardwell-iccrg-bbr-congestion-control" to="BBR-CC" | ||||
/> | ||||
<displayreference target="I-D.briscoe-iccrg-prague-congestion-control" to="PRAGU | ||||
E-CC"/> | ||||
<displayreference target="I-D.mathis-iccrg-relentless-tcp" to="RELENTLESS"/> | ||||
<references> | <references> | |||
<name>References</name> | <name>References</name> | |||
<references> | <references> | |||
<name>Normative References</name> | <name>Normative References</name> | |||
<reference anchor="RFC2119" target="https://www.rfc-editor.org/info/rfc2 | ||||
119" xml:base="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.2119.xml"> | <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.2119. | |||
<front> | xml"/> | |||
<title>Key words for use in RFCs to Indicate Requirement Levels</tit | <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.3168. | |||
le> | xml"/> | |||
<author initials="S." surname="Bradner" fullname="S. Bradner"> | <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.8311. | |||
<organization/> | xml"/> | |||
</author> | ||||
<date year="1997" month="March"/> | <!-- [I-D.ietf-tsvwg-ecn-l4s-id] companion doc 9331 - title matches as of 1/17/2 | |||
<abstract> | 3--> | |||
<t>In many standards track documents several words are used to sig | <reference anchor='RFC9331' target='https://www.rfc-editor.org/info/rfc9331'> | |||
nify the requirements in the specification. These words are often capitalized. | <front> | |||
This document defines these words as they should be interpreted in IETF document | <title>The Explicit Congestion Notification (ECN) Protocol for Low Latency, Low | |||
s. This document specifies an Internet Best Current Practices for the Internet | Loss, and Scalable Throughput (L4S)</title> | |||
Community, and requests discussion and suggestions for improvements.</t> | <author initials='K' surname='De Schepper' fullname='Koen De Schepper'> | |||
</abstract> | <organization /> | |||
</front> | </author> | |||
<seriesInfo name="BCP" value="14"/> | <author initials='B' surname='Briscoe' fullname='Bob Briscoe' role='editor'> | |||
<seriesInfo name="RFC" value="2119"/> | <organization /> | |||
<seriesInfo name="DOI" value="10.17487/RFC2119"/> | </author> | |||
</reference> | <date month='January' year='2023'/> | |||
<reference anchor="RFC3168" target="https://www.rfc-editor.org/info/rfc3 | </front> | |||
168" xml:base="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.3168.xml"> | <seriesInfo name="RFC" value="9331"/> | |||
<front> | <seriesInfo name="DOI" value="10.17487/RFC9331"/> | |||
<title>The Addition of Explicit Congestion Notification (ECN) to IP< | </reference> | |||
/title> | ||||
<author initials="K." surname="Ramakrishnan" fullname="K. Ramakrishn | ||||
an"> | ||||
<organization/> | ||||
</author> | ||||
<author initials="S." surname="Floyd" fullname="S. Floyd"> | ||||
<organization/> | ||||
</author> | ||||
<author initials="D." surname="Black" fullname="D. Black"> | ||||
<organization/> | ||||
</author> | ||||
<date year="2001" month="September"/> | ||||
<abstract> | ||||
<t>This memo specifies the incorporation of ECN (Explicit Congesti | ||||
on Notification) to TCP and IP, including ECN's use of two bits in the IP header | ||||
. [STANDARDS-TRACK]</t> | ||||
</abstract> | ||||
</front> | ||||
<seriesInfo name="RFC" value="3168"/> | ||||
<seriesInfo name="DOI" value="10.17487/RFC3168"/> | ||||
</reference> | ||||
<reference anchor="RFC8311" target="https://www.rfc-editor.org/info/rfc8 | ||||
311" xml:base="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.8311.xml"> | ||||
<front> | ||||
<title>Relaxing Restrictions on Explicit Congestion Notification (EC | ||||
N) Experimentation</title> | ||||
<author initials="D." surname="Black" fullname="D. Black"> | ||||
<organization/> | ||||
</author> | ||||
<date year="2018" month="January"/> | ||||
<abstract> | ||||
<t>This memo updates RFC 3168, which specifies Explicit Congestion | ||||
Notification (ECN) as an alternative to packet drops for indicating network con | ||||
gestion to endpoints. It relaxes restrictions in RFC 3168 that hinder experimen | ||||
tation towards benefits beyond just removal of loss. This memo summarizes the a | ||||
nticipated areas of experimentation and updates RFC 3168 to enable experimentati | ||||
on in these areas. An Experimental RFC in the IETF document stream is required | ||||
to take advantage of any of these enabling updates. In addition, this memo make | ||||
s related updates to the ECN specifications for RTP in RFC 6679 and for the Data | ||||
gram Congestion Control Protocol (DCCP) in RFCs 4341, 4342, and 5622. This memo | ||||
also records the conclusion of the ECN nonce experiment in RFC 3540 and provide | ||||
s the rationale for reclassification of RFC 3540 from Experimental to Historic; | ||||
this reclassification enables new experimental use of the ECT(1) codepoint.</t> | ||||
</abstract> | ||||
</front> | ||||
<seriesInfo name="RFC" value="8311"/> | ||||
<seriesInfo name="DOI" value="10.17487/RFC8311"/> | ||||
</reference> | ||||
<reference anchor="I-D.ietf-tsvwg-ecn-l4s-id" target="https://datatracke | ||||
r.ietf.org/api/v1/doc/document/draft-ietf-tsvwg-ecn-l4s-id/" xml:base="https://b | ||||
ib.ietf.org/public/rfc/bibxml-ids/reference.I-D.ietf-tsvwg-ecn-l4s-id.xml"> | ||||
<front> | ||||
<title>Explicit Congestion Notification (ECN) Protocol for Very Low | ||||
Queuing Delay (L4S)</title> | ||||
<author fullname="Koen De Schepper"/> | ||||
<author fullname="Bob Briscoe"/> | ||||
<date day="8" month="August" year="2022"/> | ||||
<abstract> | ||||
<t>This specification defines the protocol to be used for a new ne | ||||
twork | ||||
service called low latency, low loss and scalable throughput (L4S). | ||||
L4S uses an Explicit Congestion Notification (ECN) scheme at the IP | ||||
layer that is similar to the original (or 'Classic') ECN approach, | ||||
except as specified within. L4S uses 'scalable' congestion control, | ||||
which induces much more frequent control signals from the network and | ||||
it responds to them with much more fine-grained adjustments, so that | ||||
very low (typically sub-millisecond on average) and consistently low | ||||
queuing delay becomes possible for L4S traffic without compromising | ||||
link utilization. Thus even capacity-seeking (TCP-like) traffic can | ||||
have high bandwidth and very low delay at the same time, even during | ||||
periods of high traffic load.</t> | ||||
<t>The L4S identifier defined in this document distinguishes L4S f | ||||
rom | ||||
'Classic' (e.g. TCP-Reno-friendly) traffic. Then, network | ||||
bottlenecks can be incrementally modified to distinguish and isolate | ||||
existing traffic that still follows the Classic behaviour, to prevent | ||||
it degrading the low queuing delay and low loss of L4S traffic. This | ||||
experimental track specification defines the rules that L4S | ||||
transports and network elements need to follow, with the intention | ||||
that L4S flows neither harm each other's performance nor that of | ||||
Classic traffic. It also suggests open questions to be investigated | ||||
during experimentation. Examples of new active queue management | ||||
(AQM) marking algorithms and examples of new transports (whether TCP- | ||||
like or real-time) are specified separately.</t> | ||||
</abstract> | ||||
</front> | ||||
<seriesInfo name="Internet-Draft" value="draft-ietf-tsvwg-ecn-l4s-id-2 | ||||
8"/> | ||||
</reference> | ||||
</references> | </references> | |||
<references> | <references> | |||
<name>Informative References</name> | <name>Informative References</name> | |||
<reference anchor="RFC0970" target="https://www.rfc-editor.org/info/rfc9 | ||||
70" xml:base="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.0970.xml"> | <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.0970. | |||
<front> | xml"/> | |||
<title>On Packet Switches With Infinite Storage</title> | ||||
<author initials="J." surname="Nagle" fullname="J. Nagle"> | <reference anchor="RED" target="https://dl.acm.org/doi/10.1109/90.251892 | |||
<organization/> | "> | |||
</author> | ||||
<date year="1985" month="December"/> | ||||
<abstract> | ||||
<t>The purpose of this RFC is to focus discussion on a particular | ||||
problem in the ARPA-Internet and possible methods of solution. Most prior wo | ||||
rk on congestion in datagram systems focuses on buffer management. In this | ||||
memo the case of a packet switch with infinite storage is considered. Such | ||||
a packet switch can never run out of buffers. It can, however, still become | ||||
congested. The meaning of congestion in an infinite-storage system is explor | ||||
ed. An unexpected result is found that shows a datagram network with infinit | ||||
e storage, first-in-first-out queuing, at least two packet switches, and a fi | ||||
nite packet lifetime will, under overload, drop all packets. By attacking th | ||||
e problem of congestion for the infinite-storage case, new solutions applicab | ||||
le to switches with finite storage may be found. No proposed solutions in this | ||||
document are intended as standards for the ARPA-Internet at this time.</t> | ||||
</abstract> | ||||
</front> | ||||
<seriesInfo name="RFC" value="970"/> | ||||
<seriesInfo name="DOI" value="10.17487/RFC0970"/> | ||||
</reference> | ||||
<reference anchor="RFC2309" target="https://www.rfc-editor.org/info/rfc2 | ||||
309" xml:base="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.2309.xml"> | ||||
<front> | ||||
<title>Recommendations on Queue Management and Congestion Avoidance | ||||
in the Internet</title> | ||||
<author initials="B." surname="Braden" fullname="B. Braden"> | ||||
<organization/> | ||||
</author> | ||||
<author initials="D." surname="Clark" fullname="D. Clark"> | ||||
<organization/> | ||||
</author> | ||||
<author initials="J." surname="Crowcroft" fullname="J. Crowcroft"> | ||||
<organization/> | ||||
</author> | ||||
<author initials="B." surname="Davie" fullname="B. Davie"> | ||||
<organization/> | ||||
</author> | ||||
<author initials="S." surname="Deering" fullname="S. Deering"> | ||||
<organization/> | ||||
</author> | ||||
<author initials="D." surname="Estrin" fullname="D. Estrin"> | ||||
<organization/> | ||||
</author> | ||||
<author initials="S." surname="Floyd" fullname="S. Floyd"> | ||||
<organization/> | ||||
</author> | ||||
<author initials="V." surname="Jacobson" fullname="V. Jacobson"> | ||||
<organization/> | ||||
</author> | ||||
<author initials="G." surname="Minshall" fullname="G. Minshall"> | ||||
<organization/> | ||||
</author> | ||||
<author initials="C." surname="Partridge" fullname="C. Partridge"> | ||||
<organization/> | ||||
</author> | ||||
<author initials="L." surname="Peterson" fullname="L. Peterson"> | ||||
<organization/> | ||||
</author> | ||||
<author initials="K." surname="Ramakrishnan" fullname="K. Ramakrishn | ||||
an"> | ||||
<organization/> | ||||
</author> | ||||
<author initials="S." surname="Shenker" fullname="S. Shenker"> | ||||
<organization/> | ||||
</author> | ||||
<author initials="J." surname="Wroclawski" fullname="J. Wroclawski"> | ||||
<organization/> | ||||
</author> | ||||
<author initials="L." surname="Zhang" fullname="L. Zhang"> | ||||
<organization/> | ||||
</author> | ||||
<date year="1998" month="April"/> | ||||
<abstract> | ||||
<t>This memo presents two recommendations to the Internet communit | ||||
y concerning measures to improve and preserve Internet performance. It presents | ||||
a strong recommendation for testing, standardization, and widespread deployment | ||||
of active queue management in routers, to improve the performance of today's In | ||||
ternet. It also urges a concerted effort of research, measurement, and ultimate | ||||
deployment of router mechanisms to protect the Internet from flows that are not | ||||
sufficiently responsive to congestion notification. This memo provides informa | ||||
tion for the Internet community. It does not specify an Internet standard of an | ||||
y kind.</t> | ||||
</abstract> | ||||
</front> | ||||
<seriesInfo name="RFC" value="2309"/> | ||||
<seriesInfo name="DOI" value="10.17487/RFC2309"/> | ||||
</reference> | ||||
<reference anchor="RFC2914" target="https://www.rfc-editor.org/info/rfc2 | ||||
914" xml:base="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.2914.xml"> | ||||
<front> | ||||
<title>Congestion Control Principles</title> | ||||
<author initials="S." surname="Floyd" fullname="S. Floyd"> | ||||
<organization/> | ||||
</author> | ||||
<date year="2000" month="September"/> | ||||
<abstract> | ||||
<t>The goal of this document is to explain the need for congestion | ||||
control in the Internet, and to discuss what constitutes correct congestion con | ||||
trol. This document specifies an Internet Best Current Practices for the Intern | ||||
et Community, and requests discussion and suggestions for improvements.</t> | ||||
</abstract> | ||||
</front> | ||||
<seriesInfo name="BCP" value="41"/> | ||||
<seriesInfo name="RFC" value="2914"/> | ||||
<seriesInfo name="DOI" value="10.17487/RFC2914"/> | ||||
</reference> | ||||
<reference anchor="RFC3246" target="https://www.rfc-editor.org/info/rfc3 | ||||
246" xml:base="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.3246.xml"> | ||||
<front> | ||||
<title>An Expedited Forwarding PHB (Per-Hop Behavior)</title> | ||||
<author initials="B." surname="Davie" fullname="B. Davie"> | ||||
<organization/> | ||||
</author> | ||||
<author initials="A." surname="Charny" fullname="A. Charny"> | ||||
<organization/> | ||||
</author> | ||||
<author initials="J.C.R." surname="Bennet" fullname="J.C.R. Bennet"> | ||||
<organization/> | ||||
</author> | ||||
<author initials="K." surname="Benson" fullname="K. Benson"> | ||||
<organization/> | ||||
</author> | ||||
<author initials="J.Y." surname="Le Boudec" fullname="J.Y. Le Boudec | ||||
"> | ||||
<organization/> | ||||
</author> | ||||
<author initials="W." surname="Courtney" fullname="W. Courtney"> | ||||
<organization/> | ||||
</author> | ||||
<author initials="S." surname="Davari" fullname="S. Davari"> | ||||
<organization/> | ||||
</author> | ||||
<author initials="V." surname="Firoiu" fullname="V. Firoiu"> | ||||
<organization/> | ||||
</author> | ||||
<author initials="D." surname="Stiliadis" fullname="D. Stiliadis"> | ||||
<organization/> | ||||
</author> | ||||
<date year="2002" month="March"/> | ||||
<abstract> | ||||
<t>This document defines a PHB (per-hop behavior) called Expedited | ||||
Forwarding (EF). The PHB is a basic building block in the Differentiated Servi | ||||
ces architecture. EF is intended to provide a building block for low delay, low | ||||
jitter and low loss services by ensuring that the EF aggregate is served at a c | ||||
ertain configured rate. This document obsoletes RFC 2598. [STANDARDS-TRACK]</t> | ||||
</abstract> | ||||
</front> | ||||
<seriesInfo name="RFC" value="3246"/> | ||||
<seriesInfo name="DOI" value="10.17487/RFC3246"/> | ||||
</reference> | ||||
<reference anchor="RFC3649" target="https://www.rfc-editor.org/info/rfc3 | ||||
649" xml:base="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.3649.xml"> | ||||
<front> | ||||
<title>HighSpeed TCP for Large Congestion Windows</title> | ||||
<author initials="S." surname="Floyd" fullname="S. Floyd"> | ||||
<organization/> | ||||
</author> | ||||
<date year="2003" month="December"/> | ||||
<abstract> | ||||
<t>The proposals in this document are experimental. While they ma | ||||
y be deployed in the current Internet, they do not represent a consensus that th | ||||
is is the best method for high-speed congestion control. In particular, we note | ||||
that alternative experimental proposals are likely to be forthcoming, and it is | ||||
not well understood how the proposals in this document will interact with such | ||||
alternative proposals. This document proposes HighSpeed TCP, a modification to | ||||
TCP's congestion control mechanism for use with TCP connections with large conge | ||||
stion windows. The congestion control mechanisms of the current Standard TCP co | ||||
nstrains the congestion windows that can be achieved by TCP in realistic environ | ||||
ments. For example, for a Standard TCP connection with 1500-byte packets and a | ||||
100 ms round-trip time, achieving a steady-state throughput of 10 Gbps would req | ||||
uire an average congestion window of 83,333 segments, and a packet drop rate of | ||||
at most one congestion event every 5,000,000,000 packets (or equivalently, at mo | ||||
st one congestion event every 1 2/3 hours). This is widely acknowledged as an u | ||||
nrealistic constraint. To address this limitation of TCP, this document proposes | ||||
HighSpeed TCP, and solicits experimentation and feedback from the wider communi | ||||
ty.</t> | ||||
</abstract> | ||||
</front> | ||||
<seriesInfo name="RFC" value="3649"/> | ||||
<seriesInfo name="DOI" value="10.17487/RFC3649"/> | ||||
</reference> | ||||
<reference anchor="RFC5033" target="https://www.rfc-editor.org/info/rfc5 | ||||
033" xml:base="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.5033.xml"> | ||||
<front> | ||||
<title>Specifying New Congestion Control Algorithms</title> | ||||
<author initials="S." surname="Floyd" fullname="S. Floyd"> | ||||
<organization/> | ||||
</author> | ||||
<author initials="M." surname="Allman" fullname="M. Allman"> | ||||
<organization/> | ||||
</author> | ||||
<date year="2007" month="August"/> | ||||
<abstract> | ||||
<t>The IETF's standard congestion control schemes have been widely | ||||
shown to be inadequate for various environments (e.g., high-speed networks). R | ||||
ecent research has yielded many alternate congestion control schemes that signif | ||||
icantly differ from the IETF's congestion control principles. Using these new c | ||||
ongestion control schemes in the global Internet has possible ramifications to b | ||||
oth the traffic using the new congestion control and to traffic using the curren | ||||
tly standardized congestion control. Therefore, the IETF must proceed with caut | ||||
ion when dealing with alternate congestion control proposals. The goal of this | ||||
document is to provide guidance for considering alternate congestion control alg | ||||
orithms within the IETF. This document specifies an Internet Best Current Pract | ||||
ices for the Internet Community, and requests discussion and suggestions for imp | ||||
rovements.</t> | ||||
</abstract> | ||||
</front> | ||||
<seriesInfo name="BCP" value="133"/> | ||||
<seriesInfo name="RFC" value="5033"/> | ||||
<seriesInfo name="DOI" value="10.17487/RFC5033"/> | ||||
</reference> | ||||
<reference anchor="RFC5348" target="https://www.rfc-editor.org/info/rfc5 | ||||
348" xml:base="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.5348.xml"> | ||||
<front> | ||||
<title>TCP Friendly Rate Control (TFRC): Protocol Specification</tit | ||||
le> | ||||
<author initials="S." surname="Floyd" fullname="S. Floyd"> | ||||
<organization/> | ||||
</author> | ||||
<author initials="M." surname="Handley" fullname="M. Handley"> | ||||
<organization/> | ||||
</author> | ||||
<author initials="J." surname="Padhye" fullname="J. Padhye"> | ||||
<organization/> | ||||
</author> | ||||
<author initials="J." surname="Widmer" fullname="J. Widmer"> | ||||
<organization/> | ||||
</author> | ||||
<date year="2008" month="September"/> | ||||
<abstract> | ||||
<t>This document specifies TCP Friendly Rate Control (TFRC). TFRC | ||||
is a congestion control mechanism for unicast flows operating in a best-effort | ||||
Internet environment. It is reasonably fair when competing for bandwidth with T | ||||
CP flows, but has a much lower variation of throughput over time compared with T | ||||
CP, making it more suitable for applications such as streaming media where a rel | ||||
atively smooth sending rate is of importance.</t> | ||||
<t>This document obsoletes RFC 3448 and updates RFC 4342. [STANDA | ||||
RDS-TRACK]</t> | ||||
</abstract> | ||||
</front> | ||||
<seriesInfo name="RFC" value="5348"/> | ||||
<seriesInfo name="DOI" value="10.17487/RFC5348"/> | ||||
</reference> | ||||
<reference anchor="RFC5681" target="https://www.rfc-editor.org/info/rfc5 | ||||
681" xml:base="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.5681.xml"> | ||||
<front> | ||||
<title>TCP Congestion Control</title> | ||||
<author initials="M." surname="Allman" fullname="M. Allman"> | ||||
<organization/> | ||||
</author> | ||||
<author initials="V." surname="Paxson" fullname="V. Paxson"> | ||||
<organization/> | ||||
</author> | ||||
<author initials="E." surname="Blanton" fullname="E. Blanton"> | ||||
<organization/> | ||||
</author> | ||||
<date year="2009" month="September"/> | ||||
<abstract> | ||||
<t>This document defines TCP's four intertwined congestion control | ||||
algorithms: slow start, congestion avoidance, fast retransmit, and fast recover | ||||
y. In addition, the document specifies how TCP should begin transmission after | ||||
a relatively long idle period, as well as discussing various acknowledgment gene | ||||
ration methods. This document obsoletes RFC 2581. [STANDARDS-TRACK]</t> | ||||
</abstract> | ||||
</front> | ||||
<seriesInfo name="RFC" value="5681"/> | ||||
<seriesInfo name="DOI" value="10.17487/RFC5681"/> | ||||
</reference> | ||||
<reference anchor="RFC5706" target="https://www.rfc-editor.org/info/rfc5 | ||||
706" xml:base="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.5706.xml"> | ||||
<front> | ||||
<title>Guidelines for Considering Operations and Management of New P | ||||
rotocols and Protocol Extensions</title> | ||||
<author initials="D." surname="Harrington" fullname="D. Harrington"> | ||||
<organization/> | ||||
</author> | ||||
<date year="2009" month="November"/> | ||||
<abstract> | ||||
<t>New protocols or protocol extensions are best designed with due | ||||
consideration of the functionality needed to operate and manage the protocols. | ||||
Retrofitting operations and management is sub-optimal. The purpose of this docu | ||||
ment is to provide guidance to authors and reviewers of documents that define ne | ||||
w protocols or protocol extensions regarding aspects of operations and managemen | ||||
t that should be considered. This memo provides information for the Internet co | ||||
mmunity.</t> | ||||
</abstract> | ||||
</front> | ||||
<seriesInfo name="RFC" value="5706"/> | ||||
<seriesInfo name="DOI" value="10.17487/RFC5706"/> | ||||
</reference> | ||||
<reference anchor="RFC7567" target="https://www.rfc-editor.org/info/rfc7 | ||||
567" xml:base="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.7567.xml"> | ||||
<front> | ||||
<title>IETF Recommendations Regarding Active Queue Management</title | ||||
> | ||||
<author initials="F." surname="Baker" fullname="F. Baker" role="edit | ||||
or"> | ||||
<organization/> | ||||
</author> | ||||
<author initials="G." surname="Fairhurst" fullname="G. Fairhurst" ro | ||||
le="editor"> | ||||
<organization/> | ||||
</author> | ||||
<date year="2015" month="July"/> | ||||
<abstract> | ||||
<t>This memo presents recommendations to the Internet community co | ||||
ncerning measures to improve and preserve Internet performance. It presents a s | ||||
trong recommendation for testing, standardization, and widespread deployment of | ||||
active queue management (AQM) in network devices to improve the performance of t | ||||
oday's Internet. It also urges a concerted effort of research, measurement, and | ||||
ultimate deployment of AQM mechanisms to protect the Internet from flows that a | ||||
re not sufficiently responsive to congestion notification.</t> | ||||
<t>Based on 15 years of experience and new research, this document | ||||
replaces the recommendations of RFC 2309.</t> | ||||
</abstract> | ||||
</front> | ||||
<seriesInfo name="BCP" value="197"/> | ||||
<seriesInfo name="RFC" value="7567"/> | ||||
<seriesInfo name="DOI" value="10.17487/RFC7567"/> | ||||
</reference> | ||||
<reference anchor="RFC8033" target="https://www.rfc-editor.org/info/rfc8 | ||||
033" xml:base="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.8033.xml"> | ||||
<front> | ||||
<title>Proportional Integral Controller Enhanced (PIE): A Lightweigh | ||||
t Control Scheme to Address the Bufferbloat Problem</title> | ||||
<author initials="R." surname="Pan" fullname="R. Pan"> | ||||
<organization/> | ||||
</author> | ||||
<author initials="P." surname="Natarajan" fullname="P. Natarajan"> | ||||
<organization/> | ||||
</author> | ||||
<author initials="F." surname="Baker" fullname="F. Baker"> | ||||
<organization/> | ||||
</author> | ||||
<author initials="G." surname="White" fullname="G. White"> | ||||
<organization/> | ||||
</author> | ||||
<date year="2017" month="February"/> | ||||
<abstract> | ||||
<t>Bufferbloat is a phenomenon in which excess buffers in the netw | ||||
ork cause high latency and latency variation. As more and more interactive appl | ||||
ications (e.g., voice over IP, real-time video streaming, and financial transact | ||||
ions) run in the Internet, high latency and latency variation degrade applicatio | ||||
n performance. There is a pressing need to design intelligent queue management | ||||
schemes that can control latency and latency variation, and hence provide desira | ||||
ble quality of service to users.</t> | ||||
<t>This document presents a lightweight active queue management de | ||||
sign called "PIE" (Proportional Integral controller Enhanced) that can effective | ||||
ly control the average queuing latency to a target value. Simulation results, th | ||||
eoretical analysis, and Linux testbed results have shown that PIE can ensure low | ||||
latency and achieve high link utilization under various congestion situations. | ||||
The design does not require per-packet timestamps, so it incurs very little ove | ||||
rhead and is simple enough to implement in both hardware and software.</t> | ||||
</abstract> | ||||
</front> | ||||
<seriesInfo name="RFC" value="8033"/> | ||||
<seriesInfo name="DOI" value="10.17487/RFC8033"/> | ||||
</reference> | ||||
<reference anchor="RFC8034" target="https://www.rfc-editor.org/info/rfc8 | ||||
034" xml:base="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.8034.xml"> | ||||
<front> | ||||
<title>Active Queue Management (AQM) Based on Proportional Integral | ||||
Controller Enhanced (PIE) for Data-Over-Cable Service Interface Specifications (D | ||||
OCSIS) Cable Modems</title> | ||||
<author initials="G." surname="White" fullname="G. White"> | ||||
<organization/> | ||||
</author> | ||||
<author initials="R." surname="Pan" fullname="R. Pan"> | ||||
<organization/> | ||||
</author> | ||||
<date year="2017" month="February"/> | ||||
<abstract> | ||||
<t>Cable modems based on Data-Over-Cable Service Interface Specifi | ||||
cations (DOCSIS) provide broadband Internet access to over one hundred million u | ||||
sers worldwide. In some cases, the cable modem connection is the bottleneck (lo | ||||
west speed) link between the customer and the Internet. As a result, the impact | ||||
of buffering and bufferbloat in the cable modem can have a significant effect o | ||||
n user experience. The CableLabs DOCSIS 3.1 specification introduces requiremen | ||||
ts for cable modems to support an Active Queue Management (AQM) algorithm that i | ||||
s intended to alleviate the impact that buffering has on latency-sensitive traff | ||||
ic, while preserving bulk throughput performance. In addition, the CableLabs DO | ||||
CSIS 3.0 specifications have also been amended to contain similar requirements. | ||||
This document describes the requirements on AQM that apply to DOCSIS equipment, | ||||
including a description of the "DOCSIS-PIE" algorithm that is required on DOCSI | ||||
S 3.1 cable modems.</t> | ||||
</abstract> | ||||
</front> | ||||
<seriesInfo name="RFC" value="8034"/> | ||||
<seriesInfo name="DOI" value="10.17487/RFC8034"/> | ||||
</reference> | ||||
<reference anchor="RFC8174" target="https://www.rfc-editor.org/info/rfc8 | ||||
174" xml:base="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.8174.xml"> | ||||
<front> | ||||
<title>Ambiguity of Uppercase vs Lowercase in RFC 2119 Key Words</ti | ||||
tle> | ||||
<author initials="B." surname="Leiba" fullname="B. Leiba"> | ||||
<organization/> | ||||
</author> | ||||
<date year="2017" month="May"/> | ||||
<abstract> | ||||
<t>RFC 2119 specifies common key words that may be used in protoco | ||||
l specifications. This document aims to reduce the ambiguity by clarifying tha | ||||
t only UPPERCASE usage of the key words have the defined special meanings.</t> | ||||
</abstract> | ||||
</front> | ||||
<seriesInfo name="BCP" value="14"/> | ||||
<seriesInfo name="RFC" value="8174"/> | ||||
<seriesInfo name="DOI" value="10.17487/RFC8174"/> | ||||
</reference> | ||||
<reference anchor="RFC8257" target="https://www.rfc-editor.org/info/rfc8 | ||||
257" xml:base="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.8257.xml"> | ||||
<front> | ||||
<title>Data Center TCP (DCTCP): TCP Congestion Control for Data Cent | ||||
ers</title> | ||||
<author initials="S." surname="Bensley" fullname="S. Bensley"> | ||||
<organization/> | ||||
</author> | ||||
<author initials="D." surname="Thaler" fullname="D. Thaler"> | ||||
<organization/> | ||||
</author> | ||||
<author initials="P." surname="Balasubramanian" fullname="P. Balasub | ||||
ramanian"> | ||||
<organization/> | ||||
</author> | ||||
<author initials="L." surname="Eggert" fullname="L. Eggert"> | ||||
<organization/> | ||||
</author> | ||||
<author initials="G." surname="Judd" fullname="G. Judd"> | ||||
<organization/> | ||||
</author> | ||||
<date year="2017" month="October"/> | ||||
<abstract> | ||||
<t>This Informational RFC describes Data Center TCP (DCTCP): a TCP | ||||
congestion control scheme for data-center traffic. DCTCP extends the Explicit | ||||
Congestion Notification (ECN) processing to estimate the fraction of bytes that | ||||
encounter congestion rather than simply detecting that some congestion has occur | ||||
red. DCTCP then scales the TCP congestion window based on this estimate. This | ||||
method achieves high-burst tolerance, low latency, and high throughput with shal | ||||
low-buffered switches. This memo also discusses deployment issues related to t | ||||
he coexistence of DCTCP and conventional TCP, discusses the lack of a negotiatin | ||||
g mechanism between sender and receiver, and presents some possible mitigations. | ||||
This memo documents DCTCP as currently implemented by several major operating | ||||
systems. DCTCP, as described in this specification, is applicable to deployment | ||||
s in controlled environments like data centers, but it must not be deployed over | ||||
the public Internet without additional measures.</t> | ||||
</abstract> | ||||
</front> | ||||
<seriesInfo name="RFC" value="8257"/> | ||||
<seriesInfo name="DOI" value="10.17487/RFC8257"/> | ||||
</reference> | ||||
<reference anchor="RFC8298" target="https://www.rfc-editor.org/info/rfc8 | ||||
298" xml:base="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.8298.xml"> | ||||
<front> | ||||
<title>Self-Clocked Rate Adaptation for Multimedia</title> | ||||
<author initials="I." surname="Johansson" fullname="I. Johansson"> | ||||
<organization/> | ||||
</author> | ||||
<author initials="Z." surname="Sarker" fullname="Z. Sarker"> | ||||
<organization/> | ||||
</author> | ||||
<date year="2017" month="December"/> | ||||
<abstract> | ||||
<t>This memo describes a rate adaptation algorithm for conversatio | ||||
nal media services such as interactive video. The solution conforms to the pack | ||||
et conservation principle and uses a hybrid loss-and-delay-based congestion con | ||||
trol algorithm. The algorithm is evaluated over both simulated Internet bottlen | ||||
eck scenarios as well as in a Long Term Evolution (LTE) system simulator and is | ||||
shown to achieve both low latency and high video throughput in these scenarios.< | ||||
/t> | ||||
</abstract> | ||||
</front> | ||||
<seriesInfo name="RFC" value="8298"/> | ||||
<seriesInfo name="DOI" value="10.17487/RFC8298"/> | ||||
</reference> | ||||
<reference anchor="RFC8290" target="https://www.rfc-editor.org/info/rfc8 | ||||
290" xml:base="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.8290.xml"> | ||||
<front> | ||||
<title>The Flow Queue CoDel Packet Scheduler and Active Queue Manage | ||||
ment Algorithm</title> | ||||
<author initials="T." surname="Hoeiland-Joergensen" fullname="T. Hoe | ||||
iland-Joergensen"> | ||||
<organization/> | ||||
</author> | ||||
<author initials="P." surname="McKenney" fullname="P. McKenney"> | ||||
<organization/> | ||||
</author> | ||||
<author initials="D." surname="Taht" fullname="D. Taht"> | ||||
<organization/> | ||||
</author> | ||||
<author initials="J." surname="Gettys" fullname="J. Gettys"> | ||||
<organization/> | ||||
</author> | ||||
<author initials="E." surname="Dumazet" fullname="E. Dumazet"> | ||||
<organization/> | ||||
</author> | ||||
<date year="2018" month="January"/> | ||||
<abstract> | ||||
<t>This memo presents the FQ-CoDel hybrid packet scheduler and Act | ||||
ive Queue Management (AQM) algorithm, a powerful tool for fighting bufferbloat a | ||||
nd reducing latency.</t> | ||||
<t>FQ-CoDel mixes packets from multiple flows and reduces the impa | ||||
ct of head-of-line blocking from bursty traffic. It provides isolation for low- | ||||
rate traffic such as DNS, web, and videoconferencing traffic. It improves utili | ||||
sation across the networking fabric, especially for bidirectional traffic, by ke | ||||
eping queue lengths short, and it can be implemented in a memory- and CPU-effici | ||||
ent fashion across a wide range of hardware.</t> | ||||
</abstract> | ||||
</front> | ||||
<seriesInfo name="RFC" value="8290"/> | ||||
<seriesInfo name="DOI" value="10.17487/RFC8290"/> | ||||
</reference> | ||||
<reference anchor="RFC8312" target="https://www.rfc-editor.org/info/rfc8 | ||||
312" xml:base="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.8312.xml"> | ||||
<front> | ||||
<title>CUBIC for Fast Long-Distance Networks</title> | ||||
<author initials="I." surname="Rhee" fullname="I. Rhee"> | ||||
<organization/> | ||||
</author> | ||||
<author initials="L." surname="Xu" fullname="L. Xu"> | ||||
<organization/> | ||||
</author> | ||||
<author initials="S." surname="Ha" fullname="S. Ha"> | ||||
<organization/> | ||||
</author> | ||||
<author initials="A." surname="Zimmermann" fullname="A. Zimmermann"> | ||||
<organization/> | ||||
</author> | ||||
<author initials="L." surname="Eggert" fullname="L. Eggert"> | ||||
<organization/> | ||||
</author> | ||||
<author initials="R." surname="Scheffenegger" fullname="R. Scheffene | ||||
gger"> | ||||
<organization/> | ||||
</author> | ||||
<date year="2018" month="February"/> | ||||
<abstract> | ||||
<t>CUBIC is an extension to the current TCP standards. It differs | ||||
from the current TCP standards only in the congestion control algorithm on the | ||||
sender side. In particular, it uses a cubic function instead of a linear window | ||||
increase function of the current TCP standards to improve scalability and stabi | ||||
lity under fast and long-distance networks. CUBIC and its predecessor algorithm | ||||
have been adopted as defaults by Linux and have been used for many years. This | ||||
document provides a specification of CUBIC to enable third-party implementation | ||||
s and to solicit community feedback through experimentation on the performance o | ||||
f CUBIC.</t> | ||||
</abstract> | ||||
</front> | ||||
<seriesInfo name="RFC" value="8312"/> | ||||
<seriesInfo name="DOI" value="10.17487/RFC8312"/> | ||||
</reference> | ||||
<reference anchor="RFC8404" target="https://www.rfc-editor.org/info/rfc8 | ||||
404" xml:base="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.8404.xml"> | ||||
<front> | <front> | |||
<title>Effects of Pervasive Encryption on Operators</title> | <title>Random Early Detection Gateways for Congestion Avoidance</tit | |||
<author initials="K." surname="Moriarty" fullname="K. Moriarty" role | le> | |||
="editor"> | <author fullname="Sally Floyd" initials="S." surname="Floyd"> | |||
<organization/> | <organization>UC Berkeley</organization> | |||
</author> | </author> | |||
<author initials="A." surname="Morton" fullname="A. Morton" role="ed | <author fullname="Van Jacobson" initials="V." surname="Jacobson"> | |||
itor"> | <organization>UC Berkeley</organization> | |||
<organization/> | ||||
</author> | </author> | |||
<date year="2018" month="July"/> | <date month="August" year="1993"/> | |||
<abstract> | ||||
<t>Pervasive monitoring attacks on the privacy of Internet users a | ||||
re of serious concern to both user and operator communities. RFC 7258 discusses | ||||
the critical need to protect users' privacy when developing IETF specifications | ||||
and also recognizes that making networks unmanageable to mitigate pervasive mon | ||||
itoring is not an acceptable outcome: an appropriate balance is needed. This do | ||||
cument discusses current security and network operations as well as management p | ||||
ractices that may be impacted by the shift to increased use of encryption to hel | ||||
p guide protocol development in support of manageable and secure networks.</t> | ||||
</abstract> | ||||
</front> | </front> | |||
<seriesInfo name="RFC" value="8404"/> | <seriesInfo name="DOI" value="10.1109/90.251892"/> | |||
<seriesInfo name="DOI" value="10.17487/RFC8404"/> | <refcontent>IEEE/ACM Transactions on Networking, Volume 1, Issue 4, pp | |||
. 397-413</refcontent> | ||||
</reference> | </reference> | |||
<reference anchor="ARED01" target="https://www.icir.org/floyd/red.html"> | ||||
<xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.2914. | ||||
xml"/> | ||||
<xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.3246. | ||||
xml"/> | ||||
<xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.3649. | ||||
xml"/> | ||||
<xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.5033. | ||||
xml"/> | ||||
<xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.5348. | ||||
xml"/> | ||||
<xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.5681. | ||||
xml"/> | ||||
<xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.5706. | ||||
xml"/> | ||||
<xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.7567. | ||||
xml"/> | ||||
<xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.8033. | ||||
xml"/> | ||||
<xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.8034. | ||||
xml"/> | ||||
<xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.8174. | ||||
xml"/> | ||||
<xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.8257. | ||||
xml"/> | ||||
<xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.8298. | ||||
xml"/> | ||||
<xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.8290. | ||||
xml"/> | ||||
<xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.8312. | ||||
xml"/> | ||||
<xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.8404. | ||||
xml"/> | ||||
<xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.9000. | ||||
xml"/> | ||||
<reference anchor="ARED01" target="https://www.icsi.berkeley.edu/icsi/no | ||||
de/2032"> | ||||
<front> | <front> | |||
<title>Adaptive RED: An Algorithm for Increasing the Robustness of | <title>Adaptive RED: An Algorithm for Increasing the Robustness of | |||
RED's Active Queue Management</title> | RED's Active Queue Management</title> | |||
<author fullname="Sally Floyd" initials="S." surname="Floyd"> | <author fullname="Sally Floyd" initials="S." surname="Floyd"> | |||
<organization>ACIRI</organization> | <organization>ACIRI</organization> | |||
</author> | </author> | |||
<author fullname="Ramakrishna Gummadi" initials="R." surname="Gummad i"> | <author fullname="Ramakrishna Gummadi" initials="R." surname="Gummad i"> | |||
<organization>ACIRI</organization> | <organization>ACIRI</organization> | |||
</author> | </author> | |||
<author fullname="S. Shenker" initials="S." surname="Shenker"> | <author fullname="S. Shenker" initials="S." surname="Shenker"> | |||
<organization>ACIRI</organization> | <organization>ACIRI</organization> | |||
</author> | </author> | |||
<date month="August" year="2001"/> | <date month="August" year="2001"/> | |||
</front> | </front> | |||
<seriesInfo name="ACIRI Technical Report" value=""/> | <refcontent>ACIRI Technical Report 301</refcontent> | |||
</reference> | ||||
<reference anchor="I-D.ietf-tsvwg-l4s-arch" target="https://datatracker. | ||||
ietf.org/api/v1/doc/document/draft-ietf-tsvwg-l4s-arch/" xml:base="https://bib.i | ||||
etf.org/public/rfc/bibxml-ids/reference.I-D.ietf-tsvwg-l4s-arch.xml"> | ||||
<front> | ||||
<title>Low Latency, Low Loss, Scalable Throughput (L4S) Internet Ser | ||||
vice: Architecture</title> | ||||
<author fullname="Bob Briscoe"/> | ||||
<author fullname="Koen De Schepper"/> | ||||
<author fullname="Marcelo Bagnulo"/> | ||||
<author fullname="Greg White"/> | ||||
<date day="27" month="July" year="2022"/> | ||||
<abstract> | ||||
<t>This document describes the L4S architecture, which enables Int | ||||
ernet | ||||
applications to achieve Low queuing Latency, Low Loss, and Scalable | ||||
throughput (L4S). The insight on which L4S is based is that the root | ||||
cause of queuing delay is in the congestion controllers of senders, | ||||
not in the queue itself. With the L4S architecture all Internet | ||||
applications could (but do not have to) transition away from | ||||
congestion control algorithms that cause substantial queuing delay, | ||||
to a new class of congestion controls that induce very little | ||||
queuing, aided by explicit congestion signalling from the network. | ||||
This new class of congestion controls can provide low latency for | ||||
capacity-seeking flows, so applications can achieve both high | ||||
bandwidth and low latency.</t> | ||||
<t>The architecture primarily concerns incremental deployment. It | ||||
defines mechanisms that allow the new class of L4S congestion | ||||
controls to coexist with 'Classic' congestion controls in a shared | ||||
network. These mechanisms aim to ensure that the latency and | ||||
throughput performance using an L4S-compliant congestion controller | ||||
is usually much better (and rarely worse) than performance would have | ||||
been using a 'Classic' congestion controller, and that competing | ||||
flows continuing to use 'Classic' controllers are typically not | ||||
impacted by the presence of L4S. These characteristics are important | ||||
to encourage adoption of L4S congestion control algorithms and L4S | ||||
compliant network elements.</t> | ||||
<t>The L4S architecture consists of three components: network supp | ||||
ort to | ||||
isolate L4S traffic from classic traffic; protocol features that | ||||
allow network elements to identify L4S traffic; and host support for | ||||
L4S congestion controls. The protocol is defined separately as an | ||||
experimental change to Explicit Congestion Notification (ECN).</t> | ||||
</abstract> | ||||
</front> | ||||
<seriesInfo name="Internet-Draft" value="draft-ietf-tsvwg-l4s-arch-19" | ||||
/> | ||||
</reference> | ||||
<reference anchor="I-D.briscoe-tsvwg-l4s-diffserv" target="https://datat | ||||
racker.ietf.org/api/v1/doc/document/draft-briscoe-tsvwg-l4s-diffserv/" xml:base= | ||||
"https://bib.ietf.org/public/rfc/bibxml-ids/reference.I-D.briscoe-tsvwg-l4s-diff | ||||
serv.xml"> | ||||
<front> | ||||
<title>Interactions between Low Latency, Low Loss, Scalable Throughp | ||||
ut (L4S) and Differentiated Services</title> | ||||
<author fullname="Bob Briscoe"/> | ||||
<date day="2" month="July" year="2018"/> | ||||
<abstract> | ||||
<t>L4S and Diffserv offer somewhat overlapping services (low laten | ||||
cy and | ||||
low loss), but bandwidth allocation is out of scope for L4S. | ||||
Therefore there is scope for the two approaches to complement each | ||||
other, but also to conflict. This informational document explains | ||||
how the two approaches interact, how they can be arranged to | ||||
complement each other and in which cases one can stand alone without | ||||
needing the other.</t> | ||||
</abstract> | ||||
</front> | ||||
<seriesInfo name="Internet-Draft" value="draft-briscoe-tsvwg-l4s-diffs | ||||
erv-02"/> | ||||
</reference> | ||||
<reference anchor="I-D.briscoe-docsis-q-protection" target="https://data | ||||
tracker.ietf.org/api/v1/doc/document/draft-briscoe-docsis-q-protection/" xml:bas | ||||
e="https://bib.ietf.org/public/rfc/bibxml-ids/reference.I-D.briscoe-docsis-q-pro | ||||
tection.xml"> | ||||
<front> | ||||
<title>The DOCSIS(r) Queue Protection Algorithm to Preserve Low Late | ||||
ncy</title> | ||||
<author fullname="Bob Briscoe"/> | ||||
<author fullname="Greg White"/> | ||||
<date day="13" month="May" year="2022"/> | ||||
<abstract> | ||||
<t>This informational document explains the specification of the q | ||||
ueue | ||||
protection algorithm used in DOCSIS technology since version 3.1. A | ||||
shared low latency queue relies on the non-queue-building behaviour | ||||
of every traffic flow using it. However, some flows might not take | ||||
such care, either accidentally or maliciously. If a queue is about | ||||
to exceed a threshold level of delay, the queue protection algorithm | ||||
can rapidly detect the flows most likely to be responsible. It can | ||||
then prevent harm to other traffic in the low latency queue by | ||||
ejecting selected packets (or all packets) of these flows. The | ||||
document is designed for four types of audience: a) congestion | ||||
control designers who need to understand how to keep on the 'good' | ||||
side of the algorithm; b) implementers of the algorithm who want to | ||||
understand it in more depth; c) designers of algorithms with similar | ||||
goals, perhaps for non-DOCSIS scenarios; and d) researchers | ||||
interested in evaluating the algorithm.</t> | ||||
</abstract> | ||||
</front> | ||||
<seriesInfo name="Internet-Draft" value="draft-briscoe-docsis-q-protec | ||||
tion-06"/> | ||||
</reference> | ||||
<reference anchor="I-D.cardwell-iccrg-bbr-congestion-control" target="ht | ||||
tps://datatracker.ietf.org/api/v1/doc/document/draft-cardwell-iccrg-bbr-congesti | ||||
on-control/" xml:base="https://bib.ietf.org/public/rfc/bibxml-ids/reference.I-D. | ||||
cardwell-iccrg-bbr-congestion-control.xml"> | ||||
<front> | ||||
<title>BBR Congestion Control</title> | ||||
<author fullname="Neal Cardwell"/> | ||||
<author fullname="Yuchung Cheng"/> | ||||
<author fullname="Soheil Hassas Yeganeh"/> | ||||
<author fullname="Ian Swett"/> | ||||
<author fullname="Van Jacobson"/> | ||||
<date day="7" month="March" year="2022"/> | ||||
<abstract> | ||||
<t>This document specifies the BBR congestion control algorithm. | ||||
BBR | ||||
("Bottleneck Bandwidth and Round-trip propagation time") uses recent | ||||
measurements of a transport connection's delivery rate, round-trip | ||||
time, and packet loss rate to build an explicit model of the network | ||||
path. BBR then uses this model to control both how fast it sends | ||||
data and the maximum volume of data it allows in flight in the | ||||
network at any time. Relative to loss-based congestion control | ||||
algorithms such as Reno [RFC5681] or CUBIC [RFC8312], BBR offers | ||||
substantially higher throughput for bottlenecks with shallow buffers | ||||
or random losses, and substantially lower queueing delays for | ||||
bottlenecks with deep buffers (avoiding "bufferbloat"). BBR can be | ||||
implemented in any transport protocol that supports packet-delivery | ||||
acknowledgment. Thus far, open source implementations are available | ||||
for TCP [RFC793] and QUIC [RFC9000]. This document specifies version | ||||
2 of the BBR algorithm, also sometimes referred to as BBRv2 or bbr2.</t> | ||||
</abstract> | ||||
</front> | ||||
<seriesInfo name="Internet-Draft" value="draft-cardwell-iccrg-bbr-cong | ||||
estion-control-02"/> | ||||
</reference> | ||||
<reference anchor="I-D.briscoe-iccrg-prague-congestion-control" target=" | ||||
https://datatracker.ietf.org/api/v1/doc/document/draft-briscoe-iccrg-prague-cong | ||||
estion-control/" xml:base="https://bib.ietf.org/public/rfc/bibxml-ids/reference. | ||||
I-D.briscoe-iccrg-prague-congestion-control.xml"> | ||||
<front> | ||||
<title>Prague Congestion Control</title> | ||||
<author fullname="Koen De Schepper"/> | ||||
<author fullname="Olivier Tilmans"/> | ||||
<author fullname="Bob Briscoe"/> | ||||
<date day="11" month="July" year="2022"/> | ||||
<abstract> | ||||
<t>This specification defines the Prague congestion control scheme | ||||
, | ||||
which is derived from DCTCP and adapted for Internet traffic by | ||||
implementing the Prague L4S requirements. Over paths with L4S | ||||
support at the bottleneck, it adapts the DCTCP mechanisms to achieve | ||||
consistently low latency and full throughput. It is defined | ||||
independently of any particular transport protocol or operating | ||||
system, but notes are added that highlight issues specific to certain | ||||
transports and OSs. It is mainly based on the current default | ||||
options of the reference Linux implementation of TCP Prague, but it | ||||
includes experience from other implementations where available. It | ||||
separately describes non-default and optional parts, as well as | ||||
future plans.</t> | ||||
<t>The implementation does not satisfy all the Prague requirements | ||||
(yet) | ||||
and the IETF might decide that certain requirements need to be | ||||
relaxed as an outcome of the process of trying to satisfy them all. | ||||
In two cases, research code is replaced by placeholders until full | ||||
evaluation is complete.</t> | ||||
</abstract> | ||||
</front> | ||||
<seriesInfo name="Internet-Draft" value="draft-briscoe-iccrg-prague-co | ||||
ngestion-control-01"/> | ||||
</reference> | ||||
<reference anchor="I-D.mathis-iccrg-relentless-tcp" target="https://www. | ||||
ietf.org/archive/id/draft-mathis-iccrg-relentless-tcp-00.txt" xml:base="https:// | ||||
bib.ietf.org/public/rfc/bibxml-ids/reference.I-D.mathis-iccrg-relentless-tcp.xml | ||||
"> | ||||
<front> | ||||
<title>Relentless Congestion Control</title> | ||||
<author fullname="Matt Mathis"/> | ||||
<date day="4" month="March" year="2009"/> | ||||
<abstract> | ||||
<t>Relentless congestion control is a simple modification that can | ||||
be applied to almost any AIMD style congestion control: instead of applying a m | ||||
ultiplicative reduction to cwnd after a loss, cwnd is reduced by the number of l | ||||
ost segments. It can be modeled as a strict implementation of van Jacobson's Pac | ||||
ket Conservation Principle. During recovery, new segments are injected into the | ||||
network in exact accordance with the segments that are reported to have been del | ||||
ivered to the receiver by the returning ACKs. This algorithm offers a valuable n | ||||
ew congestion control property: the TCP portion of the control loop has exactly | ||||
unity gain, which should make it easier to implement simple controllers in netwo | ||||
rk devices to accurately control queue sizes across a huge range of scales. Rele | ||||
ntless Congestion Control conforms to neither the details nor the philosophy of | ||||
current congestion control standards. These standards are based on the idea that | ||||
the Internet can attain sufficient fairness by having relatively simple network | ||||
devices send uniform congestion signals to all flows, and mandating that all pr | ||||
otocols have equivalent responses to these congestion signals. To function appro | ||||
priately in a shared environment, Relentless Congestion Control requires that th | ||||
e network allocates capacity through some technique such as Fair Queuing, Approx | ||||
imate Fair Dropping, etc. The salient features of these algorithms are that they | ||||
segregate the traffic into distinct flows, and send different congestion signal | ||||
s to each flow. This alternative congestion control paradigm is described in a s | ||||
eparate document, also under consideration by the ICCRG. The goal of the documen | ||||
t is to illustrate some new protocol features and properties might be possible i | ||||
f we relax the "TCP-friendly" mandate. A secondary goal of Relentless TCP is to | ||||
make a distinction between the bottlenecks that belong to protocol itself, vs st | ||||
andard congestion control and the "TCP-friendly" paradigm.</t> | ||||
</abstract> | ||||
</front> | ||||
<seriesInfo name="Internet-Draft" value="draft-mathis-iccrg-relentless | ||||
-tcp-00"/> | ||||
</reference> | </reference> | |||
<!--{ToDo: DCttH ref will need to be updated, once stable}--> | ||||
<reference anchor="DCttH19" target="https://bobbriscoe.net/pubs.html#DCttH | <!-- [I-D.ietf-tsvwg-l4s-arch] companion doc 9330 - title matches as of 1/17/23- | |||
_TR"> | -> | |||
<front> | <reference anchor='RFC9330' target='https://www.rfc-editor.org/info/rfc9330'> | |||
<title>`Data Centre to the Home': Ultra-Low Latency for All</title> | <front> | |||
<author fullname="Koen De Schepper" initials="K." surname="De Schepp | <title>Low Latency, Low Loss, and Scalable Throughput (L4S) Internet Service: Ar | |||
er"> | chitecture</title> | |||
<organization>Nokia Bell Labs</organization> | <author initials='B' surname='Briscoe' fullname='Bob Briscoe' role='editor'> | |||
</author> | </author> | |||
<author fullname="Olga Bondarenko" initials="O." surname="Bondarenko | <author initials='K' surname='De Schepper' fullname='Koen De Schepper'> | |||
"> | </author> | |||
<organization>Simula Research Lab</organization> | <author initials='M' surname='Bagnulo' fullname='Marcelo Bagnulo'> | |||
</author> | </author> | |||
<author fullname="Olivier" initials="O." surname="Tilmans"> | <author initials='G' surname='White' fullname='Greg White'> | |||
<organization>Nokia Bell Labs</organization> | </author> | |||
</author> | <date year='2023' month='January'/> | |||
<author fullname="Bob Briscoe" initials="B." surname="Briscoe"> | </front> | |||
<organization>Independent (bobbriscoe.net)</organization> | <seriesInfo name="RFC" value="9330"/> | |||
</author> | <seriesInfo name="DOI" value="10.17487/RFC9330"/> | |||
<date month="July" year="2019"/> | </reference> | |||
</front> | ||||
<seriesInfo name="Updated RITE project Technical Report" value=""/> | <!-- [I-D.briscoe-tsvwg-l4s-diffserv] IESG state Expired as of 1/17/23 --> | |||
<format target="https://bobbriscoe.net/projects/latency/dctth_journal_ | <xi:include href="https://datatracker.ietf.org/doc/bibxml3/reference.I-D.briscoe | |||
draft20190726.pdf" type="PDF"/> | -tsvwg-l4s-diffserv.xml"/> | |||
</reference> | ||||
<reference anchor="PI2" target="https://riteproject.files.wordpress.com/ | <!-- [I-D.briscoe-docsis-q-protection] in MISSREF state as of 1/17/23 --> | |||
2015/10/pi2_conext.pdf"> | <xi:include href="https://datatracker.ietf.org/doc/bibxml3/reference.I-D.briscoe | |||
-docsis-q-protection.xml"/> | ||||
<!-- [I-D.cardwell-iccrg-bbr-congestion-control] IESG state Expired as of 1/17/2 | ||||
3 --> | ||||
<xi:include href="https://datatracker.ietf.org/doc/bibxml3/reference.I-D.cardwel | ||||
l-iccrg-bbr-congestion-control.xml"/> | ||||
<!-- [I-D.briscoe-iccrg-prague-congestion-control] IESG state Expired as of 1/17 | ||||
/23 --> | ||||
<xi:include href="https://datatracker.ietf.org/doc/bibxml3/reference.I-D.briscoe | ||||
-iccrg-prague-congestion-control.xml"/> | ||||
<!-- [I-D.mathis-iccrg-relentless-tcp] IESG state Expired as of 1/17/23 --> | ||||
<xi:include href="https://datatracker.ietf.org/doc/bibxml3/reference.I-D.mathis- | ||||
iccrg-relentless-tcp.xml"/> | ||||
<reference anchor="PI2" target="https://dl.acm.org/doi/10.1145/2999572.2 | ||||
999578"> | ||||
<front> | <front> | |||
<title>PI2: A Linearized AQM for both Classic and Scalable | <title>PI2: A Linearized AQM for both Classic and Scalable | |||
TCP</title> | TCP</title> | |||
<author fullname="Koen De Schepper" initials="K." surname="De Schepp er"> | <author fullname="Koen De Schepper" initials="K." surname="De Schepp er"> | |||
<organization>Nokia Bell Labs</organization> | <organization>Nokia Bell Labs</organization> | |||
</author> | </author> | |||
<author fullname="Olga Bondarenko" initials="O." surname="Bondarenko "> | <author fullname="Olga Bondarenko" initials="O." surname="Bondarenko "> | |||
<organization>Simula Research Lab</organization> | <organization>Simula Research Lab</organization> | |||
</author> | </author> | |||
<author fullname="Bob Briscoe" initials="B." surname="Briscoe"> | <author fullname="Bob Briscoe" initials="B." surname="Briscoe"> | |||
<organization>BT</organization> | <organization>BT</organization> | |||
</author> | </author> | |||
<author fullname="Ing-jyh Tsang" initials="I." surname="Tsang"> | <author fullname="Ing-jyh Tsang" initials="I." surname="Tsang"> | |||
<organization>Nokia Bell Labs</organization> | <organization>Nokia Bell Labs</organization> | |||
</author> | </author> | |||
<date month="December" year="2016"/> | <date month="December" year="2016"/> | |||
</front> | </front> | |||
<seriesInfo name="ACM CoNEXT'16" value=""/> | <seriesInfo name="DOI" value="10.1145/2999572.2999578"/> | |||
<refcontent>ACM CoNEXT'16</refcontent> | ||||
</reference> | </reference> | |||
<reference anchor="L4Sdemo16" target="https//dl.acm.org/citation.cfm?doi | ||||
d=2910017.2910633 (videos of demos: https://riteproject.eu/dctth/#1511dispatchwg | <reference anchor="L4Sdemo16" target="https://dl.acm.org/citation.cfm?do | |||
)"> | id=2910017.2910633"> | |||
<front> | <front> | |||
<title>Ultra-Low Delay for All: Live Experience, Live | <title>Ultra-Low Delay for All: Live Experience, Live Analysis</titl | |||
Analysis</title> | e> | |||
<author fullname="Olga Bondarenko" initials="O." surname="Bondarenko "> | <author fullname="Olga Bondarenko" initials="O." surname="Bondarenko "> | |||
<organization>Simula Research Lab</organization> | <organization>Simula Research Lab</organization> | |||
</author> | </author> | |||
<author fullname="Koen De Schepper" initials="K." surname="De Schepp er"> | <author fullname="Koen De Schepper" initials="K." surname="De Schepp er"> | |||
<organization>Bell Labs</organization> | <organization>Bell Labs</organization> | |||
</author> | </author> | |||
<author fullname="Ing-jyh Tsang" initials="I." surname="Tsang"> | <author fullname="Ing-jyh Tsang" initials="I." surname="Tsang"> | |||
<organization>Bell Labs</organization> | <organization>Bell Labs</organization> | |||
</author> | </author> | |||
<author fullname="Bob Briscoe" initials="B." surname="Briscoe"> | <author fullname="Bob Briscoe" initials="B." surname="Briscoe"> | |||
<organization>BT</organization> | <organization>BT</organization> | |||
</author> | </author> | |||
<date month="May" year="2016"/> | <author fullname="Andreas Petlund" initials="A." surname="Petlund"> | |||
</author> | ||||
<author fullname="Carsten Griwodz" initials="C." surname="Griwodz"> | ||||
</author> | ||||
<date month="May" year="2016"/> | ||||
</front> | </front> | |||
<seriesInfo name="Proc. MMSYS'16" value="pp33:1--33:4"/> | <seriesInfo name="DOI" value="10.1145/2910017.2910633"/> | |||
<refcontent>Proceedings of the 7th International Conference on Multime | ||||
dia Systems, Article No. 33, pp. 1-4</refcontent> | ||||
<format target="https://dl.acm.org/citation.cfm?doid=2910017.2910633" type="PDF"/> | <format target="https://dl.acm.org/citation.cfm?doid=2910017.2910633" type="PDF"/> | |||
</reference> | </reference> | |||
<reference anchor="Dukkipati06" target="https://dl.acm.org/doi/10.1145/1 | ||||
111322.1111336"> | <reference anchor="L4Seval22" target="https://arxiv.org/abs/2209.01078"> | |||
<front> | ||||
<title>Dual Queue Coupled AQM: Deployable Very Low Queuing Delay for | ||||
All</title> | ||||
<author fullname="Koen De Schepper" initials="K." surname="De Schepper | ||||
"> | ||||
<organization>Nokia Bell Labs</organization> | ||||
</author> | ||||
<author fullname="Olga Albisser" initials="O." surname="Albisser"> | ||||
<organization>Simula Research Lab</organization> | ||||
</author> | ||||
<author fullname="Olivier Tilmans" initials="O." surname="Tilmans"> | ||||
<organization>Nokia Bell Labs</organization> | ||||
</author> | ||||
<author fullname="Bob Briscoe" initials="B." surname="Briscoe"> | ||||
<organization>Independent (bobbriscoe.net)</organization> | ||||
</author> | ||||
<date month="September" year="2022"/> | ||||
</front> | ||||
<seriesInfo name="DOI" value="10.48550/arXiv.2209.01078"/> | ||||
<refcontent>Preprint submitted to IEEE/ACM Transactions on Networking</r | ||||
efcontent> | ||||
</reference> | ||||
<reference anchor="Dukkipati06" target="https://dl.acm.org/doi/10.1145/1111322.1 | ||||
111336"> | ||||
<front> | <front> | |||
<title>Why Flow-Completion Time is the Right Metric for Congestion | <title>Why Flow-Completion Time is the Right Metric for Congestion C | |||
Control</title> | ontrol</title> | |||
<author fullname="Nandita Dukkipati" initials="N." surname="Dukkipat i"> | <author fullname="Nandita Dukkipati" initials="N." surname="Dukkipat i"> | |||
<organization>Stanford Uni</organization> | <organization>Stanford University</organization> | |||
</author> | </author> | |||
<author fullname="Nick McKeown" initials="N." surname="McKeown"> | <author fullname="Nick McKeown" initials="N." surname="McKeown"> | |||
<organization>Stanford Uni</organization> | <organization>Stanford University</organization> | |||
</author> | </author> | |||
<date month="January" year="2006"/> | <date month="January" year="2006"/> | |||
</front> | </front> | |||
<seriesInfo name="ACM CCR" value="36(1):59--62"/> | <seriesInfo name="DOI" value="10.1145/1111322.1111336"/> | |||
<format target="http://yuba.stanford.edu/rcp/flowCompTime-dukkipati.pd | <refcontent>ACM SIGCOMM Computer Communication Review, Vol. 36, Issue | |||
f" type="PDF"/> | 1, pp. 59-62</refcontent> | |||
</reference> | </reference> | |||
<!-- <reference anchor="DCTCP_Pitfalls" | ||||
target="http://blogs.usenix.org/conference/nsdi15/technical-ses | ||||
sions/presentation/judd"> | ||||
<front> | ||||
<title>Attaining the Promise and Avoiding the Pitfalls of TCP in the | ||||
Datacenter</title> | ||||
<author fullname="Glenn Judd" initials="G." surname="Judd"> | ||||
<organization>Morgan Stanley</organization> | ||||
</author> | ||||
<date month="May" year="2015"/> | ||||
</front> | ||||
<seriesInfo name="12th USENIX Symposium on Networked Systems Design and | ||||
Implementation (NSDI 15)" | ||||
value="145-157"/> | ||||
<format target="http://blogs.usenix.org/conference/nsdi15/technical-sess | ||||
ions/presentation/judd" | ||||
type="PDF"/> | ||||
</reference> | ||||
<reference anchor="CRED_Insights" target="https://arxiv.org/abs/1904.07339 "> | <reference anchor="CRED_Insights" target="https://arxiv.org/abs/1904.07339 "> | |||
<front> | <front> | |||
<title>Insights from Curvy RED (Random Early Detection)</title> | <title>Insights from Curvy RED (Random Early Detection)</title> | |||
<author fullname="Bob Briscoe" initials="B." surname="Briscoe"> | <author fullname="Bob Briscoe" initials="B." surname="Briscoe"> | |||
<organization>BT</organization> | <organization>BT</organization> | |||
</author> | </author> | |||
<date day="" month="July" year="2015"/> | <author fullname="Koen De Schepper" initials="K." surname="De Scheppe | |||
r"> | ||||
<organization>BT</organization> | ||||
</author> | ||||
<date month="August" year="2015"/> | ||||
</front> | </front> | |||
<seriesInfo name="BT Technical Report" value="TR-TUB8-2015-003 arXiv:1 | <seriesInfo name="DOI" value="10.48550/arXiv.1904.07339"/> | |||
904.07339 [cs.NI]"/> | <refcontent>BT Technical Report, TR-TUB8-2015-003</refcontent> | |||
<format target="https://arxiv.org/pdf/1904.07339" type="PDF"/> | <format target="https://arxiv.org/pdf/1904.07339" type="PDF"/> | |||
</reference> | </reference> | |||
<reference anchor="CoDel" target="https://queue.acm.org/issuedetail.cfm? issue=2208917"> | <reference anchor="CoDel" target="https://queue.acm.org/issuedetail.cfm? issue=2208917"> | |||
<front> | <front> | |||
<title>Controlling Queue Delay</title> | <title>Controlling Queue Delay</title> | |||
<author fullname="Kathleen Nichols" initials="K." surname="Nichols"> | <author fullname="Kathleen Nichols" initials="K." surname="Nichols"> | |||
<organization>PARC</organization> | <organization>PARC</organization> | |||
</author> | </author> | |||
<author fullname="Van Jacobson" initials="V." surname="Jacobson"> | <author fullname="Van Jacobson" initials="V." surname="Jacobson"> | |||
<organization>Pollere Inc</organization> | <organization>Pollere Inc</organization> | |||
</author> | </author> | |||
<date month="May" year="2012"/> | <date month="May" year="2012"/> | |||
</front> | </front> | |||
<seriesInfo name="ACM Queue" value="10(5)"/> | <refcontent>ACM Queue, Vol. 10, Issue 5</refcontent> | |||
</reference> | </reference> | |||
<reference anchor="MEDF" target="https://infocom2003.ieee-infocom.org/pa | ||||
pers/27_04.PDF"> | <reference anchor="MEDF"> | |||
<front> | <front> | |||
<title>MEDF - a simple scheduling algorithm for two real-time | <title>MEDF - A Simple Scheduling Algorithm for Two Real-Time | |||
transport service classes with application in the UTRAN</title> | Transport Service Classes with Application in the UTRAN</title> | |||
<author fullname="Michael Menth " initials="M." surname="Menth"> | <author fullname="Michael Menth " initials="M." surname="Menth"> | |||
<organization>University of Wuerzburg</organization> | <organization>University of Wuerzburg</organization> | |||
</author> | </author> | |||
<author fullname="Matthias Schmid " initials="M." surname="Schmid"> | <author fullname="Matthias Schmid " initials="M." surname="Schmid"> | |||
<organization>Infosim AG</organization> | <organization>Infosim AG</organization> | |||
</author> | </author> | |||
<author fullname="Herbert Heiss" initials="H." surname="Heiss"> | <author fullname="Herbert Heiss" initials="H." surname="Heiss"> | |||
<organization>Siemens</organization> | <organization>Siemens</organization> | |||
</author> | </author> | |||
<author fullname="Thomas Reim" initials="T." surname="Reim"> | <author fullname="Thomas Reim" initials="T." surname="Reim"> | |||
<organization>Siemens</organization> | <organization>Siemens</organization> | |||
</author> | </author> | |||
<date month="March" year="2003"/> | <date month="March" year="2003"/> | |||
</front> | </front> | |||
<seriesInfo name="Proc. IEEE Conference on Computer Communications (IN | <seriesInfo name="DOI" value="10.1109/INFCOM.2003.1208948"/> | |||
FOCOM'03)" value="Vol.2 pp.1116-1122"/> | <refcontent>Proc. IEEE Conference on Computer Communications (INFOCOM' | |||
03), Vol. 2, pp. 1116-1122</refcontent> | ||||
</reference> | </reference> | |||
<reference anchor="DualQ-Test" target="https://www.duo.uio.no/bitstream/ | ||||
handle/10852/57424/thesis-henrste.pdf?sequence=1"> | <reference anchor="DualQ-Test"> | |||
<front> | <front> | |||
<title>Destruction Testing: Ultra-Low Delay using Dual Queue Coupled | <title>Destruction Testing: Ultra-Low Delay using Dual Queue Coupled | |||
Active Queue Management</title> | Active Queue Management</title> | |||
<author fullname="Henrik Steen" initials="H." surname="Steen"> | <author fullname="Henrik Steen" initials="H." surname="Steen"> | |||
<organization>Uni Oslo</organization> | <organization>University of Oslo</organization> | |||
</author> | </author> | |||
<date month="May" year="2017"/> | <date month="May" year="2017"/> | |||
</front> | </front> | |||
<seriesInfo name="Master's Thesis, Dept of Informatics, Uni Oslo" value=""/> | <refcontent>Master's Thesis, Department of Informatics, University of Oslo</refcontent> | |||
</reference> | </reference> | |||
<reference anchor="SigQ-Dyn" target="https://arxiv.org/abs/1904.07044"> | <reference anchor="SigQ-Dyn" target="https://arxiv.org/abs/1904.07044"> | |||
<front> | <front> | |||
<title>Rapid Signalling of Queue Dynamics</title> | <title>Rapid Signalling of Queue Dynamics</title> | |||
<author fullname="Bob Briscoe" initials="B." surname="Briscoe"> | <author fullname="Bob Briscoe" initials="B." surname="Briscoe"> | |||
<organization/> | <organization/> | |||
</author> | </author> | |||
<date month="September" year="2017"/> | <date month="September" year="2017"/> | |||
</front> | </front> | |||
<seriesInfo name="Technical Report" value="TR-BB-2017-001 arXiv:1904.0 | <seriesInfo name="DOI" value="10.48550/arXiv.1904.07044"/> | |||
7044 [cs.NI]"/> | <refcontent>Technical Report, TR-BB-2017-001</refcontent> | |||
<format target="https://arxiv.org/pdf/1904.07044" type="PDF"/> | <format target="https://arxiv.org/pdf/1904.07044" type="PDF"/> | |||
</reference> | </reference> | |||
<reference anchor="Alizadeh-stability" target="https://dl.acm.org/citation.cfm?id=1993753"> | <reference anchor="Alizadeh-stability" target="https://dl.acm.org/citation.cfm?id=1993753"> | |||
<front> | <front> | |||
<title>Analysis of DCTCP: Stability, Convergence, and | <title>Analysis of DCTCP: Stability, Convergence, and | |||
Fairness</title> | Fairness</title> | |||
<author fullname="Mohamed Alizadeh" initials="M." surname="Alizadeh"/> | <author fullname="Mohamed Alizadeh" initials="M." surname="Alizadeh"/> | |||
<author fullname="Adel Javanmard" initials="A." surname="Javanmard"/> | <author fullname="Adel Javanmard" initials="A." surname="Javanmard"/> | |||
<author fullname="Balaji Prabhakar" initials="B." surname="Prabhakar"/> | <author fullname="Balaji Prabhakar" initials="B." surname="Prabhakar"/> | |||
<date month="June" year="2011"/> | <date month="June" year="2011"/> | |||
</front> | </front> | |||
<seriesInfo name="ACM SIGMETRICS 2011" value=""/> | <seriesInfo name="DOI" value="10.1145/1993744.1993753"/> | |||
<refcontent>SIGMETRICS '11: Proceedings of the ACM SIGMETRICS Joint In | ||||
ternational Conference on Measurement and Modeling of Computer Systems, pp. 73-8 | ||||
4</refcontent> | ||||
<format target="https://people.csail.mit.edu/alizadeh/papers/dctcp_analysis-sigmetrics11.pdf" type="PDF"/> | <format target="https://people.csail.mit.edu/alizadeh/papers/dctcp_analysis-sigmetrics11.pdf" type="PDF"/> | |||
</reference> | </reference> | |||
<reference anchor="PragueLinux" target="https://www.netdevconf.org/0x13/session.html?talk-tcp-prague-l4s"> | <reference anchor="PragueLinux" target="https://www.netdevconf.org/0x13/session.html?talk-tcp-prague-l4s"> | |||
<front> | <front> | |||
<title>Implementing the `TCP Prague' Requirements for Low Latency | <title>Implementing the 'TCP Prague' Requirements for L4S</title> | |||
Low Loss Scalable Throughput (L4S)</title> | ||||
<author fullname="Bob Briscoe" initials="B." surname="Briscoe"> | <author fullname="Bob Briscoe" initials="B." surname="Briscoe"> | |||
<organization>Independent</organization> | <organization>Independent</organization> | |||
</author> | </author> | |||
<author fullname="Koen De Schepper" initials="K." surname="De Schepper"> | <author fullname="Koen De Schepper" initials="K." surname="De Schepper"> | |||
<organization>Nokia Bell Labs</organization> | <organization>Nokia Bell Labs</organization> | |||
</author> | </author> | |||
<author fullname="Olga Albisser" initials="O." surname="Albisser"> | <author fullname="Olga Albisser" initials="O." surname="Albisser"> | |||
<organization>Simula Research Lab</organization> | <organization>Simula Research Lab</organization> | |||
</author> | </author> | |||
<author fullname="Joakim Misund" initials="J." surname="Misund"> | <author fullname="Joakim Misund" initials="J." surname="Misund"> | |||
<organization>Simula Research Lab</organization> | <organization>Simula Research Lab</organization> | |||
</author> | </author> | |||
<author fullname="Olivier Tilmans" initials="O." surname="Tilmans"> | <author fullname="Olivier Tilmans" initials="O." surname="Tilmans"> | |||
<organization>Nokia Bell Labs</organization> | <organization>Nokia Bell Labs</organization> | |||
</author> | </author> | |||
<author fullname="Mirja Kühlewind" initials="M." surname="Kühlewind"> | <author fullname="Mirja Kuehlewind" initials="M." surname="Kuehlewind"> | |||
<organization>ETH Zurich</organization> | <organization>ETH Zurich</organization> | |||
</author> | </author> | |||
<author fullname="Asad Sajjad Ahmed" initials="A.S." surname="Ahmed"> | <author fullname="Asad Sajjad Ahmed" initials="A." surname="Ahmed"> | |||
<organization>Simula Research Lab</organization> | <organization>Simula Research Lab</organization> | |||
</author> | </author> | |||
<date month="March" year="2019"/> | <date month="March" year="2019"/> | |||
</front> | </front> | |||
<seriesInfo name="Proc. Linux Netdev 0x13" value=""/> | <refcontent>Proceedings of Linux Netdev 0x13</refcontent> | |||
<format target="https://www.files.netdevconf.org/f/4d6939d5f1fb404fafd | ||||
1/?dl=1" type="PDF"/> | ||||
</reference> | </reference> | |||
<reference anchor="DualPI2Linux" target="https://www.netdevconf.org/0x13/session.html?talk-DUALPI2-AQM"> | <reference anchor="DualPI2Linux" target="https://www.netdevconf.org/0x13/session.html?talk-DUALPI2-AQM"> | |||
<front> | <front> | |||
<title>DUALPI2 - Low Latency, Low Loss and Scalable (L4S) | <title>DUALPI2 - Low Latency, Low Loss and Scalable (L4S) | |||
AQM</title> | AQM</title> | |||
<author fullname="Olga Albisser" initials="O." surname="Albisser"> | <author fullname="Olga Albisser" initials="O." surname="Albisser"> | |||
<organization>Simula Research Lab</organization> | <organization>Simula Research Lab</organization> | |||
</author> | </author> | |||
<author fullname="Koen De Schepper" initials="K." surname="De Schepper"> | <author fullname="Koen De Schepper" initials="K." surname="De Schepper"> | |||
<organization>Nokia Bell Labs</organization> | <organization>Nokia Bell Labs</organization> | |||
</author> | </author> | |||
skipping to change at line 2210 ¶ | skipping to change at line 1643 ¶ | |||
<organization>Independent</organization> | <organization>Independent</organization> | |||
</author> | </author> | |||
<author fullname="Olivier Tilmans" initials="O." surname="Tilmans"> | <author fullname="Olivier Tilmans" initials="O." surname="Tilmans"> | |||
<organization>Nokia Bell Labs</organization> | <organization>Nokia Bell Labs</organization> | |||
</author> | </author> | |||
<author fullname="Henrik Steen" initials="H." surname="Steen"> | <author fullname="Henrik Steen" initials="H." surname="Steen"> | |||
<organization>Simula Research Lab</organization> | <organization>Simula Research Lab</organization> | |||
</author> | </author> | |||
<date month="March" year="2019"/> | <date month="March" year="2019"/> | |||
</front> | </front> | |||
<seriesInfo name="Proc. Linux Netdev 0x13" value=""/> | <seriesInfo name="Proceedings of Linux Netdev 0x13" value=""/> | |||
<format target="https://www.files.netdevconf.org/f/febbe8c6a05b4ceab641/?dl=1" type="PDF"/> | <format target="https://www.files.netdevconf.org/f/febbe8c6a05b4ceab641/?dl=1" type="PDF"/> | |||
</reference> | </reference> | |||
<reference anchor="DOCSIS3.1" target="https://specification-search.cable | ||||
labs.com/CM-SP-MULPIv3.1"> | <reference anchor="DOCSIS3.1" target="https://specification-search.cable | |||
labs.com/CM-SP-MULPIv3"> | ||||
<front> | <front> | |||
<title>MAC and Upper Layer Protocols Interface (MULPI) | <title>DOCSIS 3.1 MAC and Upper Layer Protocols Interface Specificat | |||
Specification, CM-SP-MULPIv3.1</title> | ion</title> | |||
<author fullname="" surname=""> | <author fullname="" surname=""> | |||
<organization>CableLabs</organization> | <organization>CableLabs</organization> | |||
</author> | </author> | |||
<date day="21" month="January" year="2019"/> | <date month="January" year="2019"/> | |||
</front> | </front> | |||
<seriesInfo name="Data-Over-Cable Service Interface Specifications DOCSIS® 3.1" value="Version i17 or later"/> | <refcontent>CM-SP-MULPIv3.1, Data-Over-Cable Service Interface Specifications DOCSIS 3.1 Version I17 or later</refcontent> | |||
</reference> | </reference> | |||
<reference anchor="LLD" target="https://cablela.bs/low-latency-docsis-technology-overview-february-2019"> | <reference anchor="LLD" target="https://cablela.bs/low-latency-docsis-technology-overview-february-2019"> | |||
<front> | <front> | |||
<title>Low Latency DOCSIS: Technology Overview</title> | <title>Low Latency DOCSIS: Technology Overview</title> | |||
<author fullname="Greg White" initials="G." surname="White"> | <author fullname="Greg White" initials="G." surname="White"> | |||
<organization>CableLabs</organization> | <organization>CableLabs</organization> | |||
</author> | </author> | |||
<author fullname="Karthik Sundaresan" initials="K." surname="Sundaresan"> | <author fullname="Karthik Sundaresan" initials="K." surname="Sundaresan"> | |||
<organization>CableLabs</organization> | <organization>CableLabs</organization> | |||
</author> | </author> | |||
<author fullname="Bob Briscoe" initials="B." surname="Briscoe"> | <author fullname="Bob Briscoe" initials="B." surname="Briscoe"> | |||
<organization>CableLabs</organization> | <organization>CableLabs</organization> | |||
</author> | </author> | |||
<date day="" month="February" year="2019"/> | <date day="" month="February" year="2019"/> | |||
</front> | </front> | |||
<seriesInfo name="CableLabs White Paper" value=""/> | <refcontent>CableLabs White Paper</refcontent> | |||
</reference> | </reference> | |||
<reference anchor="AQMmetrics" target="https://www.cs.purdue.edu/homes/fahmy/papers/ldc.pdf"> | <reference anchor="AQMmetrics" target="https://www.cs.purdue.edu/homes/fahmy/papers/ldc.pdf"> | |||
<front> | <front> | |||
<title>A Comparison of Load-based and Queue- based Active Queue | <title>A Comparison of Load-based and Queue-based Active Queue | |||
Management Algorithms</title> | Management Algorithms</title> | |||
<author fullname="Minseok Kwon" initials="M." surname="Kwon"> | <author fullname="Minseok Kwon" initials="M." surname="Kwon"> | |||
<organization>Purdue Uni</organization> | <organization>Purdue University</organization> | |||
</author> | </author> | |||
<author fullname="Sonia Fahmy" initials="S." surname="Fahmy"> | <author fullname="Sonia Fahmy" initials="S." surname="Fahmy"> | |||
<organization>Purdue Uni</organization> | <organization>Purdue University</organization> | |||
</author> | </author> | |||
<date year="2002"/> | <date year="2002"/> | |||
</front> | </front> | |||
<seriesInfo name="Proc. Int'l Soc. for Optical Engineering (SPIE)" val | <seriesInfo name="DOI" value="10.1117/12.473021"/> | |||
ue="4866:35--46 DOI: 10.1117/12.473021"/> | <refcontent>Proc. Int'l Soc. for Optical Engineering (SPIE), Vol. 4866 | |||
, pp. 35-46</refcontent> | ||||
</reference> | </reference> | |||
<reference anchor="CCcensus19" target="https://doi.org/10.1145/3366693"> | <reference anchor="CCcensus19" target="https://doi.org/10.1145/3366693"> | |||
<front> | <front> | |||
<title>The Great Internet TCP Congestion Control Census</title> | <title>The Great Internet TCP Congestion Control Census</title> | |||
<author fullname="Ayush Mishra" initials="A." surname="Mishra"> | <author fullname="Ayush Mishra" initials="A." surname="Mishra"> | |||
<organization/> | <organization/> | |||
</author> | </author> | |||
<author fullname="Xiangpeng Sun" initials="X." surname="Sun"> | <author fullname="Xiangpeng Sun" initials="X." surname="Sun"> | |||
<organization/> | ||||
<address> | ||||
<postal> | ||||
<street/> | ||||
<city/> | ||||
<region/> | ||||
<code/> | ||||
<country/> | ||||
</postal> | ||||
<phone/> | ||||
<email/> | ||||
<uri/> | ||||
</address> | ||||
</author> | </author> | |||
<author fullname="Atishya Jain" initials="A." surname="Jain"> | <author fullname="Atishya Jain" initials="A." surname="Jain"> | |||
<organization/> | ||||
<address> | ||||
<postal> | ||||
<street/> | ||||
<city/> | ||||
<region/> | ||||
<code/> | ||||
<country/> | ||||
</postal> | ||||
<phone/> | ||||
<email/> | ||||
<uri/> | ||||
</address> | ||||
</author> | </author> | |||
<author fullname="Sameer Pande" initials="S." surname="Pande"> | <author fullname="Sameer Pande" initials="S." surname="Pande"> | |||
<organization/> | ||||
<address> | ||||
<postal> | ||||
<street/> | ||||
<city/> | ||||
<region/> | ||||
<code/> | ||||
<country/> | ||||
</postal> | ||||
<phone/> | ||||
<email/> | ||||
<uri/> | ||||
</address> | ||||
</author> | </author> | |||
<author fullname="Raj Joshi" initials="R." surname="Joshi"> | <author fullname="Raj Joshi" initials="R." surname="Joshi"> | |||
<organization/> | ||||
<address> | ||||
<postal> | ||||
<street/> | ||||
<city/> | ||||
<region/> | ||||
<code/> | ||||
<country/> | ||||
</postal> | ||||
<phone/> | ||||
<email/> | ||||
<uri/> | ||||
</address> | ||||
</author> | </author> | |||
<author fullname="Ben Leong" initials="B." surname="Leong"> | <author fullname="Ben Leong" initials="B." surname="Leong"> | |||
<organization/> | ||||
<address> | ||||
<postal> | ||||
<street/> | ||||
<city/> | ||||
<region/> | ||||
<code/> | ||||
<country/> | ||||
</postal> | ||||
<phone/> | ||||
<email/> | ||||
<uri/> | ||||
</address> | ||||
</author> | </author> | |||
<date month="December" year="2019"/> | <date month="December" year="2019"/> | |||
</front> | </front> | |||
<seriesInfo name="Proc. ACM on Measurement and Analysis of Computing S | <seriesInfo name="DOI" value="10.1145/3366693"/> | |||
ystems" value="3(3)"/> | <refcontent>Proceedings of the ACM on Measurement and Analysis of Comp | |||
uting Systems, Vol. 3, Issue 3, Article No. 45, pp. 1-24</refcontent> | ||||
<format target="https://dl.acm.org/doi/10.1145/3366693" type="PDF"/> | <format target="https://dl.acm.org/doi/10.1145/3366693" type="PDF"/> | |||
</reference> | </reference> | |||
<reference anchor="PI2param" target="https://arxiv.org/abs/2107.01003"> | <reference anchor="PI2param" target="https://arxiv.org/abs/2107.01003"> | |||
<front> | <front> | |||
<title>PI2 Parameters</title> | <title>PI2 Parameters</title> | |||
<author fullname="Bob Briscoe" initials="B." surname="Briscoe"> | <author fullname="Bob Briscoe" initials="B." surname="Briscoe"> | |||
<organization/> | <organization/> | |||
</author> | </author> | |||
<date month="July" year="2021"/> | <date month="July" year="2021"/> | |||
</front> | </front> | |||
<seriesInfo name="Technical Report" value="TR-BB-2021-001 arXiv:2107.0 | <seriesInfo name="DOI" value="10.48550/arXiv.2107.01003"/> | |||
1003 [cs.NI]"/> | <refcontent>Technical Report, TR-BB-2021-001, arXiv:2107.01003 [cs.NI] | |||
</refcontent> | ||||
<format target="https://arxiv.org/pdf/2107.01003" type="PDF"/> | <format target="https://arxiv.org/pdf/2107.01003" type="PDF"/> | |||
</reference> | </reference> | |||
<reference anchor="Labovitz10" target="https://doi.org/10.1145/1851275.1851194"> | <reference anchor="Labovitz10" target="https://doi.org/10.1145/1851275.1851194"> | |||
<front> | <front> | |||
<title>Internet Inter-Domain Traffic</title> | <title>Internet Inter-Domain Traffic</title> | |||
<author fullname="Craig Labovitz" initials="C." surname="Labovitz"> | <author fullname="Craig Labovitz" initials="C." surname="Labovitz"> | |||
<organization>Arbor Networks</organization> | <organization>Arbor Networks</organization> | |||
</author> | </author> | |||
<author fullname="Scott Iekel-Johnson" initials="S." surname="Iekel-Johnson"> | <author fullname="Scott Iekel-Johnson" initials="S." surname="Iekel-Johnson"> | |||
<organization>Arbor Networks</organization> | <organization>Arbor Networks</organization> | |||
</author> | </author> | |||
<author fullname="Danny McPherson" initials="D." surname="McPherson"> | <author fullname="Danny McPherson" initials="D." surname="McPherson"> | |||
<organization>Arbor Networks</organization> | <organization>Arbor Networks</organization> | |||
</author> | </author> | |||
<author fullname="Jon Oberheide" initials="J." surname="Oberheide"> | <author fullname="Jon Oberheide" initials="J." surname="Oberheide"> | |||
<organization>Uni Michigan</organization> | <organization>Uni Michigan</organization> | |||
</author> | </author> | |||
<author fullname="Farnam Jahanian" initials="F." surname="Jahanian"> | <author fullname="Farnam Jahanian" initials="F." surname="Jahanian"> | |||
<organization>Uni Michigan</organization> | <organization>Uni Michigan</organization> | |||
</author> | </author> | |||
<date month="August" year="2010"/> | <date month="August" year="2010"/> | |||
</front> | </front> | |||
<seriesInfo name="Proc ACM SIGCOMM; ACM CCR" value="40(4):75--86"/> | <seriesInfo name="DOI" value="10.1145/1851275.1851194"/> | |||
<refcontent>ACM SIGCOMM Computer Communication Review, Vol. 40, Issue | ||||
4, pp. 75-86</refcontent> | ||||
<format target="https://dl.acm.org/doi/pdf/10.1145/1851182.1851194" type="PDF"/> | <format target="https://dl.acm.org/doi/pdf/10.1145/1851182.1851194" type="PDF"/> | |||
</reference> | </reference> | |||
<reference anchor="SCReAM" target="https://github.com/EricssonResearch/s | ||||
cream/blob/master/README.md"> | <reference anchor="SCReAM-L4S" target="https://github.com/EricssonResear | |||
ch/scream"> | ||||
<front> | <front> | |||
<title>SCReAM</title> | <title>SCReAM</title> | |||
<author fullname="Ingemar Johansson" initials="I" surname="Johansson | <author/> | |||
"> | <date month="June" year="2022"/> | |||
<organization/> | ||||
</author> | ||||
<date/> | ||||
</front> | </front> | |||
<seriesInfo name="GitHub repository;" value=""/> | <refcontent>commit fda6c53</refcontent> | |||
<format target="https://github.com/google/bbr/blob/v2alpha/README.md" | ||||
type="Source code"/> | ||||
</reference> | </reference> | |||
<reference anchor="L4S_5G" target="https://www.ericsson.com/en/reports-a | ||||
nd-papers/white-papers/enabling-time-critical-applications-over-5g-with-rate-ada | <reference anchor="L4S_5G" target="https://www.ericsson.com/en/reports-and-paper | |||
ptation"> | s/white-papers/enabling-time-critical-applications-over-5g-with-rate-adaptation" | |||
> | ||||
<front> | <front> | |||
<title>Enabling time-critical applications over 5G with rate | <title>Enabling time-critical applications over 5G with rate | |||
adaptation</title> | adaptation</title> | |||
<author fullname="Per Willars" initials="P." surname="Willars"/> | <author fullname="Per Willars" initials="P." surname="Willars"/> | |||
<author fullname="Emma Wittenmark" initials="E." surname="Wittenmark"/> | <author fullname="Emma Wittenmark" initials="E." surname="Wittenmark"/> | |||
<author fullname="Henrik Ronkainen" initials="H." surname="Ronkainen"/> | <author fullname="Henrik Ronkainen" initials="H." surname="Ronkainen"/> | |||
<author fullname="Christer Östberg" initials="C." surname="Östberg"/> | <author fullname="Christer Östberg" initials="C." surname="Östberg"/> | |||
<author fullname="Ingemar Johansson" initials="I." surname="Johansson"/> | <author fullname="Ingemar Johansson" initials="I." surname="Johansson"/> | |||
<author fullname="Johan Strand" initials="J." surname="Strand"/> | <author fullname="Johan Strand" initials="J." surname="Strand"/> | |||
<author fullname="Petr Lédl" initials="P." surname="Lédl"/> | <author fullname="Petr Lédl" initials="P." surname="Lédl"/> | |||
<author fullname="Dominik Schnieders" initials="D." surname="Schnieders"/> | <author fullname="Dominik Schnieders" initials="D." surname="Schnieders"/> | |||
<date month="May" year="2021"/> | <date month="May" year="2021"/> | |||
</front> | </front> | |||
<seriesInfo name="Ericsson - Deutsche Telekom White Paper" value="BNEW-21:025455 Uen"/> | <refcontent>Ericsson - Deutsche Telekom White Paper, BNEW-21:025455</refcontent> | |||
<format target="https://www.ericsson.com/49bc82/assets/local/reports-papers/white-papers/26052021-enabling-time-critical-applications-over-5g-with-rate-adaptation-whitepaper.pdf" type="PDF"/> | <format target="https://www.ericsson.com/49bc82/assets/local/reports-papers/white-papers/26052021-enabling-time-critical-applications-over-5g-with-rate-adaptation-whitepaper.pdf" type="PDF"/> | |||
</reference> | </reference> | |||
<reference anchor="BBRv2" target="https://github.com/google/bbr/blob/v2a | ||||
lpha/README.md"> | <reference anchor="BBRv2" target="https://github.com/google/bbr"> | |||
<front> | <front> | |||
<title>BRTCP BBR v2 Alpha/Preview Release</title> | <title>TCP BBR v2 Alpha/Preview Release</title> | |||
<author fullname="Neal Cardwell" initials="N" surname="Cardwell"> | <author/> | |||
<organization/> | <date month="June" year="2022"/> | |||
</author> | ||||
<date/> | ||||
</front> | </front> | |||
<seriesInfo name="GitHub repository;" value="Linux congestion control module"/> | <refcontent>commit 17700ca</refcontent> | |||
</reference> | </reference> | |||
<reference anchor="Heist21" target="https://github.com/heistp/l4s-tests/ | ||||
#underutilization-with-bursty-traffic"> | <reference anchor="Heist21" target="https://github.com/heistp/l4s-tests" | |||
> | ||||
<front> | <front> | |||
<title>L4S Tests</title> | <title>L4S Tests</title> | |||
<author fullname="Pete Heist" initials="P." surname="Heist"> | <author/> | |||
<organization/> | ||||
</author> | ||||
<author fullname="Jonathan Morton" initials="J." surname="Morton"> | ||||
<organization/> | ||||
</author> | ||||
<date month="August" year="2021"/> | <date month="August" year="2021"/> | |||
</front> | </front> | |||
<seriesInfo name="GitHub" value="README"/> | <refcontent>commit e21cd91</refcontent> | |||
</reference> | </reference> | |||
<reference anchor="Boru20" target="https://dl.acm.org/doi/abs/10.1145/3402413.3402419"> | <reference anchor="Boru20" target="https://dl.acm.org/doi/abs/10.1145/3402413.3402419"> | |||
<front> | <front> | |||
<title>Validating the Sharing Behavior and Latency Characteristics | <title>Validating the Sharing Behavior and Latency Characteristics | |||
of the L4S Architecture</title> | of the L4S Architecture</title> | |||
<author fullname="Dejene Boru Oljira" initials="D." surname="Boru Oljira"> | <author fullname="Dejene Boru Oljira" initials="D." surname="Boru Oljira"> | |||
<organization>Karlstad Uni</organization> | <organization>Karlstad Uni</organization> | |||
</author> | </author> | |||
<author fullname="Karl-Johan Grinnemo" initials="K-J." surname="Grinnemo"> | <author fullname="Karl-Johan Grinnemo" initials="K-J." surname="Grinnemo"> | |||
<organization>Karlstad Uni</organization> | <organization>Karlstad Uni</organization> | |||
</author> | </author> | |||
<author fullname="Anna Brunstrom" initials="A." surname="Brunstrom"> | <author fullname="Anna Brunstrom" initials="A." surname="Brunstrom"> | |||
<organization>Karlstad Uni</organization> | <organization>Karlstad Uni</organization> | |||
</author> | </author> | |||
<author fullname="Javid Taheri" initials="J." surname="Taheri"> | <author fullname="Javid Taheri" initials="J." surname="Taheri"> | |||
<organization>Karlstad Uni</organization> | <organization>Karlstad Uni</organization> | |||
</author> | </author> | |||
<date month="May" year="2020"/> | <date month="May" year="2020"/> | |||
</front> | </front> | |||
<seriesInfo name="ACM CCR" value="50(2):37--44"/> | <seriesInfo name="DOI" value="10.1145/3402413.3402419"/> | |||
<refcontent>ACM SIGCOMM Computer Communication Review, Vol. 50, Issue | ||||
2, pp. 37-44</refcontent> | ||||
</reference> | </reference> | |||
</references> | </references> | |||
</references> | </references> | |||
<section anchor="dualq_Ex_algo_pi2" numbered="true" toc="default"> | <section anchor="dualq_Ex_algo_pi2" numbered="true" toc="default"> | |||
<name>Example DualQ Coupled PI2 Algorithm</name> | <name>Example DualQ Coupled PI2 Algorithm</name> | |||
<t>As a first concrete example, the pseudocode below gives the DualPI2 | <t>As a first concrete example, the pseudocode below gives the DualPI2 | |||
algorithm. DualPI2 follows the structure of the DualQ Coupled AQM | algorithm. DualPI2 follows the structure of the DualQ Coupled AQM | |||
framework in <xref target="dualq_fig_structure" format="default"/>. A simple ramp | framework in <xref target="dualq_fig_structure" format="default"/>. A simple ramp | |||
function (configured in units of queuing time) with unsmoothed ECN | function (configured in units of queuing time) with unsmoothed ECN | |||
marking is used for the Native L4S AQM. The ramp can also be configured | marking is used for the Native L4S AQM. The ramp can also be configured | |||
as a step function. The PI2 algorithm <xref target="PI2" format="default"/> is used | as a step function. The PI2 algorithm <xref target="PI2" format="default"/> is used | |||
for the Classic AQM. PI2 is an improved variant of the PIE | for the Classic AQM. PI2 is an improved variant of the PIE | |||
AQM <xref target="RFC8033" format="default"/>.</t> | AQM <xref target="RFC8033" format="default"/>.</t> | |||
<t>The pseudocode will be introduced in two passes. The first pass | <t>The pseudocode will be introduced in two passes. The first pass | |||
explains the core concepts, deferring handling of edge-cases like | explains the core concepts, deferring handling of edge-cases like | |||
overload to the second pass. To aid comparison, line numbers are kept in | overload to the second pass. To aid comparison, line numbers are kept in | |||
step between the two passes by using letter suffixes where the longer | step between the two passes by using letter suffixes where the longer | |||
code needs extra lines.</t> | code needs extra lines.</t> | |||
<t>All variables are assumed to be floating point in their basic units | <t>All variables are assumed to be floating point in their basic units | |||
(size in bytes, time in seconds, rates in bytes/second, alpha and beta | (size in bytes, time in seconds, rates in bytes/second, alpha and beta | |||
in Hz, and probabilities from 0 to 1. Constants expressed in k (kilo), M | in Hz, and probabilities from 0 to 1). Constants expressed in k (kilo), M | |||
(mega), G (giga), u (micro), m (milli) , %, ... are assumed to be | (mega), G (giga), u (micro), m (milli), %, and so forth, are assumed to be | |||
converted to their appropriate multiple or fraction to represent the | converted to their appropriate multiple or fraction to represent the | |||
basic units. A real implementation that wants to use integer values | basic units. A real implementation that wants to use integer values | |||
needs to handle appropriate scaling factors and allow accordingly | needs to handle appropriate scaling factors and allow | |||
appropriate resolution of its integer types (including temporary | appropriate resolution of its integer types (including temporary | |||
internal values during calculations).</t> | internal values during calculations).</t> | |||
<t>A full open source implementation for Linux is available at: | <t>A full open source implementation for Linux is available at | |||
https://github.com/L4STeam/sch_dualpi2_upstream and explained in <xref tar | <eref target="https://github.com/L4STeam/sch_dualpi2_upstream" brackets="a | |||
get="DualPI2Linux" format="default"/>. The specification of the DualQ Coupled AQ | ngle"/> and explained in <xref target="DualPI2Linux" format="default"/>. The spe | |||
M for | cification of the DualQ Coupled AQM for | |||
DOCSIS cable modems and CMTSs is available in <xref target="DOCSIS3.1" for | DOCSIS cable modems and cable modem termination systems (CMTSs) is availab | |||
mat="default"/> | le in <xref target="DOCSIS3.1" format="default"/> | |||
and explained in <xref target="LLD" format="default"/>.</t> | and explained in <xref target="LLD" format="default"/>.</t> | |||
<section anchor="dualq_Ex_algo_pi2-1" numbered="true" toc="default"> | <section anchor="dualq_Ex_algo_pi2-1" numbered="true" toc="default"> | |||
<name>Pass #1: Core Concepts</name> | <name>Pass #1: Core Concepts</name> | |||
<t>The pseudocode manipulates three main structures of variables: the | <t>The pseudocode manipulates three main structures of variables: the | |||
packet (pkt), the L4S queue (lq) and the Classic queue (cq). The | packet (pkt), the L4S queue (lq), and the Classic queue (cq). The | |||
pseudocode consists of the following six functions:</t> | pseudocode consists of the following six functions:</t> | |||
<ul spacing="normal"> | <ul spacing="normal"> | |||
<li>The initialization function dualpi2_params_init(...) (<xref target="dualq_fig_Algo_pi2_core_header" format="default"/>) that sets parameter | <li>The initialization function dualpi2_params_init(...) (<xref target="dualq_fig_Algo_pi2_core_header" format="default"/>) that sets parameter | |||
defaults (the API for setting non-default values is omitted for | defaults (the API for setting non-default values is omitted for | |||
brevity)</li> | brevity).</li> | |||
<li>The enqueue function dualpi2_enqueue(lq, cq, pkt) (<xref target="d | <li>The enqueue function dualpi2_enqueue(lq, cq, pkt) (<xref target="d | |||
ualq_fig_Algo_pi2_enqueue" format="default"/>)</li> | ualq_fig_Algo_pi2_enqueue" format="default"/>).</li> | |||
<li>The dequeue function dualpi2_dequeue(lq, cq, pkt) (<xref target="d | <li>The dequeue function dualpi2_dequeue(lq, cq, pkt) (<xref target="d | |||
ualq_fig_Algo_pi2_dequeue" format="default"/>)</li> | ualq_fig_Algo_pi2_dequeue" format="default"/>).</li> | |||
<li>The recurrence function recur(q, likelihood) for de-randomized | <li>The recurrence function recur(q, likelihood) for de-randomized | |||
ECN marking (shown at the end of <xref target="dualq_fig_Algo_pi2_dequeue" format="default"/>).</li> | ECN marking (shown at the end of <xref target="dualq_fig_Algo_pi2_dequeue" format="default"/>).</li> | |||
<li>The L4S AQM function laqm(qdelay) (<xref target="dualq_fig_Algo_laqm_core" format="default"/>) used to calculate the | <li>The L4S AQM function laqm(qdelay) (<xref target="dualq_fig_Algo_laqm_core" format="default"/>) used to calculate the | |||
ECN-marking probability for the L4S queue</li> | ECN-marking probability for the L4S queue.</li> | |||
<li>The base AQM function that implements the PI algorithm | <li>The Base AQM function that implements the PI algorithm | |||
dualpi2_update(lq, cq) (<xref target="dualq_fig_Algo_pi2_core" format="default"/>) | dualpi2_update(lq, cq) (<xref target="dualq_fig_Algo_pi2_core" format="default"/>) | |||
used to regularly update the base probability (p'), which is | used to regularly update the base probability (p'), which is | |||
squared for the Classic AQM as well as being coupled across to the | squared for the Classic AQM as well as being coupled across to the | |||
L4S queue.</li> | L4S queue.</li> | |||
</ul> | </ul> | |||
<t>It also uses the following functions that are not shown in | <t>It also uses the following functions that are not shown in | |||
full here:</t> | full here:</t> | |||
<ul spacing="normal"> | <ul spacing="normal"> | |||
<li>scheduler(), which selects between the head packets of the two | <li>scheduler(), which selects between the head packets of the two | |||
queues; the choice of scheduler technology is discussed later;</li> | queues. The choice of scheduler technology is discussed later.</li> | |||
<li>cq.byt() or lq.byt() returns the current length | <li>cq.byt() or lq.byt() returns the current length | |||
(aka. backlog) of the relevant queue in bytes;</li> | (a.k.a. backlog) of the relevant queue in bytes.</li> | |||
<li>cq.len() or lq.len() returns the current length of the relevant | <li>cq.len() or lq.len() returns the current length of the relevant | |||
queue in packets;</li> | queue in packets.</li> | |||
<li>cq.time() or lq.time() returns the current queuing delay of the | <li>cq.time() or lq.time() returns the current queuing delay of the | |||
relevant queue in units of time (see Note a);</li> | relevant queue in units of time (see <xref target="note_qdelay" form | |||
<li>mark(pkt) and drop(pkt) for ECN-marking and dropping a | at="none">Note a</xref> below).</li> | |||
packet;</li> | <li>mark(pkt) and drop(pkt) for ECN marking and dropping a | |||
packet.</li> | ||||
</ul> | </ul> | |||
<t>In experiments so far (building on experiments with PIE) on | <t>In experiments so far (building on experiments with PIE) on | |||
broadband access links ranging from 4 Mb/s to 200 Mb/s with base RTTs | broadband access links ranging from 4 Mb/s to 200 Mb/s with base RTTs | |||
from 5 ms to 100 ms, DualPI2 achieves good results with the default | from 5 ms to 100 ms, DualPI2 achieves good results with the default | |||
parameters in <xref target="dualq_fig_Algo_pi2_core_header" format="default"/>. The | parameters in <xref target="dualq_fig_Algo_pi2_core_header" format="default"/>. The | |||
parameters are categorised by whether they relate to the Base PI2 AQM, | parameters are categorised by whether they relate to the PI2 AQM, | |||
the L4S AQM or the framework coupling them together. Constants and | the L4S AQM, or the framework coupling them together. Constants and | |||
variables derived from these parameters are also included at the end | variables derived from these parameters are also included at the end | |||
of each category. Each parameter is explained as it is encountered in | of each category. Each parameter is explained as it is encountered in | |||
the walk-through of the pseudocode below, and the rationale for the | the walk-through of the pseudocode below, and the rationale for the | |||
chosen defaults is given so that sensible values can be used in | chosen defaults is given so that sensible values can be used in | |||
scenarios other than the regular public Internet.</t> | scenarios other than the regular public Internet.</t> | |||
<figure anchor="dualq_fig_Algo_pi2_core_header"> | <figure anchor="dualq_fig_Algo_pi2_core_header"> | |||
<name>Example Header Pseudocode for DualQ Coupled PI2 AQM</name> | <name>Example Header Pseudocode for DualQ Coupled PI2 AQM</name> | |||
<artwork name="" type="" align="left" alt=""><![CDATA[1: dualpi2_params_init(...) { | <sourcecode><![CDATA[ | |||
% Set input parameter defaults | 1: dualpi2_params_init(...) { % Set input parameter defaults | |||
2: % DualQ Coupled framework parameters | 2: % DualQ Coupled framework parameters | |||
3: k = 2 % Coupling factor | 3: k = 2 % Coupling factor | |||
4: % NOT SHOWN % scheduler-dependent weight or equival't parameter | 4: % NOT SHOWN % scheduler-dependent weight or equival't parameter | |||
5: limit = MAX_LINK_RATE * 250 ms % Dual buffer size | 5: limit = MAX_LINK_RATE * 250 ms % Dual buffer size | |||
6: | 6: | |||
7: % PI2 Classic AQM parameters | 7: % PI2 Classic AQM parameters | |||
8: target = 15 ms % Queue delay target | 8: target = 15 ms % Queue delay target | |||
9: RTT_max = 100 ms % Worst case RTT expected | 9: RTT_max = 100 ms % Worst case RTT expected | |||
10: % PI2 constants derived from above PI2 parameters | 10: % PI2 constants derived from above PI2 parameters | |||
11: p_Cmax = min(1/k^2, 1) % Max Classic drop/mark prob | 11: p_Cmax = min(1/k^2, 1) % Max Classic drop/mark prob | |||
12: Tupdate = min(target, RTT_max/3) % PI sampling interval | 12: Tupdate = min(target, RTT_max/3) % PI sampling interval | |||
13: alpha = 0.1 * Tupdate / RTT_max^2 % PI integral gain in Hz | 13: alpha = 0.1 * Tupdate / RTT_max^2 % PI integral gain in Hz | |||
14: beta = 0.3 / RTT_max % PI proportional gain in Hz | 14: beta = 0.3 / RTT_max % PI proportional gain in Hz | |||
15: | 15: | |||
16: % L4S ramp AQM parameters | 16: % L4S ramp AQM parameters | |||
17: minTh = 800 us % L4S min marking threshold in time units | 17: minTh = 800 us % L4S min marking threshold in time units | |||
18: range = 400 us % Range of L4S ramp in time units | 18: range = 400 us % Range of L4S ramp in time units | |||
19: Th_len = 1 pkt % Min L4S marking threshold in packets | 19: Th_len = 1 pkt % Min L4S marking threshold in packets | |||
20: % L4S constants | 20: % L4S constants | |||
21: p_Lmax = 1 % Max L4S marking prob | 21: p_Lmax = 1 % Max L4S marking prob | |||
22: } | 22: }]]></sourcecode> | |||
]]></artwork> | ||||
</figure> | </figure> | |||
<t>The overall goal of the code is to apply the marking and dropping | <t>The overall goal of the code is to apply the marking and dropping | |||
probabilities for L4S and Classic traffic (p_L and p_C). These are | probabilities for L4S and Classic traffic (p_L and p_C). These are | |||
derived from the underlying base probabilities p'_L and p' driven | derived from the underlying base probabilities p'_L and p' driven, | |||
respectively by the traffic in the L and C queues. The marking | respectively, by the traffic in the L and C queues. The marking | |||
probability for the L queue (p_L) depends on both the base probability | probability for the L queue (p_L) depends on both the base probability | |||
in its own queue (p'_L) and a probability called p_CL, which is | in its own queue (p'_L) and a probability called p_CL, which is | |||
coupled across from p' in the C queue (see <xref target="dualq_coupled_structure" format="default"/> for the derivation of the specific | coupled across from p' in the C queue (see <xref target="dualq_coupled_structure" format="default"/> for the derivation of the specific | |||
equations and dependencies).</t> | equations and dependencies).</t> | |||
<t>The probabilities p_CL and p_C are derived in lines 4 and 5 of the | <t>The probabilities p_CL and p_C are derived in lines 4 and 5 of the | |||
dualpi2_update() function (<xref target="dualq_fig_Algo_pi2_core" format="default"/>) | dualpi2_update() function (<xref target="dualq_fig_Algo_pi2_core" format="default"/>) | |||
then used in the dualpi2_dequeue() function where p_L is also derived | then used in the dualpi2_dequeue() function where p_L is also derived | |||
from p_CL at line 6 (<xref target="dualq_fig_Algo_pi2_dequeue" format="default"/>). The | from p_CL at line 6 (<xref target="dualq_fig_Algo_pi2_dequeue" format="default"/>). The | |||
code walk-through below builds up to explaining that part of the code | code walk-through below builds up to explaining that part of the code | |||
eventually, but it starts from packet arrival.</t> | eventually, but it starts from packet arrival.</t> | |||
<figure anchor="dualq_fig_Algo_pi2_enqueue"> | <figure anchor="dualq_fig_Algo_pi2_enqueue"> | |||
<name>Example Enqueue Pseudocode for DualQ Coupled PI2 AQM</name> | <name>Example Enqueue Pseudocode for DualQ Coupled PI2 AQM</name> | |||
<artwork name="" type="" align="left" alt=""><![CDATA[1: dualpi2_enqueue(lq, cq, pkt) { | <sourcecode><![CDATA[ | |||
% Test limit and classify lq or cq | 1: dualpi2_enqueue(lq, cq, pkt) { % Test limit and classify lq or cq | |||
2: if ( lq.byt() + cq.byt() + MTU > limit) | 2: if ( lq.byt() + cq.byt() + MTU > limit) | |||
3: drop(pkt) % drop packet if buffer is full | 3: drop(pkt) % drop packet if buffer is full | |||
4: timestamp(pkt) % only needed if using the sojourn technique | 4: timestamp(pkt) % only needed if using the sojourn technique | |||
5: % Packet classifier | 5: % Packet classifier | |||
6: if ( ecn(pkt) modulo 2 == 1 ) % ECN bits = ECT(1) or CE | 6: if ( ecn(pkt) modulo 2 == 1 ) % ECN bits = ECT(1) or CE | |||
7: lq.enqueue(pkt) | 7: lq.enqueue(pkt) | |||
8: else % ECN bits = not-ECT or ECT(0) | 8: else % ECN bits = not-ECT or ECT(0) | |||
9: cq.enqueue(pkt) | 9: cq.enqueue(pkt) | |||
10: } | 10: }]]></sourcecode> | |||
]]></artwork> | ||||
</figure> | </figure> | |||
<figure anchor="dualq_fig_Algo_pi2_dequeue"> | <figure anchor="dualq_fig_Algo_pi2_dequeue"> | |||
<name>Example Dequeue Pseudocode for DualQ Coupled PI2 AQM</name> | <name>Example Dequeue Pseudocode for DualQ Coupled PI2 AQM</name> | |||
<artwork name="" type="" align="left" alt=""><![CDATA[1: dualpi2_dequeue(lq, cq, pkt) { | <sourcecode><![CDATA[ | |||
% Couples L4S & Classic queues | 1: dualpi2_dequeue(lq, cq, pkt) { % Couples L4S & Classic queues | |||
2: while ( lq.byt() + cq.byt() > 0 ) { | 2: while ( lq.byt() + cq.byt() > 0 ) { | |||
3: if ( scheduler() == lq ) { | 3: if ( scheduler() == lq ) { | |||
4: lq.dequeue(pkt) % Scheduler chooses lq | 4: lq.dequeue(pkt) % Scheduler chooses lq | |||
5: p'_L = laqm(lq.time()) % Native LAQM | 5: p'_L = laqm(lq.time()) % Native LAQM | |||
6: p_L = max(p'_L, p_CL) % Combining function | 6: p_L = max(p'_L, p_CL) % Combining function | |||
7: if ( recur(lq, p_L) ) % Linear marking | 7: if ( recur(lq, p_L) ) % Linear marking | |||
8: mark(pkt) | 8: mark(pkt) | |||
9: } else { | 9: } else { | |||
10: cq.dequeue(pkt) % Scheduler chooses cq | 10: cq.dequeue(pkt) % Scheduler chooses cq | |||
11: if ( recur(cq, p_C) ) { % probability p_C = p'^2 | 11: if ( recur(cq, p_C) ) { % probability p_C = p'^2 | |||
skipping to change at line 2607 ¶ | skipping to change at line 1982 ¶ | |||
21: return(NULL) % no packet to dequeue | 21: return(NULL) % no packet to dequeue | |||
22: } | 22: } | |||
23: recur(q, likelihood) { % Returns TRUE with a certain likelihood | 23: recur(q, likelihood) { % Returns TRUE with a certain likelihood | |||
24: q.count += likelihood | 24: q.count += likelihood | |||
25: if (q.count > 1) { | 25: if (q.count > 1) { | |||
26: q.count -= 1 | 26: q.count -= 1 | |||
27: return TRUE | 27: return TRUE | |||
28: } | 28: } | |||
29: return FALSE | 29: return FALSE | |||
30: } | 30: }]]></sourcecode> | |||
]]></artwork> | ||||
</figure> | </figure> | |||
<t>When packets arrive, first a common queue limit is checked as shown | <t>When packets arrive, a common queue limit is checked first as shown | |||
in line 2 of the enqueuing pseudocode in <xref target="dualq_fig_Algo_pi2_enqueue" format="default"/>. This assumes a shared buffer | in line 2 of the enqueuing pseudocode in <xref target="dualq_fig_Algo_pi2_enqueue" format="default"/>. This assumes a shared buffer | |||
for the two queues (Note b discusses the merits of separate buffers). | for the two queues (<xref target="note_separate_buffers" format="none">Note b</xref> discusses the merits of separate buffers). | |||
In order to avoid any bias against larger packets, 1 MTU of space is | In order to avoid any bias against larger packets, 1 MTU of space is | |||
always allowed, and the limit is deliberately tested before | always allowed, and the limit is deliberately tested before | |||
enqueue.</t> | enqueue.</t> | |||
<t>If limit is not exceeded, the packet is timestamped in line 4 (only | <t>If limit is not exceeded, the packet is timestamped in line 4 (only | |||
if the sojourn time technique is being used to measure queue delay; | if the sojourn time technique is being used to measure queue delay; | |||
see Note a for alternatives).</t> | see <xref target="note_qdelay" format="none">Note a</xref> below for alternatives).</t> | |||
<t>At lines 5-9, the packet is classified and enqueued to the Classic | <t>At lines 5-9, the packet is classified and enqueued to the Classic | |||
or L4S queue dependent on the least significant bit of the ECN field | or L4S queue dependent on the least significant bit (LSB) of the ECN field | |||
in the IP header (line 6). Packets with a codepoint having an LSB of 0 | in the IP header (line 6). Packets with a codepoint having an LSB of 0 | |||
(Not-ECT and ECT(0)) will be enqueued in the Classic queue. Otherwise, | (Not-ECT and ECT(0)) will be enqueued in the Classic queue. Otherwise, | |||
ECT(1) and CE packets will be enqueued in the L4S queue. Optional | ECT(1) and CE packets will be enqueued in the L4S queue. Optional | |||
additional packet classification flexibility is omitted for brevity | additional packet classification flexibility is omitted for brevity | |||
(see the L4S ECN protocol <xref target="I-D.ietf-tsvwg-ecn-l4s-id" format="default"/>).</t> | (see the L4S ECN protocol <xref target="RFC9331" format="default"/>).</t> | |||
<t>The dequeue pseudocode (<xref target="dualq_fig_Algo_pi2_dequeue" format="default"/>) is repeatedly called whenever | <t>The dequeue pseudocode (<xref target="dualq_fig_Algo_pi2_dequeue" format="default"/>) is repeatedly called whenever | |||
the lower layer is ready to forward a packet. It schedules one packet | the lower layer is ready to forward a packet. It schedules one packet | |||
for dequeuing (or zero if the queue is empty) then returns control to | for dequeuing (or zero if the queue is empty) then returns control to | |||
the caller, so that it does not block while that packet is being | the caller so that it does not block while that packet is being | |||
forwarded. While making this dequeue decision, it also makes the | forwarded. While making this dequeue decision, it also makes the | |||
necessary AQM decisions on dropping or marking. The alternative of | necessary AQM decisions on dropping or marking. The alternative of | |||
applying the AQMs at enqueue would shift some processing from the | applying the AQMs at enqueue would shift some processing from the | |||
critical time when each packet is dequeued. However, it would also add | critical time when each packet is dequeued. However, it would also add | |||
a whole queue of delay to the control signals, making the control loop | a whole queue of delay to the control signals, making the control loop | |||
sloppier (for a typical RTT it would double the Classic queue's | sloppier (for a typical RTT, it would double the Classic queue's | |||
feedback delay).</t> | feedback delay).</t> | |||
<t>All the dequeue code is contained within a large while loop so that | <t>All the dequeue code is contained within a large while loop so that | |||
if it decides to drop a packet, it will continue until it selects a | if it decides to drop a packet, it will continue until it selects a | |||
packet to schedule. Line 3 of the dequeue pseudocode is where the | packet to schedule. Line 3 of the dequeue pseudocode is where the | |||
scheduler chooses between the L4S queue (lq) and the Classic queue | scheduler chooses between the L4S queue (lq) and the Classic queue | |||
(cq). Detailed implementation of the scheduler is not shown (see | (cq). Detailed implementation of the scheduler is not shown (see | |||
discussion later). </t> | discussion later). </t> | |||
<ul spacing="normal"> | <ul spacing="normal"> | |||
<li> | <li> | |||
<t>If an L4S packet is scheduled, in lines 7 and 8 the packet is | <t>If an L4S packet is scheduled, in lines 7 and 8 the packet is | |||
ECN-marked with likelihood p_L. The recur() function at the end of | ECN-marked with likelihood p_L. The recur() function at the end of | |||
<xref target="dualq_fig_Algo_pi2_dequeue" format="default"/> is used, which is | <xref target="dualq_fig_Algo_pi2_dequeue" format="default"/> is used, which is | |||
preferred over random marking because it avoids delay due to | preferred over random marking because it avoids delay due to | |||
randomization when interpreting congestion signals, but it still | randomization when interpreting congestion signals, but it still | |||
desynchronizes the saw-teeth of the flows. Line 6 calculates p_L | desynchronizes the sawteeth of the flows. Line 6 calculates p_L | |||
as the maximum of the coupled L4S probability p_CL and the | as the maximum of the coupled L4S probability p_CL and the | |||
probability from the native L4S AQM p'_L. This implements the | probability from the Native L4S AQM p'_L. This implements the | |||
max() function shown in <xref target="dualq_fig_structure" format="default"/> to | max() function shown in <xref target="dualq_fig_structure" format="default"/> to | |||
couple the outputs of the two AQMs together. Of the two | couple the outputs of the two AQMs together. Of the two | |||
probabilities input to p_L in line 6:</t> | probabilities input to p_L in line 6:</t> | |||
<ul spacing="normal"> | <ul spacing="normal"> | |||
<li>p'_L is calculated per packet in line 5 by the laqm() | <li>p'_L is calculated per packet in line 5 by the laqm() | |||
function (see <xref target="dualq_fig_Algo_laqm_core" | function (see <xref target="dualq_fig_Algo_laqm_core" | |||
format="default"/>),</li> | format="default"/>), whereas</li> | |||
<li>Whereas p_CL is maintained by the dualpi2_update() function | <li>p_CL is maintained by the dualpi2_update() function, | |||
which runs every Tupdate (Tupdate is set in line 12 of <xref target="dualq_fig_Algo_pi2_core_header" format="default"/>).</li> | which runs every Tupdate (Tupdate is set in line 12 of <xref target="dualq_fig_Algo_pi2_core_header" format="default"/>).</li> | |||
</ul> | </ul> | |||
</li> | </li> | |||
<li>If a Classic packet is scheduled, lines 10 to 17 drop or mark | <li>If a Classic packet is scheduled, lines 10 to 17 drop or mark | |||
the packet with probability p_C.</li> | the packet with probability p_C.</li> | |||
</ul> | </ul> | |||
<t>The Native L4S AQM algorithm (<xref target="dualq_fig_Algo_laqm_core" format="default"/>) is a ramp function, similar to | <t>The Native L4S AQM algorithm (<xref target="dualq_fig_Algo_laqm_core" format="default"/>) is a ramp function, similar to | |||
the RED algorithm, but simplified as follows:</t> | the RED algorithm, but simplified as follows:</t> | |||
<ul spacing="normal"> | <ul spacing="normal"> | |||
<li>The extent of the ramp is defined in units of queuing delay, | <li>The extent of the ramp is defined in units of queuing delay, | |||
not bytes, so that configuration remains invariant as the queue | not bytes, so that configuration remains invariant as the queue | |||
departure rate varies.</li> | departure rate varies.</li> | |||
<li>It uses instantaneous queueing delay, which avoids the | <li>It uses instantaneous queuing delay, which avoids the | |||
complexity of smoothing, but also avoids embedding a worst-case | complexity of smoothing, but also avoids embedding a worst-case | |||
RTT of smoothing delay in the network (see <xref target="dualq_coupled" format="default"/>).</li> | RTT of smoothing delay in the network (see <xref target="dualq_coupled" format="default"/>).</li> | |||
<li>The ramp rises linearly directly from 0 to 1, not to an | <li>The ramp rises linearly directly from 0 to 1, not to an | |||
intermediate value of p'_L as RED would, because there is no need | intermediate value of p'_L as RED would, because there is no need | |||
to keep ECN marking probability low.</li> | to keep ECN-marking probability low.</li> | |||
<li>Marking does not have to be randomized. Determinism is used | <li>Marking does not have to be randomized. Determinism is used | |||
instead of randomness; to reduce the delay necessary to smooth out | instead of randomness to reduce the delay necessary to smooth out | |||
the noise of randomness from the signal.</li> | the noise of randomness from the signal.</li> | |||
</ul> | </ul> | |||
<t>The ramp function requires two configuration parameters, the | <t>The ramp function requires two configuration parameters, the | |||
minimum threshold (minTh) and the width of the ramp (range), both in | minimum threshold (minTh) and the width of the ramp (range), both in | |||
units of queuing time, as shown in lines 17 & 18 of the | units of queuing time, as shown in lines 17 and 18 of the | |||
initialization function in <xref target="dualq_fig_Algo_pi2_core_header" format="default"/>. The ramp function can be | initialization function in <xref target="dualq_fig_Algo_pi2_core_header" format="default"/>. The ramp function can be | |||
configured as a step (see Note c).</t> | configured as a step (see <xref target="note_ramp" format="none">Note | |||
<t>Although the DCTCP paper <xref target="Alizadeh-stability" | c</xref>).</t> | |||
format="default"/> | <t>Although the DCTCP paper <xref target="Alizadeh-stability" | |||
recommends an ECN marking threshold of 0.17*RTT_typ, it also shows | format="default"/> | |||
recommends an ECN-marking threshold of 0.17*RTT_typ, it also shows | ||||
that the threshold can be much shallower with hardly any worse | that the threshold can be much shallower with hardly any worse | |||
under-utilization of the link (because the amplitude of DCTCP's | underutilization of the link (because the amplitude of DCTCP's | |||
sawteeth is so small). Based on extensive experiments, for the public | sawteeth is so small). Based on extensive experiments, for the public | |||
Internet the default minimum ECN marking threshold (target) in | Internet the default minimum ECN-marking threshold (target) in | |||
<xref target="dualq_fig_Algo_pi2_core_header" format="default"/> is considered a good | <xref target="dualq_fig_Algo_pi2_core_header" format="default"/> is considered a good | |||
compromise, even though it is significantly smaller fraction of | compromise, even though it is a significantly smaller fraction of | |||
RTT_typ.</t> | RTT_typ.</t> | |||
<figure anchor="dualq_fig_Algo_laqm_core"> | <figure anchor="dualq_fig_Algo_laqm_core"> | |||
<name>Example Pseudocode for the Native L4S AQM</name> | <name>Example Pseudocode for the Native L4S AQM</name> | |||
<artwork name="" type="" align="left" alt=""><![CDATA[1: laqm(qdelay) | <sourcecode><![CDATA[ | |||
{ % Returns native L4S AQM probability | 1: laqm(qdelay) { % Returns Native L4S AQM probability | |||
2: if (qdelay >= maxTh) | 2: if (qdelay >= maxTh) | |||
3: return 1 | 3: return 1 | |||
4: else if (qdelay > minTh) | 4: else if (qdelay > minTh) | |||
5: return (qdelay - minTh)/range % Divide could use a bit-shift | 5: return (qdelay - minTh)/range % Divide could use a bit-shift | |||
6: else | 6: else | |||
7: return 0 | 7: return 0 | |||
8: } | 8: }]]></sourcecode> | |||
]]></artwork> | ||||
</figure> | </figure> | |||
<t/> | <t/> | |||
<figure anchor="dualq_fig_Algo_pi2_core"> | <figure anchor="dualq_fig_Algo_pi2_core"> | |||
<name>Example PI-Update Pseudocode for DualQ Coupled PI2 AQM</name> | <name>Example PI-update Pseudocode for DualQ Coupled PI2 AQM</name> | |||
<artwork name="" type="" align="left" alt=""><![CDATA[1: dualpi2_update(lq, cq) { | <sourcecode><![CDATA[ | |||
% Update p' every Tupdate | 1: dualpi2_update(lq, cq) { % Update p' every Tupdate | |||
2: curq = cq.time() % use queuing time of first-in Classic packet | 2: curq = cq.time() % use queuing time of first-in Classic packet | |||
3: p' = p' + alpha * (curq - target) + beta * (curq - prevq) | 3: p' = p' + alpha * (curq - target) + beta * (curq - prevq) | |||
4: p_CL = k * p' % Coupled L4S prob = base prob * coupling factor | 4: p_CL = k * p' % Coupled L4S prob = base prob * coupling factor | |||
5: p_C = p'^2 % Classic prob = (base prob)^2 | 5: p_C = p'^2 % Classic prob = (base prob)^2 | |||
6: prevq = curq | 6: prevq = curq | |||
7: } | 7: }]]></sourcecode> | |||
]]></artwork> | ||||
</figure> | </figure> | |||
<t keepWithPrevious="true">(Clamping p' within the range [0,1] omitted | <t keepWithPrevious="true" indent='3'>(Note: Clamping p' within the | |||
for clarity - | range [0,1] omitted for clarity -- see below.)</t> | |||
see text)</t> | <t>The coupled marking probability p_CL depends on the base | |||
<t>The coupled marking probability, p_CL depends on the base | probability (p'), which is kept up to date by executing the core PI | |||
probability (p'), which is kept up to date by the core PI algorithm in | algorithm in | |||
<xref target="dualq_fig_Algo_pi2_core" format="default"/> executed every | <xref target="dualq_fig_Algo_pi2_core" format="default"/> every Tupdate. | |||
Tupdate.</t> | </t> | |||
<t>Note that p' solely depends on the queuing time in the Classic | <t>Note that p' solely depends on the queuing time in the Classic | |||
queue. In line 2, the current queuing delay (curq) is evaluated from | queue. In line 2, the current queuing delay (curq) is evaluated from | |||
how long the head packet was in the Classic queue (cq). The function | how long the head packet was in the Classic queue (cq). The function | |||
cq.time() (not shown) subtracts the time stamped at enqueue from the | cq.time() (not shown) subtracts the time stamped at enqueue from the | |||
current time (see Note a) and implicitly takes the current queuing | current time (see <xref target="note_qdelay" | |||
format="none">Note a</xref> below) and implicitly takes the current queuing | ||||
delay as 0 if the queue is empty.</t> | delay as 0 if the queue is empty.</t> | |||
<t>The algorithm centres on line 3, which is a classical | <t>The algorithm centres on line 3, which is a classical | |||
Proportional-Integral (PI) controller that alters p' dependent on: a) | PI controller that alters p' dependent on: a) | |||
the error between the current queuing delay (curq) and the target | the error between the current queuing delay (curq) and the target | |||
queuing delay, 'target'; and b) the change in queuing delay since the | queuing delay (target) and b) the change in queuing delay since the | |||
last sample. The name 'PI' represents the fact that the second factor | last sample. The name 'PI' represents the fact that the second factor | |||
(how fast the queue is growing) is <em>P</em>roportional | (how fast the queue is growing) is Proportional | |||
to load while the first is the <em>I</em>ntegral of | to load while the first is the Integral of | |||
the load (so it removes any standing queue in excess of the | the load (so it removes any standing queue in excess of the | |||
target).</t> | target).</t> | |||
<t>The target parameter can be set based on local knowledge, but the | <t>The target parameter can be set based on local knowledge, but the | |||
aim is for the default to be a good compromise for anywhere in the | aim is for the default to be a good compromise for anywhere in the | |||
intended deployment environment -- the public Internet. According | intended deployment environment -- the public Internet. According | |||
to <xref target="PI2param" format="default"/>, the target queuing delay on line 9 of | to <xref target="PI2param" format="default"/>, the target queuing delay on line 8 of | |||
<xref target="dualq_fig_Algo_pi2_core_header" format="default"/> is related to the | <xref target="dualq_fig_Algo_pi2_core_header" format="default"/> is related to the | |||
typical base RTT worldwide, RTT_typ, by two factors: target = RTT_typ | typical base RTT worldwide, RTT_typ, by two factors: target = RTT_typ | |||
* g * f. Below we summarize the rationale behind these factors and | * g * f. Below, we summarize the rationale behind these factors and | |||
introduce a further adjustment. The two factors ensure that, in a | introduce a further adjustment. The two factors ensure that, in a | |||
large proportion of cases (say 90%), the sawtooth variations in RTT of | large proportion of cases (say 90%), the sawtooth variations in RTT of | |||
a single flow will fit within the buffer without underutilizing the | a single flow will fit within the buffer without underutilizing the | |||
link. Frankly, these factors are educated guesses, but with the | link. Frankly, these factors are educated guesses, but with the | |||
emphasis closer to 'educated' than to 'guess' (see <xref target="PI2param" format="default"/> for full background):</t> | emphasis closer to 'educated' than to 'guess' (see <xref target="PI2param" format="default"/> for the full background):</t> | |||
<ul spacing="normal"> | <ul spacing="normal"> | |||
<li>RTT_typ is taken as 25 ms. This is based on an average CDN | <li>RTT_typ is taken as 25 ms. This is based on an average CDN | |||
latency measured in each country weighted by the number of | latency measured in each country weighted by the number of | |||
Internet users in that country to produce an overall weighted | Internet users in that country to produce an overall weighted | |||
average for the Internet <xref target="PI2param" format="default"/>. Countries | average for the Internet <xref target="PI2param" format="default"/>. Countries | |||
were ranked by number of Internet users, and once 90% of Internet | were ranked by number of Internet users, and once 90% of Internet | |||
users were covered, smaller countries were excluded to avoid | users were covered, smaller countries were excluded to avoid | |||
unrepresentatively small sample sizes. Also, importantly, the data | small sample sizes that would be less representative. Also, importantly, the data | |||
for the average CDN latency in China (with the largest number of | for the average CDN latency in China (with the largest number of | |||
Internet users) has been removed, because the CDN latency was a | Internet users) has been removed, because the CDN latency was a | |||
significant outlier and, on reflection, the experimental technique | significant outlier and, on reflection, the experimental technique | |||
seemed inappropriate to the CDN market in China.</li> | seemed inappropriate to the CDN market in China.</li> | |||
<li>g is taken as 0.38. The factor g is a geometry factor that | <li>g is taken as 0.38. The factor g is a geometry factor that | |||
characterizes the shape of the sawteeth of prevalent Classic | characterizes the shape of the sawteeth of prevalent Classic | |||
congestion controllers. The geometry factor is the fraction of the | congestion controllers. The geometry factor is the fraction of the | |||
amplitude of the sawtooth variability in queue delay that lies | amplitude of the sawtooth variability in queue delay that lies | |||
below the AQM's target. For instance, at low bit rate, the | below the AQM's target. | |||
geometry factor of standard Reno is 0.5, but at higher rates it | For instance, at low bitrates, the | |||
tends to just under 1. According to the census of congestion | geometry factor of standard Reno is 0.5, but at higher rates, it | |||
controllers conducted by Mishra et al. in Jul-Oct | tends towards just under 1. According to the census of congestion | |||
2019 <xref target="CCcensus19" format="default"/>, most Classic TCP | controllers conducted by Mishra et al. in Jul-Oct | |||
traffic | 2019 <xref target="CCcensus19" format="default"/>, most Classic TCP | |||
uses Cubic. And, according to the analysis in | traffic | |||
<xref target="PI2param" format="default"/>, if running over a PI2 AQM, a large proportion | uses CUBIC. And, according to the analysis in | |||
of this Cubic traffic would be in its Reno-Friendly mode, which | <xref target="PI2param" format="default"/>, if running over a PI2 AQM, a large proportion | |||
has a geometry factor of ~0.39 (all known implementations). The | of this CUBIC traffic would be in its Reno-friendly mode, which | |||
rest of the Cubic traffic would be in true Cubic mode, which has a | has a geometry factor of ~0.39 (for all known implementations). The | |||
rest of the CUBIC traffic would be in true CUBIC mode, which has a | ||||
geometry factor of ~0.36. Without modelling the sawtooth profiles | geometry factor of ~0.36. Without modelling the sawtooth profiles | |||
from all the other less prevalent congestion controllers, we | from all the other less prevalent congestion controllers, we | |||
estimate a 7:3 weighted average of these two, resulting in an | estimate a 7:3 weighted average of these two, resulting in an | |||
average geometry factor of 0.38.</li> | average geometry factor of 0.38.</li> | |||
<li>f is taken as 2. The factor f is a safety factor that increases | <li>f is taken as 2. The factor f is a safety factor that increases | |||
the target queue to allow for the distribution of RTT_typ around | the target queue to allow for the distribution of RTT_typ around | |||
its mean. Otherwise, the target queue would only avoid | its mean. Otherwise, the target queue would only avoid | |||
underutilization for those users below the mean. It also provides | underutilization for those users below the mean. It also provides | |||
a safety margin for the proportion of paths in use that span | a safety margin for the proportion of paths in use that span | |||
beyond the distance between a user and their local CDN. Currently, | beyond the distance between a user and their local CDN. Currently, | |||
no data is available on the variance of queue delay around the | no data is available on the variance of queue delay around the | |||
mean in each region, so there is plenty of room for this guess to | mean in each region, so there is plenty of room for this guess to | |||
become more educated.</li> | become more educated.</li> | |||
<li> | <li> | |||
<xref target="PI2param" format="default"/> recommends target = RTT_typ * g * f = | <xref target="PI2param" format="default"/> recommends target = RTT_typ * g * f = | |||
25ms * 0.38 * 2 = 19 ms. However, a further adjustment is | 25 ms * 0.38 * 2 = 19 ms. However, a further adjustment is | |||
warranted, because target is moving year-on-year. The paper is | warranted, because target is moving year-on-year. | |||
based on data collected in 2019, and it mentions evidence from | The paper is | |||
speedtest.net that suggests RTT_typ reduced by 17% (fixed) or 12% | based on data collected in 2019, and it mentions evidence from the | |||
Speedtest Global Index | ||||
that suggests RTT_typ reduced by 17% (fixed) or 12% | ||||
(mobile) between 2020 and 2021. Therefore, we recommend a default | (mobile) between 2020 and 2021. Therefore, we recommend a default | |||
of target = 15 ms at the time of writing (2021).</li> | of target = 15 ms at the time of writing (2021).</li> | |||
</ul> | </ul> | |||
<t>Operators can always use the data and discussion in <xref target="PI2param" format="default"/> to configure a more appropriate target for their | <t>Operators can always use the data and discussion in <xref target="PI2param" format="default"/> to configure a more appropriate target for their | |||
environment. For instance, an operator might wish to question the | environment. For instance, an operator might wish to question the | |||
assumptions called out in that paper, such as the goal of no | assumptions called out in that paper, such as the goal of no | |||
underutilization for a large majority of single flow transfers (given | underutilization for a large majority of single flow transfers (given | |||
many large transfers use multiple flows to avoid the scaling | many large transfers use multiple flows to avoid the scaling | |||
limitations of Classic flows).</t> | limitations of Classic flows).</t> | |||
<t>The two 'gain factors' in line 3 of <xref target="dualq_fig_Algo_pi2_core" format="default"/>, alpha and beta, respectively | <t>The two 'gain factors' in line 3 of <xref target="dualq_fig_Algo_pi2_core" format="default"/>, alpha and beta, respectively | |||
weight how strongly each of the two elements (Integral and | weight how strongly each of the two elements (Integral and | |||
Proportional) alters p'. They are in units of 'per second of delay' or | Proportional) alters p'. They are in units of 'per second of delay' or | |||
Hz, because they transform differences in queueing delay into changes | Hz, because they transform differences in queuing delay into changes | |||
in probability (assuming probability has a value from 0 to 1).</t> | in probability (assuming probability has a value from 0 to 1).</t> | |||
<t>Alpha and beta determine how much p' ought to change after each | <t>Alpha and beta determine how much p' ought to change after each | |||
update interval (Tupdate). For smaller Tupdate, p' should change by | update interval (Tupdate). For a smaller Tupdate, p' should change by | |||
the same amount per second, but in finer more frequent steps. So alpha | the same amount per second but in finer more frequent steps. So alpha | |||
depends on Tupdate (see line 13 of the initialization function in | depends on Tupdate (see line 13 of the initialization function in | |||
<xref target="dualq_fig_Algo_pi2_core_header" format="default"/>). It is best to update | <xref target="dualq_fig_Algo_pi2_core_header" format="default"/>). It is best to update | |||
p' as frequently as possible, but Tupdate will probably be constrained | p' as frequently as possible, but Tupdate will probably be constrained | |||
by hardware performance. As shown in line 13, the update interval | by hardware performance. As shown in line 12, the update interval | |||
should be frequent enough to update at least once in the time taken | should be frequent enough to update at least once in the time taken | |||
for the target queue to drain ('target') as long as it updates at | for the target queue to drain ('target') as long as it updates at | |||
least three times per maximum RTT. Tupdate defaults to 16 ms in the | least three times per maximum RTT. Tupdate defaults to 16 ms in the | |||
reference Linux implementation because it has to be rounded to a | reference Linux implementation because it has to be rounded to a | |||
multiple of 4 ms. For link rates from 4 to 200 Mb/s and a maximum RTT | multiple of 4 ms. For link rates from 4 to 200 Mb/s and a maximum RTT | |||
of 100ms, it has been verified through extensive testing that | of 100 ms, it has been verified through extensive testing that | |||
Tupdate=16ms (as also recommended in the PIE spec <xref target="RFC8033" | Tupdate = 16 ms (as also recommended in the PIE spec <xref target="RFC8033" | |||
format="default"/>) is sufficient.</t> | format="default"/>) is sufficient.</t> | |||
<t>The choice of alpha and beta also determines the AQM's stable | <t>The choice of alpha and beta also determines the AQM's stable | |||
operating range. The AQM ought to change p' as fast as possible in | operating range. The AQM ought to change p' as fast as possible in | |||
response to changes in load without over-compensating and therefore | response to changes in load without overcompensating and therefore | |||
causing oscillations in the queue. Therefore, the values of alpha and | causing oscillations in the queue. Therefore, the values of alpha and | |||
beta also depend on the RTT of the expected worst-case flow | beta also depend on the RTT of the expected worst-case flow | |||
(RTT_max).</t> | (RTT_max).</t> | |||
<t>The maximum RTT of a PI controller (RTT_max in line 10 of <xref target="dualq_fig_Algo_pi2_core_header" format="default"/>) is not an absolute maximum, | <t>The maximum RTT of a PI controller (RTT_max in line 9 of <xref target="dualq_fig_Algo_pi2_core_header" format="default"/>) is not an absolute maximum, | |||
but more instability (more queue variability) sets in for long-running | but more instability (more queue variability) sets in for long-running | |||
flows with an RTT above this value. The propagation delay halfway | flows with an RTT above this value. The propagation delay halfway | |||
round the planet and back in glass fibre is 200 ms. However, hardly | round the planet and back in glass fibre is 200 ms. However, hardly | |||
any traffic traverses such extreme paths and, since the significant | any traffic traverses such extreme paths and, since the significant | |||
consolidation of Internet traffic between 2007 and 2009 <xref target="Labovitz10" format="default"/>, a high and growing proportion of all Internet | consolidation of Internet traffic between 2007 and 2009 <xref target="Labovitz10" format="default"/>, a high and growing proportion of all Internet | |||
traffic (roughly two-thirds at the time of writing) has been served | traffic (roughly two-thirds at the time of writing) has been served | |||
from content distribution networks (CDNs) or 'cloud' services | from CDNs or 'cloud' services | |||
distributed close to end-users. The Internet might change again, but | distributed close to end users. The Internet might change again, but | |||
for now, designing for a maximum RTT of 100ms is a good compromise | for now, designing for a maximum RTT of 100 ms is a good compromise | |||
between faster queue control at low RTT and some instability on the | between faster queue control at low RTT and some instability on the | |||
occasions when a longer path is necessary.</t> | occasions when a longer path is necessary.</t> | |||
<t>Recommended derivations of the gain constants alpha and beta can be | <t>Recommended derivations of the gain constants alpha and beta can be | |||
approximated for Reno over a PI2 AQM as: alpha = 0.1 * Tupdate / | approximated for Reno over a PI2 AQM as: | |||
RTT_max^2; beta = 0.3 / RTT_max, as shown in lines 14 &amp; 15 of | alpha = 0.1 * Tupdate / RTT_max^2; | |||
beta = 0.3 / RTT_max, | ||||
as shown in lines 13 and 14 of | ||||
<xref target="dualq_fig_Algo_pi2_core_header" format="default"/>. These are derived | <xref target="dualq_fig_Algo_pi2_core_header" format="default"/>. These are derived | |||
from the stability analysis in <xref target="PI2" format="default"/>. For the default | from the stability analysis in <xref target="PI2" format="default"/>. For the default | |||
values of Tupdate=16 ms and RTT_max = 100 ms, they result in alpha = | values of Tupdate = 16 ms and RTT_max = 100 ms, they result in alpha = | |||
0.16; beta = 3.2 (discrepancies are due to rounding). These defaults | 0.16; beta = 3.2 (discrepancies are due to rounding). These defaults | |||
have been verified with a wide range of link rates, target delays and | have been verified with a wide range of link rates, target delays, and | |||
a range of traffic models with mixed and similar RTTs, short and long | traffic models with mixed and similar RTTs, short and long | |||
flows, etc.</t> | flows, etc.</t> | |||
<t>In corner cases, p' can overflow the range [0,1] so the resulting | <t>In corner cases, p' can overflow the range [0,1] so the resulting | |||
value of p' has to be bounded (omitted from the pseudocode). Then, as | value of p' has to be bounded (omitted from the pseudocode). Then, as | |||
already explained, the coupled and Classic probabilities are derived | already explained, the coupled and Classic probabilities are derived | |||
from the new p' in lines 4 and 5 of <xref target="dualq_fig_Algo_pi2_core" format="default"/> as p_CL = k*p' and p_C = p'^2.</t> | from the new p' in lines 4 and 5 of <xref target="dualq_fig_Algo_pi2_core" format="default"/> as p_CL = k*p' and p_C = p'^2.</t> | |||
<t>Because the coupled L4S marking probability (p_CL) is factored up | <t>Because the coupled L4S marking probability (p_CL) is factored up | |||
by k, the dynamic gain parameters alpha and beta are also inherently | by k, the dynamic gain parameters alpha and beta are also inherently | |||
factored up by k for the L4S queue. So, the effective gain factor for | factored up by k for the L4S queue. So, the effective gain factor for | |||
the L4S queue is k*alpha (with defaults alpha = 0.16 Hz and k=2, | the L4S queue is k*alpha (with defaults alpha = 0.16 Hz and k = 2, | |||
effective L4S alpha = 0.32 Hz).</t> | effective L4S alpha = 0.32 Hz).</t> | |||
<t>Unlike in PIE <xref target="RFC8033" format="default"/>, alpha and beta do not | <t>Unlike in PIE <xref target="RFC8033" format="default"/>, alpha and beta do not | |||
need to be tuned every Tupdate dependent on p'. Instead, in PI2, alpha | need to be tuned every Tupdate dependent on p'. Instead, in PI2, alpha | |||
and beta are independent of p' because the squaring applied to Classic | and beta are independent of p' because the squaring applied to Classic | |||
traffic tunes them inherently. This is explained in <xref target="PI2" format="default"/>, which also explains why this more principled approach | traffic tunes them inherently. This is explained in <xref target="PI2" format="default"/>, which also explains why this more principled approach | |||
removes the need for most of the heuristics that had to be added to | removes the need for most of the heuristics that had to be added to | |||
PIE.</t> | PIE.</t> | |||
<t>Nonetheless, an implementer might wish to add selected details to | <t>Nonetheless, an implementer might wish to add selected details to | |||
either AQM. For instance the Linux reference DualPI2 implementation | either AQM. For instance, the Linux reference DualPI2 implementation | |||
includes the following (not shown in the pseudocode above):</t> | includes the following (not shown in the pseudocode above):</t> | |||
<ul spacing="normal"> | <ul spacing="normal"> | |||
<li>Classic and coupled marking or dropping (i.e. based on p_C | <li>Classic and coupled marking or dropping (i.e., based on p_C | |||
and p_CL from the PI controller) is not applied to a packet if the | and p_CL from the PI controller) is not applied to a packet if the | |||
aggregate queue length in bytes is &lt; 2 MTU (prior to enqueuing | aggregate queue length in bytes is &lt; 2 MTU (prior to enqueuing | |||
the packet or dequeuing it, depending on whether the AQM is | the packet or dequeuing it, depending on whether the AQM is | |||
configured to be applied at enqueue or dequeue);</li> | configured to be applied at enqueue or dequeue); and</li> | |||
<li>In the WRR scheduler, the 'credit' indicating which queue | <li>in the WRR scheduler, the 'credit' indicating which queue | |||
should transmit is only changed if there are packets in both | should transmit is only changed if there are packets in both | |||
queues (i.e. if there is actual resource contention). This | queues (i.e., if there is actual resource contention). This | |||
means that a properly paced L flow might never be delayed by the | means that a properly paced L flow might never be delayed by the | |||
WRR. The WRR credit is reset in favour of the L queue when the | WRR. The WRR credit is reset in favour of the L queue when the | |||
link is idle.</li> | link is idle.</li> | |||
</ul> | </ul> | |||
<t>An implementer might also wish to add other heuristics, | <t>An implementer might also wish to add other heuristics, | |||
e.g. burst protection <xref target="RFC8033" format="default"/> or | e.g., burst protection <xref target="RFC8033" format="default"/> or | |||
enhanced | enhanced | |||
burst protection <xref target="RFC8034" format="default"/>.</t> | burst protection <xref target="RFC8034" format="default"/>.</t> | |||
<t>Notes:</t> | <t>Notes:</t> | |||
<ol spacing="normal" type="a"><li anchor="dualq_note_qdelay"> | <ol spacing="normal" type="a"> | |||
<li anchor="note_qdelay"> | ||||
<t>The drain rate of the queue can vary | <t>The drain rate of the queue can vary | |||
if it is scheduled relative to other queues, or to cater for | if it is scheduled relative to other queues or if it accommodates | |||
fluctuations in a wireless medium. To auto-adjust to changes in | fluctuations in a wireless medium. To auto-adjust to changes in | |||
drain rate, the queue needs to be measured in time, not bytes or | drain rate, the queue needs to be measured in time, not bytes or | |||
packets <xref target="AQMmetrics" format="default"/>, | packets <xref target="AQMmetrics" format="default"/> | |||
<xref target="CoDel" format="default"/>. | <xref target="CoDel" format="default"/>. | |||
Queuing delay could be measured directly as the sojourn time (aka. | Queuing delay could be measured directly as the sojourn time (a.k.a. | |||
service time) of the queue, by storing a per-packet time-stamp as | service time) of the queue by storing a per-packet timestamp as | |||
each packet is enqueued, and subtracting this from the system time | each packet is enqueued and subtracting it from the system time | |||
when the packet is dequeued. If time-stamping is not easy to | when the packet is dequeued. If timestamping is not easy to | |||
introduce with certain hardware, queuing delay could be predicted | introduce with certain hardware, queuing delay could be predicted | |||
indirectly by dividing the size of the queue by the predicted | indirectly by dividing the size of the queue by the predicted | |||
departure rate, which might be known precisely for some link | departure rate, which might be known precisely for some link | |||
technologies (see for example in DOCSIS PIE [RFC8034]). </t> | technologies (see, for example, DOCSIS PIE <xref target="RFC8034"/>). </t> | |||
<t>However, sojourn time is slow to detect bursts. | <t>However, sojourn time is slow to detect bursts. | |||
For instance, if a burst arrives at an empty queue, the sojourn | For instance, if a burst arrives at an empty queue, the sojourn | |||
time only fully measures the burst's delay when its last packet is | time only fully measures the burst's delay when its last packet is | |||
dequeued, even though the queue has known the size of the burst | dequeued, even though the queue has known the size of the burst | |||
since its last packet was enqueued - so it could have signalled | since its last packet was enqueued -- so it could have signalled | |||
congestion earlier. To remedy this, each head packet can be marked | congestion earlier. To remedy this, each head packet can be marked | |||
when it is dequeued based on the expected delay of the tail packet | when it is dequeued based on the expected delay of the tail packet | |||
behind it, as explained below, rather than based on the head | behind it, as explained below, rather than based on the head | |||
packet's own delay due to the packets in front of it. <xref target="Heist21" format="default"/> identifies a specific scenario where bursty | packet's own delay due to the packets in front of it. "Underutilization with Bursty Traffic" in <xref target="Heist21" format="default"/> identifies a specific scenario where bursty | |||
traffic significantly hits utilization of the L queue. If this | traffic significantly hits utilization of the L queue. If this | |||
effect proves to be more widely applicable, using the delay behind | effect proves to be more widely applicable, using the delay behind | |||
the head could improve performance.</t> | the head could improve performance.</t> | |||
<t>The | <t>The | |||
delay behind the head can be implemented by dividing the backlog | delay behind the head can be implemented by dividing the backlog | |||
at dequeue by the link rate or equivalently multiplying the | at dequeue by the link rate or equivalently multiplying the | |||
backlog by the delay per unit of backlog. The implementation | backlog by the delay per unit of backlog. The implementation | |||
details will depend on whether the link rate is known; if it is | details will depend on whether the link rate is known; if it is | |||
not, a moving average of the delay per unit backlog can be | not, a moving average of the delay per unit backlog can be | |||
maintained. This delay consists of serialization as well as media | maintained. This delay consists of serialization as well as media | |||
acquisition for shared media. So the details will depend strongly | acquisition for shared media. So the details will depend strongly | |||
on the specific link technology, This approach should be less | on the specific link technology. This approach should be less | |||
sensitive to timing errors and cost less in operations and memory | sensitive to timing errors and cost less in operations and memory | |||
than the otherwise equivalent 'scaled sojourn time' metric, which | than the otherwise equivalent 'scaled sojourn time' metric, which | |||
is the sojourn time of a packet scaled by the ratio of the queue | is the sojourn time of a packet scaled by the ratio of the queue | |||
sizes when the packet departed and arrived <xref target="SigQ-Dyn" format="default"/>.</t> | sizes when the packet departed and arrived <xref target="SigQ-Dyn" format="default"/>.</t> | |||
</li> | </li> | |||
<li>Line 2 of the dualpi2_enqueue() function (<xref target="dualq_fig_Algo_pi2_enqueue" format="default"/>) assumes an implementation | <li anchor="note_separate_buffers">Line 2 of the dualpi2_enqueue() function (<xref target="dualq_fig_Algo_pi2_enqueue" format="default"/>) assumes an implementation | |||
where lq and cq share common buffer memory. An alternative | where lq and cq share common buffer memory. An alternative | |||
implementation could use separate buffers for each queue, in which | implementation could use separate buffers for each queue, in which | |||
case the arriving packet would have to be classified first to | case the arriving packet would have to be classified first to | |||
determine which buffer to check for available space. The choice is | determine which buffer to check for available space. The choice is | |||
a trade-off; a shared buffer can use less memory whereas separate | a trade-off; a shared buffer can use less memory whereas separate | |||
buffers isolate the L4S queue from tail-drop due to large bursts | buffers isolate the L4S queue from tail drop due to large bursts | |||
of Classic traffic (e.g. a Classic Reno TCP during slow-start | of Classic traffic (e.g., a Classic Reno TCP during slow-start | |||
over a long RTT).</li> | over a long RTT).</li> | |||
<li> | <li anchor="note_ramp"> | |||
<t>There has been some concern that using the step function of | <t>There has been some concern that using the step function of | |||
DCTCP for the Native L4S AQM requires end-systems to smooth the | DCTCP for the Native L4S AQM requires end systems to smooth the | |||
signal for an unnecessarily large number of round trips to ensure | signal for an unnecessarily large number of round trips to ensure | |||
sufficient fidelity. A ramp is no worse than a step in initial | sufficient fidelity. A ramp is no worse than a step in initial | |||
experiments with existing DCTCP. Therefore, it is recommended that | experiments with existing DCTCP. Therefore, it is recommended that | |||
a ramp is configured in place of a step, which will allow | a ramp is configured in place of a step, which will allow | |||
congestion control algorithms to investigate faster smoothing | congestion control algorithms to investigate faster smoothing | |||
algorithms.</t> | algorithms.</t> | |||
<t>A ramp is more general that a | <t>A ramp is more general than a | |||
step, because an operator can effectively turn the ramp into a | step, because an operator can effectively turn the ramp into a | |||
step function, as used by DCTCP, by setting the range to zero. | step function, as used by DCTCP, by setting the range to zero. | |||
There will not be a divide by zero problem at line 5 of <xref target="dualq_fig_Algo_laqm_core" format="default"/> because, if minTh is equal to | There will not be a divide by zero problem at line 5 of <xref target="dualq_fig_Algo_laqm_core" format="default"/> because, if minTh is equal to | |||
maxTh, the condition for this ramp calculation cannot arise.</t> | maxTh, the condition for this ramp calculation cannot arise.</t> | |||
</li> | </li> | |||
</ol> | </ol> | |||
</section> | </section> | |||
<section anchor="dualq_Ex_algo_pi2-2" numbered="true" toc="default"> | <section anchor="dualq_Ex_algo_pi2-2" numbered="true" toc="default"> | |||
<name>Pass #2: Edge-Case Details</name> | <name>Pass #2: Edge-Case Details</name> | |||
<t>This section takes a second pass through the pseudocode adding | <t>This section takes a second pass through the pseudocode to add | |||
details of two edge-cases: low link rate and overload. <xref target="dualq_fig_Algo_pi2_full_dequeue" format="default"/> repeats the dequeue | details of two edge-cases: low link rate and overload. <xref target="dualq_fig_Algo_pi2_full_dequeue" format="default"/> repeats the dequeue | |||
function of <xref target="dualq_fig_Algo_pi2_dequeue" format="default"/>, but with | function of <xref target="dualq_fig_Algo_pi2_dequeue" format="default"/>, but with | |||
details of both edge-cases added. Similarly, <xref target="dualq_fig_Algo_pi2_full_core" format="default"/> repeats the core PI algorithm | details of both edge-cases added. Similarly, <xref target="dualq_fig_Algo_pi2_full_core" format="default"/> repeats the core PI algorithm | |||
of <xref target="dualq_fig_Algo_pi2_core" format="default"/>, but with overload details | of <xref target="dualq_fig_Algo_pi2_core" format="default"/>, but with overload details | |||
added. The initialization, enqueue, L4S AQM and recur functions are | added. The initialization, enqueue, L4S AQM, and recur functions are | |||
unchanged.</t> | unchanged.</t> | |||
<t>The link rate can be so low that it takes a single packet queue | <t>The link rate can be so low that it takes a single packet queue | |||
longer to serialize than the threshold delay at which ECN marking | longer to serialize than the threshold delay at which ECN marking | |||
starts to be applied in the L queue. Therefore, a minimum marking | starts to be applied in the L queue. Therefore, a minimum marking | |||
threshold parameter in units of packets rather than time is necessary | threshold parameter in units of packets rather than time is necessary | |||
(Th_len, default 1 packet in line 19 of <xref target="dualq_fig_Algo_pi2 _core_header" format="default"/>) to ensure that the ramp | (Th_len, default 1 packet in line 19 of <xref target="dualq_fig_Algo_pi2 _core_header" format="default"/>) to ensure that the ramp | |||
does not trigger excessive marking on slow links. Where an | does not trigger excessive marking on slow links. Where an | |||
implementation knows the link rate, it can set up this minimum at the | implementation knows the link rate, it can set up this minimum at the | |||
time it is configured. For instance, it would divide 1 MTU by the link | time it is configured. | |||
For instance, it would divide 1 MTU by the link | ||||
rate to convert it into a serialization time, then if the lower | rate to convert it into a serialization time, then if the lower | |||
threshold of the Native L AQM ramp was lower than this serialization | threshold of the Native L AQM ramp was lower than this serialization | |||
time, it could increase the thresholds to shift the bottom of the ramp | time, it could increase the thresholds to shift the bottom of the ramp | |||
to 2 MTU. This is the approach used in DOCSIS <xref target="DOCSIS3.1" f ormat="default"/>, because the configured link rate is dedicated to | to 2 MTU. This is the approach used in DOCSIS <xref target="DOCSIS3.1" f ormat="default"/>, because the configured link rate is dedicated to | |||
the DualQ.</t> | the DualQ.</t> | |||
<t>The pseudocode given here applies where the link rate is unknown, | <t>The pseudocode given here applies where the link rate is unknown, | |||
which is more common for software implementations that might be | which is more common for software implementations that might be | |||
deployed in scenarios where the link is shared with other queues. In | deployed in scenarios where the link is shared with other queues. In | |||
lines 5a to 5d in <xref target="dualq_fig_Algo_pi2_full_dequeue" format= "default"/> the | lines 5a to 5d in <xref target="dualq_fig_Algo_pi2_full_dequeue" format= "default"/>, the | |||
native L4S marking probability, p'_L, is zeroed if the queue is only 1 | native L4S marking probability, p'_L, is zeroed if the queue is only 1 | |||
packet (in the default configuration).</t> | packet (in the default configuration).</t> | |||
<t>Linux implementation note:</t> | <aside><t>Linux implementation note: In Linux, the check that the | |||
<ul spacing="normal"> | queue exceeds Th_len before marking with the Native L4S AQM is | |||
<li>In Linux, the check that the queue exceeds Th_len before | actually at enqueue, not dequeue; otherwise, it would exempt the last | |||
marking with the native L4S AQM is actually at enqueue, not | packet of a burst from being marked. The result of the check is | |||
dequeue, otherwise it would exempt the last packet of a burst from | conveyed from enqueue to the dequeue function via a boolean in the | |||
being marked. The result of the check is conveyed from enqueue to | packet metadata.</t> | |||
the dequeue function via a boolean in the packet metadata.</li> | </aside> | |||
</ul> | ||||
<t>Persistent overload is deemed to have occurred when Classic | <t>Persistent overload is deemed to have occurred when Classic | |||
drop/marking probability reaches p_Cmax. Above this point, the Classic | drop/marking probability reaches p_Cmax. Above this point, the Classic | |||
drop probability is applied to both L and C queues, irrespective of | drop probability is applied to both the L and C queues, irrespective of | |||
whether any packet is ECN-capable. ECT packets that are not dropped | whether any packet is ECN-capable. ECT packets that are not dropped | |||
can still be ECN-marked.</t> | can still be ECN-marked.</t> | |||
<t>In line 10 of the initialization function (<xref target="dualq_fig_Al | ||||
go_pi2_core_header" format="default"/>), the maximum Classic drop | <t>In line 11 of the initialization function (<xref target="dualq_fig_Al | |||
go_pi2_core_header" format="default"/>), the maximum Classic drop | ||||
probability p_Cmax = min(1/k^2, 1) or 1/4 for the default coupling | probability p_Cmax = min(1/k^2, 1) or 1/4 for the default coupling | |||
factor k=2. In practice, 25% has been found to be a good threshold to | factor k = 2. In practice, 25% has been found to be a good threshold to | |||
preserve fairness between ECN capable and non ECN capable traffic. | preserve fairness between ECN-capable and non-ECN-capable traffic. | |||
This protects the queues against both temporary overload from | This protects the queues against both temporary overload from | |||
responsive flows and more persistent overload from any unresponsive | responsive flows and more persistent overload from any unresponsive | |||
traffic that falsely claims to be responsive to ECN.</t> | traffic that falsely claims to be responsive to ECN.</t> | |||
<t>When the Classic ECN marking probability reaches the p_Cmax | <t>When the Classic ECN-marking probability reaches the p_Cmax | |||
threshold (1/k^2), the marking probability coupled to the L4S queue, | threshold (1/k^2), the marking probability that is coupled to the L4S qu | |||
p_CL will always be 100% for any k (by equation (1) in <xref target="dua | eue, | |||
lq_algo" format="default"/>). So, for readability, the constant p_Lmax is | p_CL, will always be 100% for any k (by equation (1) in <xref target="du | |||
defined as 1 in line 22 of the initialization function (<xref target="du | alq_coupled" format="default"/>). So, for readability, the constant p_Lmax is | |||
alq_fig_Algo_pi2_core_header" format="default"/>). This is intended to ensure | defined as 1 in line 21 of the initialization function (<xref target="du | |||
that the L4S queue starts to introduce dropping once ECN-marking | alq_fig_Algo_pi2_core_header" format="default"/>). This is intended to ensure | |||
saturates at 100% and can rise no further. The 'Prague L4S' | that the L4S queue starts to introduce dropping once ECN marking | |||
requirements <xref target="I-D.ietf-tsvwg-ecn-l4s-id" format="default"/> | saturates at 100% and can rise no further. The 'Prague L4S | |||
state | requirements' <xref target="RFC9331" format="default"/> state | |||
that, when an L4S congestion control detects a drop, it falls back to | that when an L4S congestion control detects a drop, it falls back to | |||
a response that coexists with 'Classic' Reno congestion control. So it | a response that coexists with 'Classic' Reno congestion control. So, it | |||
is correct that, when the L4S queue drops packets, it drops them | is correct that when the L4S queue drops packets, it drops them | |||
proportional to p'^2, as if they are Classic packets.</t> | proportional to p'^2, as if they are Classic packets.</t> | |||
<t>The two queues each test for overload in lines 4b and 12b of the | <t>The two queues each test for overload in lines 4b and 12b of the | |||
dequeue function (<xref target="dualq_fig_Algo_pi2_full_dequeue" format= "default"/>). | dequeue function (<xref target="dualq_fig_Algo_pi2_full_dequeue" format= "default"/>). | |||
Lines 8c to 8g drop L4S packets with probability p'^2. Lines 8h to 8i | Lines 8c to 8g drop L4S packets with probability p'^2. Lines 8h to 8i | |||
mark the remaining packets with probability p_CL. Given p_Lmax = 1, | mark the remaining packets with probability p_CL. Given p_Lmax = 1, | |||
all remaining packets will be marked because, to have reached the else | all remaining packets will be marked because, to have reached the else | |||
block at line 8b, p_CL >= 1.</t> | block at line 8b, p_CL >= 1.</t> | |||
<t>Line 2a in the core PI algorithm (<xref target="dualq_fig_Algo_pi2_fu ll_core" format="default"/>) deals with overload of the | <t>Line 2a in the core PI algorithm (<xref target="dualq_fig_Algo_pi2_fu ll_core" format="default"/>) deals with overload of the | |||
L4S queue when there is little or no Classic traffic. This is | L4S queue when there is little or no Classic traffic. This is | |||
necessary, because the core PI algorithm maintains the appropriate | necessary, because the core PI algorithm maintains the appropriate | |||
drop probability to regulate overload, but it depends on the length of | drop probability to regulate overload, but it depends on the length of | |||
the Classic queue. If there is little or no Classic queue the naive PI | the Classic queue. If there is little or no Classic queue, the naive PI- | |||
update function in <xref target="dualq_fig_Algo_pi2_core" format="defaul | update function | |||
t"/> would drop | (<xref target="dualq_fig_Algo_pi2_core" format="default"/>) would drop | |||
nothing, even if the L4S queue were overloaded - so tail drop would | nothing, even if the L4S queue were overloaded -- so tail drop would | |||
have to take over (lines 2 and 3 of <xref target="dualq_fig_Algo_pi2_enq ueue" format="default"/>).</t> | have to take over (lines 2 and 3 of <xref target="dualq_fig_Algo_pi2_enq ueue" format="default"/>).</t> | |||
<t>Instead, line 2a of the full PI update function in <xref target="dual q_fig_Algo_pi2_full_core" format="default"/> ensures that the base PI AQM | <t>Instead, line 2a of the full PI-update function (<xref target="dualq_ fig_Algo_pi2_full_core" format="default"/>) ensures that the Base PI AQM | |||
in line 3 is driven by whichever of the two queue delays is greater, | in line 3 is driven by whichever of the two queue delays is greater, | |||
but line 3 still always uses the same Classic target (default 15 ms). | but line 3 still always uses the same Classic target (default 15 ms). | |||
If L queue delay is greater just because there is little or no Classic | If L queue delay is greater just because there is little or no Classic | |||
traffic, normally it will still be well below the base AQM target. | traffic, normally it will still be well below the Base AQM target. | |||
This is because L4S traffic is also governed by the shallow threshold | This is because L4S traffic is also governed by the shallow threshold | |||
of its own native AQM (lines 5 and 6 of the dequeue algorithm in <xref t | of its own Native AQM (lines 5a to 6 of the dequeue algorithm in <xref t | |||
arget="dualq_fig_Algo_pi2_full_dequeue" format="default"/>). So the base AQM wil | arget="dualq_fig_Algo_pi2_full_dequeue" format="default"/>). So the Base AQM wil | |||
l be | l be | |||
driven to zero and not contribute. However, if the L queue is | driven to zero and not contribute. | |||
However, if the L queue is | ||||
overloaded by traffic that is unresponsive to its marking, the max() | overloaded by traffic that is unresponsive to its marking, the max() | |||
in line 2 enables the L queue to smoothly take over driving the base | in line 2a of <xref target="dualq_fig_Algo_pi2_full_core" format="defaul t"/> enables the L queue to smoothly take over driving the Base | |||
AQM into overload mode even if there is little or no Classic traffic. | AQM into overload mode even if there is little or no Classic traffic. | |||
Then the base AQM will keep the L queue to the Classic target (default | Then the Base AQM will keep the L queue to the Classic target (default | |||
15 ms) by shedding L packets.</t> | 15 ms) by shedding L packets.</t> | |||
<figure anchor="dualq_fig_Algo_pi2_full_dequeue"> | <figure anchor="dualq_fig_Algo_pi2_full_dequeue"> | |||
<name>Example Dequeue Pseudocode for DualQ Coupled PI2 AQM (Including Code for Edge-Cases)</name> | <name>Example Dequeue Pseudocode for DualQ Coupled PI2 AQM (Including Code for Edge-Cases)</name> | |||
<artwork name="" type="" align="left" alt=""><![CDATA[1: dualpi2_dequ | <sourcecode><![CDATA[ | |||
eue(lq, cq, pkt) { % Couples L4S & Classic queues | 1: dualpi2_dequeue(lq, cq, pkt) { % Couples L4S & Classic queues | |||
2: while ( lq.byt() + cq.byt() > 0 ) { | 2: while ( lq.byt() + cq.byt() > 0 ) { | |||
3: if ( scheduler() == lq ) { | 3: if ( scheduler() == lq ) { | |||
4a: lq.dequeue(pkt) % L4S scheduled | 4a: lq.dequeue(pkt) % L4S scheduled | |||
4b: if ( p_CL < p_Lmax ) { % Check for overload saturation | 4b: if ( p_CL < p_Lmax ) { % Check for overload saturation | |||
5a: if (lq.len()>Th_len) % >1 packet queued | 5a: if (lq.len()>Th_len) % >1 packet queued | |||
5b: p'_L = laqm(lq.time()) % Native LAQM | 5b: p'_L = laqm(lq.time()) % Native LAQM | |||
5c: else | 5c: else | |||
5d: p'_L = 0 % Suppress marking 1 pkt queue | 5d: p'_L = 0 % Suppress marking 1 pkt queue | |||
6: p_L = max(p'_L, p_CL) % Combining function | 6: p_L = max(p'_L, p_CL) % Combining function | |||
7: if ( recur(lq, p_L) ) % Linear marking | 7: if ( recur(lq, p_L) ) % Linear marking | |||
skipping to change at line 3075 ¶ | skipping to change at line 2459 ¶ | |||
13: drop(pkt) % squared drop, redo loop | 13: drop(pkt) % squared drop, redo loop | |||
14: continue % continue to the top of the while loop | 14: continue % continue to the top of the while loop | |||
15: } | 15: } | |||
16: mark(pkt) % squared mark | 16: mark(pkt) % squared mark | |||
17: } | 17: } | |||
18: } | 18: } | |||
19: return(pkt) % return the packet and stop | 19: return(pkt) % return the packet and stop | |||
20: } | 20: } | |||
21: return(NULL) % no packet to dequeue | 21: return(NULL) % no packet to dequeue | |||
22: } | 22: } | |||
]]></artwork> | ]]></sourcecode> | |||
</figure> | </figure> | |||
<figure anchor="dualq_fig_Algo_pi2_full_core"> | <figure anchor="dualq_fig_Algo_pi2_full_core"> | |||
<name>Example PI-Update Pseudocode for DualQ Coupled PI2 AQM (Includin | <name>Example PI-update Pseudocode for DualQ Coupled PI2 AQM (Includin | |||
g Overload Code)</name> | g Overload Code)</name> | |||
<artwork name="" type="" align="left" alt=""><![CDATA[1: dualpi2_upda | <sourcecode><![CDATA[ | |||
te(lq, cq) { % Update p' every Tupdate | 1: dualpi2_update(lq, cq) { % Update p' every Tupdate | |||
2a: curq = max(cq.time(), lq.time()) % use greatest queuing time | 2a: curq = max(cq.time(), lq.time()) % use greatest queuing time | |||
3: p' = p' + alpha * (curq - target) + beta * (curq - prevq) | 3: p' = p' + alpha * (curq - target) + beta * (curq - prevq) | |||
4: p_CL = p' * k % Coupled L4S prob = base prob * coupling factor | 4: p_CL = p' * k % Coupled L4S prob = base prob * coupling factor | |||
5: p_C = p'^2 % Classic prob = (base prob)^2 | 5: p_C = p'^2 % Classic prob = (base prob)^2 | |||
6: prevq = curq | 6: prevq = curq | |||
7: } | 7: } | |||
]]></artwork> | ]]></sourcecode> | |||
</figure> | </figure> | |||
<t/> | <t/> | |||
<t>The choice of scheduler technology is critical to overload | <t>The choice of scheduler technology is critical to overload | |||
protection (see <xref target="dualq_Overload_Starvation" format="default "/>). </t> | protection (see <xref target="dualq_Overload_Starvation" format="default "/>). </t> | |||
<ul spacing="normal"> | <ul spacing="normal"> | |||
<li>A well-understood weighted scheduler such as weighted | <li>A well-understood weighted scheduler such as WRR is recommended. A | |||
round-robin (WRR) is recommended. As long as the scheduler weight | s long as the scheduler weight | |||
for Classic is small (e.g. 1/16), its exact value is | for Classic is small (e.g., 1/16), its exact value is | |||
unimportant because it does not normally determine capacity | unimportant, because it does not normally determine capacity | |||
shares. The weight is only important to prevent unresponsive L4S | shares. The weight is only important to prevent unresponsive L4S | |||
traffic starving Classic traffic in the short term (see <xref target ="dualq_Overload_Starvation" format="default"/>). This is because capacity | traffic starving Classic traffic in the short term (see <xref target ="dualq_Overload_Starvation" format="default"/>). This is because capacity | |||
sharing between the queues is normally determined by the coupled | sharing between the queues is normally determined by the coupled | |||
congestion signal, which overrides the scheduler, by making L4S | congestion signal, which overrides the scheduler, by making L4S | |||
sources leave roughly equal per-flow capacity available for | sources leave roughly equal per-flow capacity available for | |||
Classic flows.</li> | Classic flows.</li> | |||
<li> | <li> | |||
<t>Alternatively, a time-shifted FIFO (TS-FIFO) could be used. It | <t>Alternatively, a time-shifted FIFO (TS-FIFO) could be used. It | |||
works by selecting the head packet that has waited the longest, | works by selecting the head packet that has waited the longest, | |||
biased against the Classic traffic by a time-shift of tshift. To | biased against the Classic traffic by a time-shift of tshift. To | |||
implement time-shifted FIFO, the scheduler() function in line 3 of | implement TS-FIFO, the scheduler() function in line 3 of | |||
the dequeue code would simply be implemented as the scheduler() | the dequeue code would simply be implemented as the scheduler() | |||
function at the bottom of <xref target="dualq_fig_Algo_Real" format= "default"/> in | function at the bottom of <xref target="dualq_fig_Algo_Real" format= "default"/> in | |||
<xref target="dualq_Ex_algo" format="default"/>. For the public Inte | <xref target="dualq_Ex_algo" format="default"/>. For the public Inte | |||
rnet a good | rnet, a good | |||
value for tshift is 50ms. For private networks with smaller | value for tshift is 50 ms. For private networks with smaller | |||
diameter, about 4*target would be reasonable. TS-FIFO is a very | diameter, about 4*target would be reasonable. TS-FIFO is a very | |||
simple scheduler, but complexity might need to be added to address | simple scheduler, but complexity might need to be added to address | |||
some deficiencies (which is why it is not recommended over | some deficiencies (which is why it is not recommended over | |||
WRR):</t> | WRR):</t> | |||
<ul spacing="normal"> | <ul spacing="normal"> | |||
<li>TS-FIFO does not fully isolate latency in the L4S queue | <li>TS-FIFO does not fully isolate latency in the L4S queue | |||
from uncontrolled bursts in the Classic queue;</li> | from uncontrolled bursts in the Classic queue;</li> | |||
<li>Using sojourn time for TS-FIFO is only appropriate if | <li>using sojourn time for TS-FIFO is only appropriate if | |||
time-stamping of packets is feasible;</li> | timestamping of packets is feasible; and</li> | |||
<li>Even if time-stamping is supported, the sojourn time of the | <li>even if timestamping is supported, the sojourn time of the | |||
head packet is always stale, so a more instantaneous measure | head packet is always stale, so a more instantaneous measure | |||
of queue delay could be used (see Note a in <xref target="dualq_ Ex_algo_pi2-1" format="default"/>).</li> | of queue delay could be used (see <xref target="note_qdelay" for mat="none">Note a</xref> in <xref target="dualq_Ex_algo_pi2-1" format="default"/ >).</li> | |||
</ul> | </ul> | |||
</li> | </li> | |||
<li>A strict priority scheduler would be inappropriate as discussed | <li>A strict priority scheduler would be inappropriate as discussed | |||
in <xref target="dualq_Overload_Starvation" format="default"/>.</li> | in <xref target="dualq_Overload_Starvation" format="default"/>.</li> | |||
</ul> | </ul> | |||
</section> | </section> | |||
</section> | </section> | |||
<section anchor="dualq_Ex_algo" numbered="true" toc="default"> | <section anchor="dualq_Ex_algo" numbered="true" toc="default"> | |||
<name>Example DualQ Coupled Curvy RED Algorithm</name> | <name>Example DualQ Coupled Curvy RED Algorithm</name> | |||
<t>As another example of a DualQ Coupled AQM algorithm, the pseudocode | <t>As another example of a DualQ Coupled AQM algorithm, the pseudocode | |||
below gives the Curvy RED based algorithm. Although the AQM was designed | below gives the Curvy-RED-based algorithm. Although the AQM was designed | |||
to be efficient in integer arithmetic, to aid understanding it is first | to be efficient in integer arithmetic, to aid understanding it is first | |||
given using floating-point arithmetic (<xref target="dualq_fig_Algo_Real" format="default"/>). Then, one possible optimization for | given using floating-point arithmetic (<xref target="dualq_fig_Algo_Real" format="default"/>). Then, one possible optimization for | |||
integer arithmetic is given, also in pseudocode (<xref target="dualq_fig_A lgo_Int" format="default"/>). To aid comparison, the line numbers are | integer arithmetic is given, also in pseudocode (<xref target="dualq_fig_A lgo_Int" format="default"/>). To aid comparison, the line numbers are | |||
kept in step between the two by using letter suffixes where the longer | kept in step between the two by using letter suffixes where the longer | |||
code needs extra lines.</t> | code needs extra lines.</t> | |||
<section anchor="dualq_Ex_algo_float" numbered="true" toc="default"> | <section anchor="dualq_Ex_algo_float" numbered="true" toc="default"> | |||
<name>Curvy RED in Pseudocode</name> | <name>Curvy RED in Pseudocode</name> | |||
<t>The pseudocode manipulates three main structures of variables: the | <t>The pseudocode manipulates three main structures of variables: the | |||
packet (pkt), the L4S queue (lq) and the Classic queue (cq) and | packet (pkt), the L4S queue (lq), and the Classic queue (cq). It is defi | |||
consists of the following five functions:</t> | ned | |||
and described below in the following three functions:</t> | ||||
<ul spacing="normal"> | <ul spacing="normal"> | |||
<li>The initialization function cred_params_init(...) (<xref target="d ualq_fig_Algo_cred_core_header" format="default"/>) that sets parameter | <li>the initialization function cred_params_init(...) (<xref target="d ualq_fig_Algo_cred_core_header" format="default"/>) that sets parameter | |||
defaults (the API for setting non-default values is omitted for | defaults (the API for setting non-default values is omitted for | |||
brevity);</li> | brevity);</li> | |||
<li>The dequeue function cred_dequeue(lq, cq, pkt) (<xref target="dual | <li>the dequeue function cred_dequeue(lq, cq, pkt) (<xref target="dual | |||
q_fig_Algo_Real" format="default"/>);</li> | q_fig_Algo_Real" format="default"/>); and</li> | |||
<li>The scheduling function scheduler(), which selects between the | <li>the scheduling function scheduler(), which selects between the | |||
head packets of the two queues.</li> | head packets of the two queues.</li> | |||
</ul> | </ul> | |||
<t>It also uses the following functions that are either shown | <t>It also uses the following functions that are either shown | |||
elsewhere, or not shown in full here:</t> | elsewhere or not shown in full here:</t> | |||
<ul spacing="normal"> | <ul spacing="normal"> | |||
<li>The enqueue function, which is identical to that used for | <li>the enqueue function, which is identical to that used for | |||
DualPI2, dualpi2_enqueue(lq, cq, pkt) in <xref target="dualq_fig_Alg o_pi2_enqueue" format="default"/>;</li> | DualPI2, dualpi2_enqueue(lq, cq, pkt) in <xref target="dualq_fig_Alg o_pi2_enqueue" format="default"/>;</li> | |||
<li>mark(pkt) and drop(pkt) for ECN-marking and dropping a | <li>mark(pkt) and drop(pkt) for ECN marking and dropping a | |||
packet;</li> | packet;</li> | |||
<li>cq.byt() or lq.byt() returns the current length | <li>cq.byt() or lq.byt() returns the current length | |||
(aka. backlog) of the relevant queue in bytes;</li> | (a.k.a. backlog) of the relevant queue in bytes; and</li> | |||
<li>cq.time() or lq.time() returns the current queuing delay of the | <li>cq.time() or lq.time() returns the current queuing delay of the | |||
relevant queue in units of time (see Note a in <xref target="dualq_E x_algo_pi2-1" format="default"/>).</li> | relevant queue in units of time (see <xref target="note_qdelay" form at="none">Note a</xref> in <xref target="dualq_Ex_algo_pi2-1" format="default"/> ).</li> | |||
</ul> | </ul> | |||
<t>Because Curvy RED was evaluated before DualPI2, certain | <t>Because Curvy RED was evaluated before DualPI2, certain | |||
improvements introduced for DualPI2 were not evaluated for Curvy RED. | improvements introduced for DualPI2 were not evaluated for Curvy RED. | |||
In the pseudocode below, the straightforward improvements have been | In the pseudocode below, the straightforward improvements have been | |||
added on the assumption they will provide similar benefits, but that | added on the assumption they will provide similar benefits, but that | |||
has not been proven experimentally. They are: i) a conditional | has not been proven experimentally. They are: i) a conditional | |||
priority scheduler instead of strict priority ii) a time-based | priority scheduler instead of strict priority; ii) a time-based | |||
threshold for the native L4S AQM; iii) ECN support for the Classic | threshold for the Native L4S AQM; and iii) ECN support for the Classic | |||
AQM. A recent evaluation has proved that a minimum ECN-marking | AQM. A recent evaluation has proved that a minimum ECN-marking | |||
threshold (minTh) greatly improves performance, so this is also | threshold (minTh) greatly improves performance, so this is also | |||
included in the pseudocode.</t> | included in the pseudocode.</t> | |||
<t>Overload protection has not been added to the Curvy RED pseudocode | <t>Overload protection has not been added to the Curvy RED pseudocode | |||
below so as not to detract from the main features. It would be added | below so as not to detract from the main features. It would be added | |||
in exactly the same way as in <xref target="dualq_Ex_algo_pi2-2" format= "default"/> for | in exactly the same way as in <xref target="dualq_Ex_algo_pi2-2" format= "default"/> for | |||
the DualPI2 pseudocode. The native L4S AQM uses a step threshold, but | the DualPI2 pseudocode. The Native L4S AQM uses a step threshold, but | |||
a ramp like that described for DualPI2 could be used instead. The | a ramp like that described for DualPI2 could be used instead. The | |||
scheduler uses the simple TS-FIFO algorithm, but it could be replaced | scheduler uses the simple TS-FIFO algorithm, but it could be replaced | |||
with WRR.</t> | with WRR.</t> | |||
<t>The Curvy RED algorithm has not been maintained or evaluated to the | <t>The Curvy RED algorithm has not been maintained or evaluated to the | |||
same degree as the DualPI2 algorithm. In initial experiments on | same degree as the DualPI2 algorithm. In initial experiments on | |||
broadband access links ranging from 4 Mb/s to 200 Mb/s with base RTTs | broadband access links ranging from 4 Mb/s to 200 Mb/s with base RTTs | |||
from 5 ms to 100 ms, Curvy RED achieved good results with the default | from 5 ms to 100 ms, Curvy RED achieved good results with the default | |||
parameters in <xref target="dualq_fig_Algo_cred_core_header" format="default"/>.</t> | parameters in <xref target="dualq_fig_Algo_cred_core_header" format="default"/>.</t> | |||
<t>The parameters are categorised by whether they relate to the | <t>The parameters are categorized by whether they relate to the | |||
Classic AQM, the L4S AQM or the framework coupling them together. | Classic AQM, the L4S AQM, or the framework coupling them together. | |||
Constants and variables derived from these parameters are also | Constants and variables derived from these parameters are also | |||
included at the end of each category. These are the raw input | included at the end of each category. These are the raw input | |||
parameters for the algorithm. A configuration front-end could accept | parameters for the algorithm. A configuration front-end could accept | |||
more meaningful parameters (e.g. RTT_max and RTT_typ) and convert | more meaningful parameters (e.g., RTT_max and RTT_typ) and convert | |||
them into these raw parameters, as has been done for DualPI2 in <xref target="dualq_Ex_algo_pi2" format="default"/>. Where necessary, parameters are | them into these raw parameters, as has been done for DualPI2 in <xref target="dualq_Ex_algo_pi2" format="default"/>. Where necessary, parameters are | |||
explained further in the walk-through of the pseudocode below.</t> | explained further in the walk-through of the pseudocode below.</t> | |||
<figure anchor="dualq_fig_Algo_cred_core_header"> | <figure anchor="dualq_fig_Algo_cred_core_header"> | |||
<name>Example Header Pseudocode for DualQ Coupled Curvy RED AQM</name> | <name>Example Header Pseudocode for DualQ Coupled Curvy RED AQM</name> | |||
<artwork name="" type="" align="left" alt=""><![CDATA[1: cred_params_ | <sourcecode><![CDATA[ | |||
init(...) { % Set input parameter defaults | 1: cred_params_init(...) { % Set input parameter defaults | |||
2: % DualQ Coupled framework parameters | 2: % DualQ Coupled framework parameters | |||
3: limit = MAX_LINK_RATE * 250 ms % Dual buffer size | 3: limit = MAX_LINK_RATE * 250 ms % Dual buffer size | |||
4: k' = 1 % Coupling factor as a power of 2 | 4: k' = 1 % Coupling factor as a power of 2 | |||
5: tshift = 50 ms % Time shift of TS-FIFO scheduler | 5: tshift = 50 ms % Time-shift of TS-FIFO scheduler | |||
5a: % Constants derived from framework parameters | 5a: % Constants derived from framework parameters | |||
5b: k = 2^k' % Coupling factor from Equation (1) | 5b: k = 2^k' % Coupling factor from equation (1) | |||
6: | 6: | |||
7: % Classic AQM parameters | 7: % Classic AQM parameters | |||
8: g_C = 5 % EWMA smoothing parameter as a power of 1/2 | 8: g_C = 5 % EWMA smoothing parameter as a power of 1/2 | |||
9: S_C = -1 % Classic ramp scaling factor as a power of 2 | 9: S_C = -1 % Classic ramp scaling factor as a power of 2 | |||
10: minTh = 500 ms % No Classic drop/mark below this queue delay | 10: minTh = 500 ms % No Classic drop/mark below this queue delay | |||
11: % Constants derived from Classic AQM parameters | 11: % Constants derived from Classic AQM parameters | |||
12: gamma = 2^(-g_C) % EWMA smoothing parameter | 12: gamma = 2^(-g_C) % EWMA smoothing parameter | |||
13: range_C = 2^S_C % Range of Classic ramp | 13: range_C = 2^S_C % Range of Classic ramp | |||
14: | 14: | |||
15: % L4S AQM parameters | 15: % L4S AQM parameters | |||
16: T = 1 ms % Queue delay threshold for native L4S AQM | 16: T = 1 ms % Queue delay threshold for Native L4S AQM | |||
17: % Constants derived from above parameters | 17: % Constants derived from above parameters | |||
18: S_L = S_C - k' % L4S ramp scaling factor as a power of 2 | 18: S_L = S_C - k' % L4S ramp scaling factor as a power of 2 | |||
19: range_L = 2^S_L % Range of L4S ramp | 19: range_L = 2^S_L % Range of L4S ramp | |||
20: } | 20: } | |||
]]></artwork> | ]]></sourcecode> | |||
</figure> | </figure> | |||
<figure anchor="dualq_fig_Algo_Real"> | <figure anchor="dualq_fig_Algo_Real"> | |||
<name>Example Dequeue Pseudocode for DualQ Coupled Curvy RED AQM</name> | <name>Example Dequeue Pseudocode for DualQ Coupled Curvy RED AQM</name> | |||
<artwork name="" type="" align="left" alt=""><![CDATA[1: cred_dequeue | <sourcecode><![CDATA[ | |||
(lq, cq, pkt) { % Couples L4S & Classic queues | 1: cred_dequeue(lq, cq, pkt) { % Couples L4S & Classic queues | |||
2: while ( lq.byt() + cq.byt() > 0 ) { | 2: while ( lq.byt() + cq.byt() > 0 ) { | |||
3: if ( scheduler() == lq ) { | 3: if ( scheduler() == lq ) { | |||
4: lq.dequeue(pkt) % L4S scheduled | 4: lq.dequeue(pkt) % L4S scheduled | |||
5a: p_CL = (Q_C - minTh) / range_L | 5a: p_CL = (Q_C - minTh) / range_L | |||
5b: if ( ( lq.time() > T ) | 5b: if ( ( lq.time() > T ) | |||
5c: OR ( p_CL > maxrand(U) ) ) | 5c: OR ( p_CL > maxrand(U) ) ) | |||
6: mark(pkt) | 6: mark(pkt) | |||
7: } else { | 7: } else { | |||
8: cq.dequeue(pkt) % Classic scheduled | 8: cq.dequeue(pkt) % Classic scheduled | |||
9a: Q_C = gamma * cq.time() + (1-gamma) * Q_C % Classic Q EWMA | 9a: Q_C = gamma * cq.time() + (1-gamma) * Q_C % Classic Q EWMA | |||
skipping to change at line 3260 ¶ | skipping to change at line 2647 ¶ | |||
25: maxr = max(maxr, rand()) % 0 <= rand() < 1 | 25: maxr = max(maxr, rand()) % 0 <= rand() < 1 | |||
26: return(maxr) | 26: return(maxr) | |||
27: } | 27: } | |||
28: scheduler() { | 28: scheduler() { | |||
29: if ( lq.time() + tshift >= cq.time() ) | 29: if ( lq.time() + tshift >= cq.time() ) | |||
30: return lq; | 30: return lq; | |||
31: else | 31: else | |||
32: return cq; | 32: return cq; | |||
33: } | 33: } | |||
]]></artwork> | ]]></sourcecode> | |||
</figure> | </figure> | |||
<t>The dequeue pseudocode (<xref target="dualq_fig_Algo_Real" format="default"/>) is | <t>The dequeue pseudocode (<xref target="dualq_fig_Algo_Real" format="default"/>) is | |||
repeatedly called whenever the lower layer is ready to forward a | repeatedly called whenever the lower layer is ready to forward a | |||
packet. It schedules one packet for dequeuing (or zero if the queue is | packet. It schedules one packet for dequeuing (or zero if the queue is | |||
empty) then returns control to the caller, so that it does not block | empty) then returns control to the caller so that it does not block | |||
while that packet is being forwarded. While making this dequeue | while that packet is being forwarded. While making this dequeue | |||
decision, it also makes the necessary AQM decisions on dropping or | decision, it also makes the necessary AQM decisions on dropping or | |||
marking. The alternative of applying the AQMs at enqueue would shift | marking. The alternative of applying the AQMs at enqueue would shift | |||
some processing from the critical time when each packet is dequeued. | some processing from the critical time when each packet is dequeued. | |||
However, it would also add a whole queue of delay to the control | However, it would also add a whole queue of delay to the control | |||
signals, making the control loop very sloppy.</t> | signals, making the control loop very sloppy.</t> | |||
<t>The code is written assuming the AQMs are applied on dequeue (Note | <t>The code is written assuming the AQMs are applied on dequeue | |||
<xref format="counter" target="dualq_note_dequeue"/>). All the dequeue | (<xref format="none" target="dualq_note_dequeue">Note 1</xref>). All the | |||
dequeue | ||||
code is contained within a large while loop so that if it decides to | code is contained within a large while loop so that if it decides to | |||
drop a packet, it will continue until it selects a packet to schedule. | drop a packet, it will continue until it selects a packet to schedule. | |||
If both queues are empty, the routine returns NULL at line 20. Line 3 | If both queues are empty, the routine returns NULL at line 20. Line 3 | |||
of the dequeue pseudocode is where the conditional priority scheduler | of the dequeue pseudocode is where the conditional priority scheduler | |||
chooses between the L4S queue (lq) and the Classic queue (cq). The | chooses between the L4S queue (lq) and the Classic queue (cq). The | |||
time-shifted FIFO scheduler is shown at lines 28-33, which would be | TS-FIFO scheduler is shown at lines 28-33, which would be | |||
suitable if simplicity is paramount (see Note <xref format="counter" tar | suitable if simplicity is paramount (see <xref format="none" target="dua | |||
get="dualq_note_conditional_priority"/>).</t> | lq_note_conditional_priority">Note 2</xref>).</t> | |||
<t>Within each queue, the decision whether to forward, drop or mark is | <t>Within each queue, the decision whether to forward, drop, or mark is | |||
taken as follows (to simplify the explanation, it is assumed that | taken as follows (to simplify the explanation, it is assumed that | |||
U=1):</t> | U = 1):</t> | |||
<dl newline="false" spacing="normal"> | <dl newline="true" spacing="normal"> | |||
<dt>L4S:</dt> | <dt>L4S:</dt> | |||
<dd> | <dd> | |||
<t>If the test at line 3 determines there is an | <t>If the test at line 3 determines there is an | |||
L4S packet to dequeue, the tests at lines 5b and 5c determine | L4S packet to dequeue, the tests at lines 5b and 5c determine | |||
whether to mark it. The first is a simple test of whether the L4S | whether to mark it. The first is a simple test of whether the L4S | |||
queue delay (lq.time()) is greater than a step threshold T (Note | queue delay (lq.time()) is greater than a step threshold T | |||
<xref format="counter" target="dualq_note_step"/>). The second | (<xref target="dualq_note_step" format="none">Note 3</xref>). The se | |||
test is similar to the random ECN marking in RED, but with the | cond | |||
test is similar to the random ECN marking in RED but with the | ||||
following differences: i) marking depends on queuing time, not | following differences: i) marking depends on queuing time, not | |||
bytes, in order to scale for any link rate without being | bytes, in order to scale for any link rate without being | |||
reconfigured; ii) marking of the L4S queue depends on a logical OR | reconfigured; ii) marking of the L4S queue depends on a logical OR | |||
of two tests; one against its own queuing time and one against the | of two tests: one against its own queuing time and one against the | |||
queuing time of the <em>other</em> (Classic) | queuing time of the <em>other</em> (Classic) | |||
queue; iii) the tests are against the instantaneous queuing time | queue; iii) the tests are against the instantaneous queuing time | |||
of the L4S queue, but a smoothed average of the other (Classic) | of the L4S queue but against a smoothed average of the other (Classi | |||
queue; iv) the queue is compared with the maximum of U random | c) | |||
numbers (but if U=1, this is the same as the single random number | queue; and iv) the queue is compared with the maximum of U random | |||
numbers (but if U = 1, this is the same as the single random number | ||||
used in RED).</t> | used in RED).</t> | |||
<t>Specifically, in line 5a the | <t>Specifically, in line 5a, the | |||
coupled marking probability p_CL is set to the amount by which the | coupled marking probability p_CL is set to the amount by which the | |||
averaged Classic queueing delay Q_C exceeds the minimum queuing | averaged Classic queuing delay Q_C exceeds the minimum queuing | |||
delay threshold (minTh) all divided by the L4S scaling parameter | delay threshold (minTh), all divided by the L4S scaling parameter | |||
range_L. range_L represents the queuing delay (in seconds) added | range_L. range_L represents the queuing delay (in seconds) added | |||
to minTh at which marking probability would hit 100%. Then in line | to minTh at which marking probability would hit 100%. Then, in line | |||
5c (if U=1) the result is compared with a uniformly distributed | 5c (if U = 1), the result is compared with a uniformly distributed | |||
random number between 0 and 1, which ensures that, over range_L, | random number between 0 and 1, which ensures that, over range_L, | |||
marking probability will linearly increase with queueing time.</t> | marking probability will linearly increase with queuing time.</t> | |||
</dd> | </dd> | |||
<dt>Classic:</dt> | <dt>Classic:</dt> | |||
<dd> | <dd> | |||
<t>If the scheduler at line 3 chooses to | <t>If the scheduler at line 3 chooses to | |||
dequeue a Classic packet and jumps to line 7, the test at line 10b | dequeue a Classic packet and jumps to line 7, the test at line 10b | |||
determines whether to drop or mark it. But before that, line 9a | determines whether to drop or mark it. But before that, line 9a | |||
updates Q_C, which is an exponentially weighted moving average | updates Q_C, which is an exponentially weighted moving average | |||
(Note <xref format="counter" target="dualq_note_non-EWMA"/>) of | (Note <xref format="counter" target="dualq_note_non-EWMA"/>) of | |||
the queuing time of the Classic queue, where cq.time() is the | the queuing time of the Classic queue, where cq.time() is the | |||
current instantaneous queueing time of the packet at the head of | current instantaneous queuing time of the packet at the head of | |||
the Classic queue (zero if empty) and gamma is the EWMA constant | the Classic queue (zero if empty), and gamma is the exponentially we | |||
(default 1/32, see line 12 of the initialization function). | ighted moving average (EWMA) constant | |||
(default 1/32; see line 12 of the initialization function). | ||||
</t> | </t> | |||
<t>Lines 10a and 10b implement the Classic | <t>Lines 10a and 10b implement the Classic | |||
AQM. In line 10a the averaged queuing time Q_C is divided by the | AQM. In line 10a, the averaged queuing time Q_C is divided by the | |||
Classic scaling parameter range_C, in the same way that queuing | Classic scaling parameter range_C, in the same way that queuing | |||
time was scaled for L4S marking. This scaled queuing time will be | time was scaled for L4S marking. This scaled queuing time will be | |||
squared to compute Classic drop probability so, before it is | squared to compute Classic drop probability. So, before it is | |||
squared, it is effectively the square root of the drop | squared, it is effectively the square root of the drop | |||
probability, hence it is given the variable name sqrt_p_C. The | probability; hence, it is given the variable name sqrt_p_C. The | |||
squaring is done by comparing it with the maximum out of two | squaring is done by comparing it with the maximum out of two | |||
random numbers (assuming U=1). Comparing it with the maximum out | random numbers (assuming U = 1). Comparing it with the maximum out | |||
of two is the same as the logical `AND' of two tests, which | of two is the same as the logical 'AND' of two tests, which | |||
ensures drop probability rises with the square of queuing | ensures drop probability rises with the square of queuing | |||
time.</t> | time.</t> | |||
</dd> | </dd> | |||
</dl> | </dl> | |||
<t>The AQM functions in each queue (lines 5c & 10b) are two cases | <t>The AQM functions in each queue (lines 5c and 10b) are two cases | |||
of a new generalization of RED called Curvy RED, motivated as follows. | of a new generalization of RED called 'Curvy RED', motivated as follows. | |||
When the performance of this AQM was compared with FQ-CoDel and PIE, | When the performance of this AQM was compared with FQ-CoDel and PIE, | |||
their goal of holding queuing delay to a fixed target seemed | their goal of holding queuing delay to a fixed target seemed | |||
misguided <xref target="CRED_Insights" format="default"/>. As the number of flows | misguided <xref target="CRED_Insights" format="default"/>. As the number of flows | |||
increases, if the AQM does not allow host congestion controllers to | increases, if the AQM does not allow host congestion controllers to | |||
increase queuing delay, it has to introduce abnormally high levels of | increase queuing delay, it has to introduce abnormally high levels of | |||
loss. Then loss rather than queuing becomes the dominant cause of | loss. Then loss rather than queuing becomes the dominant cause of | |||
delay for short flows, due to timeouts and tail losses.</t> | delay for short flows, due to timeouts and tail losses.</t> | |||
<t>Curvy RED constrains delay with a softened target that allows some | <t>Curvy RED constrains delay with a softened target that allows some | |||
increase in delay as load increases. This is achieved by increasing | increase in delay as load increases. This is achieved by increasing | |||
drop probability on a convex curve relative to queue growth (the | drop probability on a convex curve relative to queue growth (the | |||
square curve in the Classic queue, if U=1). Like RED, the curve hugs | square curve in the Classic queue, if U = 1). Like RED, the curve hugs | |||
the zero axis while the queue is shallow. Then, as load increases, it | the zero axis while the queue is shallow. Then, as load increases, it | |||
introduces a growing barrier to higher delay. But, unlike RED, it | introduces a growing barrier to higher delay. But, unlike RED, it | |||
requires only two parameters, not three. The disadvantage of Curvy RED | requires only two parameters, not three. The disadvantage of Curvy RED | |||
(compared to a PI controller for example) is that it is not adapted to | (compared to a PI controller, for example) is that it is not adapted to | |||
a wide range of RTTs. Curvy RED can be used as is when the RTT range | a wide range of RTTs. Curvy RED can be used as is when the RTT range | |||
to be supported is limited, otherwise an adaptation mechanism is | to be supported is limited; otherwise, an adaptation mechanism is | |||
needed.</t> | needed.</t> | |||
<t>From our limited experiments with Curvy RED so far, recommended | <t>From our limited experiments with Curvy RED so far, recommended | |||
values of these parameters are: S_C = -1; g_C = 5; T = 5 * MTU at the | values of these parameters are: S_C = -1; g_C = 5; T = 5 * MTU at the | |||
link rate (about 1ms at 60Mb/s) for the range of base RTTs typical on | link rate (about 1 ms at 60 Mb/s) for the range of base RTTs typical on | |||
the public Internet. <xref target="CRED_Insights" format="default"/> explains why these | the public Internet. <xref target="CRED_Insights" format="default"/> explains why these | |||
parameters are applicable whatever rate link this AQM implementation | parameters are applicable whatever rate link this AQM implementation | |||
is deployed on and how the parameters would need to be adjusted for a | is deployed on and how the parameters would need to be adjusted for a | |||
scenario with a different range of RTTs (e.g. a data centre). The | scenario with a different range of RTTs (e.g., a data centre). The | |||
setting of k depends on policy (see <xref target="dualq_norm_reqs" format="default"/> | setting of k depends on policy (see <xref target="dualq_norm_reqs" format="default"/> | |||
and <xref target="dualq_Choosing_k" format="default"/> respectively for its recommended | and <xref target="dualq_Choosing_k" format="default"/>, respectively, for its recommended | |||
setting and guidance on alternatives).</t> | setting and guidance on alternatives).</t> | |||
<t>There is also a cUrviness parameter, U, which is a small positive | <t>There is also a cUrviness parameter, U, which is a small positive | |||
integer. It is likely to take the same hard-coded value for all | integer. It is likely to take the same hard-coded value for all | |||
implementations, once experiments have determined a good value. Only | implementations, once experiments have determined a good value. Only | |||
U=1 has been used in experiments so far, but results might be even | U = 1 has been used in experiments so far, but results might be even | |||
better with U=2 or higher.</t> | better with U = 2 or higher.</t> | |||
<t>Notes:</t> | <t>Notes:</t> | |||
<ol spacing="normal" type="1"><li anchor="dualq_note_dequeue">The altern | <ol spacing="normal" type="1"> | |||
ative of applying the | <li anchor="dualq_note_dequeue">The alternative of applying the | |||
AQMs at enqueue would shift some processing from the critical time | AQMs at enqueue would shift some processing from the critical time | |||
when each packet is dequeued. However, it would also add a whole | when each packet is dequeued. However, it would also add a whole | |||
queue of delay to the control signals, making the control loop | queue of delay to the control signals, making the control loop | |||
sloppier (for a typical RTT it would double the Classic queue's | sloppier (for a typical RTT, it would double the Classic queue's | |||
feedback delay). On a platform where packet timestamping is | feedback delay). On a platform where packet timestamping is | |||
feasible, e.g. Linux, it is also easiest to apply the AQMs at | feasible, e.g., Linux, it is also easiest to apply the AQMs at | |||
dequeue because that is where queuing time is also measured.</li> | dequeue, because that is where queuing time is also measured.</li> | |||
<li anchor="dualq_note_conditional_priority">WRR better isolates | <li anchor="dualq_note_conditional_priority">WRR better isolates | |||
the L4S queue from large delay bursts in the Classic queue, but it | the L4S queue from large delay bursts in the Classic queue, but it | |||
is slightly less simple than TS-FIFO. If WRR were used, a low | is slightly less simple than TS-FIFO. If WRR were used, a low | |||
default Classic weight (e.g. 1/16) would need to be | default Classic weight (e.g., 1/16) would need to be | |||
configured in place of the time shift in line 5 of the | configured in place of the time-shift in line 5 of the | |||
initialization function (<xref target="dualq_fig_Algo_cred_core_header" format="default"/>).</li> | initialization function (<xref target="dualq_fig_Algo_cred_core_header" format="default"/>).</li> | |||
<li anchor="dualq_note_step">A step function is shown for | <li anchor="dualq_note_step">A step function is shown for | |||
simplicity. A ramp function (see <xref target="dualq_fig_Algo_laqm_core" format="default"/> and the discussion around it | simplicity. A ramp function (see <xref target="dualq_fig_Algo_laqm_core" format="default"/> and the discussion around it | |||
in <xref target="dualq_Ex_algo_pi2-1" format="default"/>) is recommended, because | in <xref target="dualq_Ex_algo_pi2-1" format="default"/>) is recommended, because | |||
it is more general than a step and has the potential to enable L4S | it is more general than a step and has the potential to enable L4S | |||
congestion controls to converge more rapidly.</li> | congestion controls to converge more rapidly.</li> | |||
<li anchor="dualq_note_non-EWMA">An EWMA is only one possible way | <li anchor="dualq_note_non-EWMA">An EWMA is only one possible way | |||
to filter bursts; other more adaptive smoothing methods could be | to filter bursts; other more adaptive smoothing methods could be | |||
valid and it might be appropriate to decrease the EWMA faster than | valid, and it might be appropriate to decrease the EWMA faster than | |||
it increases, e.g. by using the minimum of the smoothed and | it increases, e.g., by using the minimum of the smoothed and | |||
instantaneous queue delays, min(Q_C, cq.time()).</li> | instantaneous queue delays, min(Q_C, cq.time()).</li> | |||
</ol> | </ol> | |||
</section> | </section> | |||
<section numbered="true" toc="default"> | <section numbered="true" toc="default"> | |||
<name>Efficient Implementation of Curvy RED</name> | <name>Efficient Implementation of Curvy RED</name> | |||
<t>Although code optimization depends on the platform, the following | <t>Although code optimization depends on the platform, the following | |||
notes explain where the design of Curvy RED was particularly motivated | notes explain where the design of Curvy RED was particularly motivated | |||
by efficient implementation.</t> | by efficient implementation.</t> | |||
<t>The Classic AQM at line 10b calls maxrand(2*U), which gives twice | <t>The Classic AQM at line 10b in <xref target="dualq_fig_Algo_Real" format="default"/> calls maxrand(2*U), which gives twice | |||
as much curviness as the call to maxrand(U) in the marking function at | as much curviness as the call to maxrand(U) in the marking function at | |||
line 5c. This is the trick that implements the square rule in equation | line 5c. This is the trick that implements the square rule in equation | |||
(1) (<xref target="dualq_coupled" format="default"/>). This is based on the fact that, | (1) (<xref target="dualq_coupled" format="default"/>). This is based on the fact that, | |||
given a number X from 1 to 6, the probability that two dice throws | given a number X from 1 to 6, the probability that two dice throws | |||
will both be less than X is the square of the probability that one | will both be less than X is the square of the probability that one | |||
throw will be less than X. So, when U=1, the L4S marking function is | throw will be less than X. | |||
linear and the Classic dropping function is squared. If U=2, L4S would | So, when U = 1, the L4S marking function is | |||
linear and the Classic dropping function is squared. If U = 2, L4S would | ||||
be a square function and Classic would be quartic. And so on.</t> | be a square function and Classic would be quartic. And so on.</t> | |||
<t>The maxrand(u) function in lines 16-21 simply generates u random | <t>The maxrand(u) function in lines 22-27 simply generates u random | |||
numbers and returns the maximum. Typically, maxrand(u) could be run in | numbers and returns the maximum. Typically, maxrand(u) could be run in | |||
parallel out of band. For instance, if U=1, the Classic queue would | parallel out of band. For instance, if U = 1, the Classic queue would | |||
require the maximum of two random numbers. So, instead of calling | require the maximum of two random numbers. So, instead of calling | |||
maxrand(2*U) in-band, the maximum of every pair of values from a | maxrand(2*U) in-band, the maximum of every pair of values from a | |||
pseudorandom number generator could be generated out-of-band, and held | pseudorandom number generator could be generated out of band and held | |||
in a buffer ready for the Classic queue to consume.</t> | in a buffer ready for the Classic queue to consume.</t> | |||
<figure anchor="dualq_fig_Algo_Int"> | <figure anchor="dualq_fig_Algo_Int"> | |||
<name>Optimised Example Dequeue Pseudocode for DualQ Coupled AQM using Integer Arithmetic</name> | <name>Optimised Example Dequeue Pseudocode for DualQ Coupled AQM using Integer Arithmetic</name> | |||
<artwork name="" type="" align="left" alt=""><![CDATA[1: cred_dequeue | <sourcecode><![CDATA[ | |||
(lq, cq, pkt) { % Couples L4S & Classic queues | 1: cred_dequeue(lq, cq, pkt) { % Couples L4S & Classic queues | |||
2: while ( lq.byt() + cq.byt() > 0 ) { | 2: while ( lq.byt() + cq.byt() > 0 ) { | |||
3: if ( scheduler() == lq ) { | 3: if ( scheduler() == lq ) { | |||
4: lq.dequeue(pkt) % L4S scheduled | 4: lq.dequeue(pkt) % L4S scheduled | |||
5: if ((lq.time() > T) OR (Q_C >> (S_L-2) > maxrand(U))) | 5: if ((lq.time() > T) OR (Q_C >> (S_L-2) > maxrand(U))) | |||
6: mark(pkt) | 6: mark(pkt) | |||
7: } else { | 7: } else { | |||
8: cq.dequeue(pkt) % Classic scheduled | 8: cq.dequeue(pkt) % Classic scheduled | |||
9: Q_C += (qc.ns() - Q_C) >> g_C % Classic Q EWMA | 9: Q_C += (qc.ns() - Q_C) >> g_C % Classic Q EWMA | |||
10: if ( (Q_C >> (S_C-2) ) > maxrand(2*U) ) { | 10: if ( (Q_C >> (S_C-2) ) > maxrand(2*U) ) { | |||
11: if ( ecn(pkt) == 0 ) { % ECN field = not-ECT | 11: if ( ecn(pkt) == 0 ) { % ECN field = not-ECT | |||
12: drop(pkt) % Squared drop, redo loop | 12: drop(pkt) % Squared drop, redo loop | |||
13: continue % continue to the top of the while loop | 13: continue % continue to the top of the while loop | |||
14: } | 14: } | |||
15: mark(pkt) | 15: mark(pkt) | |||
16: } | 16: } | |||
17: } | 17: } | |||
18: return(pkt) % return the packet and stop here | 18: return(pkt) % return the packet and stop here | |||
19: } | 19: } | |||
20: return(NULL) % no packet to dequeue | 20: return(NULL) % no packet to dequeue | |||
21: } | 21: } | |||
]]></artwork> | ]]></sourcecode> | |||
</figure> | </figure> | |||
<t>The two ranges, range_L and range_C are expressed as powers of 2 so | <t>The two ranges, range_L and range_C, are expressed as powers of 2 so | |||
that division can be implemented as a right bit-shift (>>) in | that division can be implemented as a right bit-shift (>>) in | |||
lines 5 and 10 of the integer variant of the pseudocode (<xref target="dualq_fig_Algo_Int" format="default"/>).</t> | lines 5 and 10 of the integer variant of the pseudocode (<xref target="dualq_fig_Algo_Int" format="default"/>).</t> | |||
<t>For the integer variant of the pseudocode, an integer version of | <t>For the integer variant of the pseudocode, an integer version of | |||
the rand() function used at line 25 of the maxrand(function) in <xref target="dualq_fig_Algo_Real" format="default"/> would be arranged to return an integer | the rand() function used at line 25 of the maxrand() function in <xref target="dualq_fig_Algo_Real" format="default"/> would be arranged to return an integer | |||
in the range 0 <= rand() < 2^32 (not shown). This would scale | in the range 0 <= rand() < 2^32 (not shown). This would scale | |||
up all the floating point probabilities in the range [0,1] by | up all the floating point probabilities in the range [0,1] by | |||
2^32.</t> | 2^32.</t> | |||
<t>Queuing delays are also scaled up by 2^32, but in two stages: i) In | <t>Queuing delays are also scaled up by 2^32, but in two stages: i) in | |||
line 9 queuing time qc.ns() is returned in integer nanoseconds, making | line 9, queuing time qc.ns() is returned in integer nanoseconds, making | |||
the value about 2^30 times larger than when the units were seconds, | the value about 2^30 times larger than when the units were seconds, and | |||
ii) then in lines 5 and 10 an adjustment of -2 to the right bit-shift | then | |||
ii) in lines 5 and 10, an adjustment of -2 to the right bit-shift | ||||
multiplies the result by 2^2, to complete the scaling by 2^32.</t> | multiplies the result by 2^2, to complete the scaling by 2^32.</t> | |||
<t>In line 8 of the initialization function, the EWMA constant gamma | <t>In line 8 of the initialization function, the EWMA constant gamma | |||
is represented as an integer power of 2, g_C, so that in line 9 of the | is represented as an integer power of 2, g_C, so that in line 9 of the | |||
integer code the division needed to weight the moving average can be | integer code (<xref target="dualq_fig_Algo_Int" format="default"/>), the division needed to weight the moving average can be | |||
implemented by a right bit-shift (>> g_C).</t> | implemented by a right bit-shift (>> g_C).</t> | |||
</section> | </section> | |||
</section> | </section> | |||
<section numbered="true" toc="default"> | <section numbered="true" toc="default"> | |||
<name>Choice of Coupling Factor, k</name> | <name>Choice of Coupling Factor, k</name> | |||
<t/> | <t/> | |||
<section anchor="dualq_rtt-dependence" numbered="true" toc="default"> | <section anchor="dualq_rtt-dependence" numbered="true" toc="default"> | |||
<name>RTT-Dependence</name> | <name>RTT-Dependence</name> | |||
<t>Where Classic flows compete for the same capacity, their relative | <t>Where Classic flows compete for the same capacity, their relative | |||
flow rates depend not only on the congestion probability, but also on | flow rates depend not only on the congestion probability but also on | |||
their end-to-end RTT (= base RTT + queue delay). The rates of | their end-to-end RTT (= base RTT + queue delay). The rates of | |||
Reno <xref target="RFC5681" format="default"/> flows competing over an | Reno <xref target="RFC5681" format="default"/> flows competing over an | |||
AQM are | AQM are | |||
roughly inversely proportional to their RTTs. Cubic exhibits similar | roughly inversely proportional to their RTTs. CUBIC exhibits similar | |||
RTT-dependence when in Reno-compatibility mode, but it is less | RTT-dependence when in Reno-friendly mode, but it is less | |||
RTT-dependent otherwise.</t> | RTT-dependent otherwise.</t> | |||
<t>Until the early experiments with the DualQ Coupled AQM, the | <t>Until the early experiments with the DualQ Coupled AQM, the | |||
importance of the reasonably large Classic queue in mitigating | importance of the reasonably large Classic queue in mitigating | |||
RTT-dependence when the base RTT is low had not been appreciated. | RTT-dependence when the base RTT is low had not been appreciated. | |||
Appendix A.1.6 of the L4S ECN protocol <xref | Appendix <xref target="RFC9331" sectionFormat="bare" section="A.1.6"/> | |||
target="I-D.ietf-tsvwg-ecn-l4s-id" format="default"/> uses numerical examples to | of the L4S ECN Protocol <xref target="RFC9331" format="default"/> uses | |||
numerical examples to | ||||
explain why bloated buffers had concealed the RTT-dependence of | explain why bloated buffers had concealed the RTT-dependence of | |||
Classic congestion controls before that time. Then it explains why, | Classic congestion controls before that time. | |||
Then, it explains why, | ||||
the more that queuing delays have reduced, the more that | the more that queuing delays have reduced, the more that | |||
RTT-dependence has surfaced as a potential starvation problem for long | RTT-dependence has surfaced as a potential starvation problem for long | |||
RTT flows, when competing against very short RTT flows.</t> | RTT flows, when competing against very short RTT flows.</t> | |||
<t>Given that congestion control on end-systems is voluntary, there is | <t>Given that congestion control on end systems is voluntary, there is | |||
no reason why it has to be voluntarily RTT-dependent. The | no reason why it has to be voluntarily RTT-dependent. The | |||
RTT-dependence of existing Classic traffic cannot be 'undeployed'. | RTT-dependence of existing Classic traffic cannot be 'undeployed'. | |||
Therefore, <xref target="I-D.ietf-tsvwg-ecn-l4s-id" format="default"/> requires L4S | Therefore, <xref target="RFC9331" format="default"/> requires L4S | |||
congestion controls to be significantly less RTT-dependent than the | congestion controls to be significantly less RTT-dependent than the | |||
standard Reno congestion control <xref target="RFC5681" format="default" />, at | standard Reno congestion control <xref target="RFC5681" format="default" />, at | |||
least at low RTT. Then RTT-dependence ought to be no worse than it is | least at low RTT. Then RTT-dependence ought to be no worse than it is | |||
with appropriately sized Classic buffers. Following this approach | with appropriately sized Classic buffers. Following this approach | |||
means there is no need for network devices to address RTT-dependence, | means there is no need for network devices to address RTT-dependence, | |||
although there would be no harm if they did, which per-flow queuing | although there would be no harm if they did, which per-flow queuing | |||
inherently does.</t> | inherently does.</t> | |||
</section> | </section> | |||
<section anchor="dualq_Choosing_k" numbered="true" toc="default"> | <section anchor="dualq_Choosing_k" numbered="true" toc="default"> | |||
<name>Guidance on Controlling Throughput Equivalence</name> | <name>Guidance on Controlling Throughput Equivalence</name> | |||
<t>The coupling factor, k, determines the balance between L4S and | <t>The coupling factor, k, determines the balance between L4S and | |||
Classic flow rates (see <xref target="dualq_config" format="default"/> and equation | Classic flow rates (see <xref target="dualq_config" format="default"/> and equation | |||
(1)).</t> | (1) in <xref target="dualq_coupled" format="default"/>).</t> | |||
<t>For the public Internet, a coupling factor of k=2 is recommended, | <t>For the public Internet, a coupling factor of k = 2 is recommended | |||
and justified below. For scenarios other than the public Internet, a | and justified below. For scenarios other than the public Internet, a | |||
good coupling factor can be derived by plugging the appropriate | good coupling factor can be derived by plugging the appropriate | |||
numbers into the same working.</t> | numbers into the same working.</t> | |||
<t>To summarize the maths below, from equation (7) it can be seen that | <t>To summarize the maths below, from equation (7) it can be seen that | |||
choosing k=1.64 would theoretically make L4S throughput roughly the | choosing k = 1.64 would theoretically make L4S throughput roughly the | |||
same as Classic, <em>if their actual end-to-end RTTs were the same</em>. | same as Classic, <em>if their actual end-to-end RTTs were the same</em>. | |||
However, even if the base RTTs are the same, the actual RTTs are | However, even if the base RTTs are the same, the actual RTTs are | |||
unlikely to be the same, because Classic traffic needs a fairly large | unlikely to be the same, because Classic traffic needs a fairly large | |||
queue to avoid under-utilization and excess drop. Whereas L4S does | queue to avoid underutilization and excess drop, whereas L4S does | |||
not.</t> | not.</t> | |||
<t>Therefore, to determine the appropriate coupling factor policy, the | <t>Therefore, to determine the appropriate coupling factor policy, the | |||
operator needs to decide at what base RTT it wants L4S and Classic | operator needs to decide at what base RTT it wants L4S and Classic | |||
flows to have roughly equal throughput, once the effect of the | flows to have roughly equal throughput, once the effect of the | |||
additional Classic queue on Classic throughput has been taken into | additional Classic queue on Classic throughput has been taken into | |||
account. With this approach, a network operator can determine a good | account. With this approach, a network operator can determine a good | |||
coupling factor without knowing the precise L4S algorithm for reducing | coupling factor without knowing the precise L4S algorithm for reducing | |||
RTT-dependence - or even in the absence of any algorithm.</t> | RTT-dependence -- or even in the absence of any algorithm.</t> | |||
<t>The following additional terminology will be used, with appropriate | <t>The following additional terminology will be used, with appropriate | |||
subscripts:</t> | subscripts:</t> | |||
<dl newline="false" spacing="normal"> | <dl newline="false" spacing="normal"> | |||
<dt>r:</dt> | <dt>r:</dt> | |||
<dd>Packet rate [pkt/s]</dd> | <dd>Packet rate [pkt/s]</dd> | |||
<dt>R:</dt> | <dt>R:</dt> | |||
<dd>RTT [s/round]</dd> | <dd>RTT [s/round]</dd> | |||
<dt>p:</dt> | <dt>p:</dt> | |||
<dd>ECN marking probability []</dd> | <dd>ECN-marking probability []</dd> | |||
</dl> | </dl> | |||
<t>On the Classic side, we consider Reno as the most sensitive and | <t>On the Classic side, we consider Reno as the most sensitive and | |||
therefore worst-case Classic congestion control. We will also consider | therefore worst-case Classic congestion control. We will also consider | |||
Cubic in its Reno-friendly mode ('CReno'), as the most prevalent | CUBIC in its Reno-friendly mode ('CReno') as the most prevalent | |||
congestion control, according to the references and analysis in <xref target="PI2param" format="default"/>. In either case, the Classic packet rate in steady | congestion control, according to the references and analysis in <xref target="PI2param" format="default"/>. In either case, the Classic packet rate in steady | |||
state is given by the well-known square root formula for Reno | state is given by the well-known square root formula for Reno | |||
congestion control:</t> | congestion control:</t> | |||
<artwork name="" type="" align="left" alt=""><![CDATA[ r_C = 1.22 / ( | <sourcecode><![CDATA[ | |||
R_C * p_C^0.5) (5)]]></artwork> | r_C = 1.22 / (R_C * p_C^0.5) (5)]]></sourcecode> | |||
<t>On the L4S side, we consider the Prague congestion | <t>On the L4S side, we consider the Prague congestion | |||
control <xref target="I-D.briscoe-iccrg-prague-congestion-control" format="default"/> as the | control <xref target="I-D.briscoe-iccrg-prague-congestion-control" format="default"/> as the | |||
reference for steady-state dependence on congestion. Prague conforms | reference for steady-state dependence on congestion. Prague conforms | |||
to the same equation as DCTCP, but we do not use the equation derived | to the same equation as DCTCP, but we do not use the equation derived | |||
in the DCTCP paper, which is only appropriate for step marking. The | in the DCTCP paper, which is only appropriate for step marking. The | |||
coupled marking, p_CL, is the appropriate one when considering | coupled marking, p_CL, is the appropriate one when considering | |||
throughput equivalence with Classic flows. Unlike step marking, | throughput equivalence with Classic flows. Unlike step marking, | |||
coupled markings are inherently spaced out, so we use the formula for | coupled markings are inherently spaced out, so we use the formula for | |||
DCTCP packet rate with probabilistic marking derived in Appendix A of | DCTCP packet rate with probabilistic marking derived in Appendix A of | |||
<xref target="PI2" format="default"/>. We use the equation without RTT-independence | <xref target="PI2" format="default"/>. We use the equation without RTT-independence | |||
enabled, which will be explained later.</t> | enabled, which will be explained later.</t> | |||
<artwork name="" type="" align="left" alt=""><![CDATA[ r_L = 2 / (R_L | <sourcecode><![CDATA[ | |||
* p_CL) (6)]]></artwork> | r_L = 2 / (R_L * p_CL) (6)]]></sourcecode> | |||
<t>For packet rate equivalence, we equate the two packet rates and | <t>For packet rate equivalence, we equate the two packet rates and | |||
rearrange into the same form as Equation (1), so the two can be | rearrange the equation into the same form as equation (1) (copied from <xref target="dualq_coupled" format="default"/>) so the two can be | |||
equated and simplified to produce a formula for a theoretical coupling | equated and simplified to produce a formula for a theoretical coupling | |||
factor, which we shall call k*:</t> | factor, which we shall call k*:</t> | |||
<artwork name="" type="" align="left" alt=""><![CDATA[ r_C = r_L | <sourcecode><![CDATA[ | |||
=> p_C = (p_CL/1.64 * R_L/R_C)^2 | r_C = r_L | |||
=> p_C = (p_CL/1.64 * R_L/R_C)^2. | ||||
p_C = ( p_CL / k )^2 (1) | p_C = ( p_CL / k )^2. (1) | |||
k* = 1.64 * (R_C / R_L) (7) | k* = 1.64 * (R_C / R_L). (7) | |||
]]></artwork> | ]]></sourcecode> | |||
<t>We say that this coupling factor is theoretical, because it is in | <t>We say that this coupling factor is theoretical, because it is in | |||
terms of two RTTs, which raises two practical questions: i) for | terms of two RTTs, which raises two practical questions: i) for | |||
multiple flows with different RTTs, the RTT for each traffic class | multiple flows with different RTTs, the RTT for each traffic class | |||
would have to be derived from the RTTs of all the flows in that class | would have to be derived from the RTTs of all the flows in that class | |||
(actually the harmonic mean would be needed); ii) a network node | (actually the harmonic mean would be needed) and ii) a network node | |||
cannot easily know the RTT of the flows anyway.</t> | cannot easily know the RTT of the flows anyway.</t> | |||
<t>RTT-dependence is caused by window-based congestion control, so it | <t>RTT-dependence is caused by window-based congestion control, so it | |||
ought to be reversed there, not in the network. Therefore, we use a | ought to be reversed there, not in the network. Therefore, we use a | |||
fixed coupling factor in the network, and reduce RTT-dependence in L4S | fixed coupling factor in the network and reduce RTT-dependence in L4S | |||
senders. We cannot expect Classic senders to all be updated to reduce | senders. We cannot expect Classic senders to all be updated to reduce | |||
their RTT-dependence. But solely addressing the problem in L4S senders | their RTT-dependence. But solely addressing the problem in L4S senders | |||
at least makes RTT-dependence no worse - not just between L4S senders, | at least makes RTT-dependence no worse -- not just between L4S senders, | |||
but also between L4S and Classic senders.</t> | but also between L4S and Classic senders.</t> | |||
<t>Traditionally, throughput equivalence has been defined for flows | <t>Throughput equivalence is defined for flows | |||
under comparable conditions, including with the same base | under comparable conditions, including with the same base | |||
RTT <xref target="RFC2914" format="default"/>. So if we assume the same base RTT, | RTT <xref target="RFC2914" format="default"/>. So if we assume the same base RTT, | |||
R_b, for comparable flows, we can put both R_C and R_L in terms of | R_b, for comparable flows, we can put both R_C and R_L in terms of | |||
R_b.</t> | R_b.</t> | |||
<t>We can approximate the L4S RTT to be hardly greater than the base | <t>We can approximate the L4S RTT to be hardly greater than the base | |||
RTT, i.e. R_L ~= R_b. And we can replace R_C with (R_b + q_C), | RTT, i.e., R_L ~= R_b. And we can replace R_C with (R_b + q_C), | |||
where the Classic queue, q_C, depends on the target queue delay that | where the Classic queue, q_C, depends on the target queue delay that | |||
the operator has configured for the Classic AQM.</t> | the operator has configured for the Classic AQM.</t> | |||
<t>Taking PI2 as an example Classic AQM, it seems that we could just | <t>Taking PI2 as an example Classic AQM, it seems that we could just | |||
take R_C = R_b + target (recommended 15 ms by default in <xref target="dualq_Ex_algo_pi2-1" format="default"/>). However, target is roughly the queue | take R_C = R_b + target (recommended 15 ms by default in <xref target="dualq_Ex_algo_pi2-1" format="default"/>). However, target is roughly the queue | |||
depth reached by the tips of the sawteeth of a congestion control, not | depth reached by the tips of the sawteeth of a congestion control, not | |||
the average <xref target="PI2param" format="default"/>. That is, R_max = R_b + | the average <xref target="PI2param" format="default"/>. That is, R_max = R_b + | |||
target.</t> | target.</t> | |||
<t>The position of the average in relation to the max depends on the | <t>The position of the average in relation to the max depends on the | |||
amplitude and geometry of the sawteeth. We consider two examples: | amplitude and geometry of the sawteeth. We consider two examples: | |||
Reno <xref target="RFC5681" format="default"/>, as the most sensitive | Reno <xref target="RFC5681" format="default"/>, as the most sensitive | |||
worst-case, | worst case, | |||
and Cubic <xref target="RFC8312" format="default"/> in its Reno-friendly | and CUBIC <xref target="RFC8312" format="default"/> in its Reno-friendly | |||
mode | mode | |||
('CReno') as the most prevalent congestion control algorithm on the | ('CReno') as the most prevalent congestion control algorithm on the | |||
Internet according to the references in <xref target="PI2param" format="default"/>. | Internet according to the references in <xref target="PI2param" format="default"/>. | |||
Both are AIMD, so we will generalize using b as the multiplicative | Both are Additive Increase Multiplicative Decrease (AIMD), so we will | |||
decrease factor (b_r = 0.5 for Reno, b_c = 0.7 for CReno). Then:</t> | generalize using b as the multiplicative | |||
<artwork name="" type="" align="left" alt=""><![CDATA[ R_C = (R_max + | decrease factor (b_r = 0.5 for Reno, b_c = 0.7 for CReno). Then</t> | |||
b*R_max) / 2 | <sourcecode><![CDATA[ | |||
= R_max * (1+b)/2 | R_C = (R_max + b*R_max) / 2 | |||
= R_max * (1+b)/2. | ||||
R_reno = 0.75 * (R_b + target); R_creno = 0.85 * (R_b + target). | R_reno = 0.75 * (R_b + target); R_creno = 0.85 * (R_b + target). | |||
(8) | (8) | |||
]]></artwork> | ]]></sourcecode> | |||
<t>Plugging all this into equation (7) we get a fixed coupling factor | ||||
<t>Plugging all this into equation (7), at any particular base RTT, R_b, | ||||
we get a fixed coupling factor | ||||
for each:</t> | for each:</t> | |||
<artwork name="" type="" align="left" alt=""><![CDATA[k_reno = 1.64*0.75 | <sourcecode><![CDATA[ | |||
*(R_b+target)/R_b | k_reno = 1.64*0.75*(R_b+target)/R_b | |||
= 1.23*(1 + target/R_b); k_creno = 1.39 * (1 + target/R_b) | = 1.23*(1 + target/R_b); k_creno = 1.39 * (1 + target/R_b). | |||
]]></artwork> | ]]></sourcecode> | |||
<t>An operator can then choose the base RTT at which it wants | <t>An operator can then choose the base RTT at which it wants | |||
throughput to be equivalent. For instance, if we recommend that the | throughput to be equivalent. For instance, if we recommend that the | |||
operator chooses R_b = 25 ms, as a typical base RTT between Internet | operator chooses R_b = 25 ms, as a typical base RTT between Internet | |||
users and CDNs <xref target="PI2param" format="default"/>, then these coupling | users and CDNs <xref target="PI2param" format="default"/>, then these coupling | |||
factors become:</t> | factors become:</t> | |||
<artwork name="" type="" align="left" alt=""><![CDATA[k_reno = 1.23 * (1 | <sourcecode><![CDATA[ | |||
+ 15/25) k_creno = 1.39 * (1 + 15/25) | k_reno = 1.23 * (1 + 15/25) k_creno = 1.39 * (1 + 15/25) | |||
= 1.97 = 2.22 | = 1.97 = 2.22 | |||
~= 2 ~= 2 (9) | ~= 2. ~= 2. (9) | |||
]]></artwork> | ]]></sourcecode> | |||
<t>The approximation is relevant to any of the above example DualQ | <t>The approximation is relevant to any of the above example DualQ | |||
Coupled algorithms, which use a coupling factor that is an integer | Coupled algorithms, which use a coupling factor that is an integer | |||
power of 2 to aid efficient implementation. It also fits best to the | power of 2 to aid efficient implementation. It also fits best for the | |||
worst case (Reno).</t> | worst case (Reno).</t> | |||
<t>To check the outcome of this coupling factor, we can express the | <t>To check the outcome of this coupling factor, we can express the | |||
ratio of L4S to Classic throughput by substituting from their rate | ratio of L4S to Classic throughput by substituting from their rate | |||
equations (5) and (6), then also substituting for p_C in terms of | equations (5) and (6), then also substituting for p_C in terms of | |||
p_CL, using equation (1) with k=2 as just determined for the | p_CL using equation (1) with k = 2 as just determined for the | |||
Internet:</t> | Internet:</t> | |||
<artwork name="" type="" align="left" alt=""><![CDATA[r_L / r_C = | <sourcecode><![CDATA[ | |||
(2 * R_C * p_C^0.5) / (1.22 * R_L * p_CL) | r_L / r_C = (2 * R_C * p_C^0.5) / (1.22 * R_L * p_CL) | |||
= (R_C * p_CL) / (1.22 * R_L * p_CL) | = (R_C * p_CL) / (1.22 * R_L * p_CL) | |||
= R_C / (1.22 * R_L) (10) | = R_C / (1.22 * R_L). (10) | |||
]]></artwork> | ]]></sourcecode> | |||
<t>As an example, we can then consider single competing CReno and | <t>As an example, we can then consider single competing CReno and | |||
Prague flows, by expressing both their RTTs in (10) in terms of their | Prague flows, by expressing both their RTTs in (10) in terms of their | |||
base RTTs, R_bC and R_bL. So R_C is replaced by equation (8) for | base RTTs, R_bC and R_bL. So R_C is replaced by equation (8) for | |||
CReno. And R_L is replaced by the max() function below, which | CReno. And R_L is replaced by the max() function below, which | |||
represents the effective RTT of the current Prague congestion | represents the effective RTT of the current Prague congestion | |||
control <xref target="I-D.briscoe-iccrg-prague-congestion-control" format="default"/> in its | control <xref target="I-D.briscoe-iccrg-prague-congestion-control" format="default"/> in its | |||
(default) RTT-independent mode, because it sets a floor to the | (default) RTT-independent mode, because it sets a floor to the | |||
effective RTT that it uses for additive increase:</t> | effective RTT that it uses for additive increase:</t> | |||
<artwork name="" type="" align="left" alt=""><![CDATA[ ~= 0.85 | <sourcecode><![CDATA[ | |||
* (R_bC + target) / (1.22 * max(R_bL, R_typ)) | r_L / r_C ~= 0.85 * (R_bC + target) / (1.22 * max(R_bL, R_typ)) | |||
~= (R_bC + target) / (1.4 * max(R_bL, R_typ)) | ~= (R_bC + target) / (1.4 * max(R_bL, R_typ)). | |||
]]></artwork> | ]]></sourcecode> | |||
<t>It can be seen that, for base RTTs below target (15 ms), both the | <t>It can be seen that, for base RTTs below target (15 ms), both the | |||
numerator and the denominator plateau, which has the desired effect of | numerator and the denominator plateau, which has the desired effect of | |||
limiting RTT-dependence.</t> | limiting RTT-dependence.</t> | |||
<t>At the start of the above derivations, an explanation was promised | <t>At the start of the above derivations, an explanation was promised | |||
for why the L4S throughput equation in equation (6) did not need to | for why the L4S throughput equation in equation (6) did not need to | |||
model RTT-independence. This is because we only use one point - at the | model RTT-independence. This is because we only use one point -- at the | |||
typical base RTT where the operator chooses to calculate the coupling | typical base RTT where the operator chooses to calculate the coupling | |||
factor. Then, throughput equivalence will at least hold at that chosen | factor. Then throughput equivalence will at least hold at that chosen | |||
point. Nonetheless, assuming Prague senders implement RTT-independence | point. Nonetheless, assuming Prague senders implement RTT-independence | |||
over a range of RTTs below this, the throughput equivalence will then | over a range of RTTs below this, the throughput equivalence will then | |||
extend over that range as well.</t> | extend over that range as well.</t> | |||
<t>Congestion control designers can choose different ways to reduce | <t>Congestion control designers can choose different ways to reduce | |||
RTT-dependence. And each operator can make a policy choice to decide | RTT-dependence. And each operator can make a policy choice to decide | |||
on a different base RTT, and therefore a different k, at which it | on a different base RTT, and therefore a different k, at which it | |||
wants throughput equivalence. Nonetheless, for the Internet, it makes | wants throughput equivalence. Nonetheless, for the Internet, it makes | |||
sense to choose what is believed to be the typical RTT most users | sense to choose what is believed to be the typical RTT most users | |||
experience, because a Classic AQM's target queuing delay is also | experience, because a Classic AQM's target queuing delay is also | |||
derived from a typical RTT for the Internet.</t> | derived from a typical RTT for the Internet.</t> | |||
<t>As a non-Internet example, for localized traffic from a particular | <t>As a non-Internet example, for localized traffic from a particular | |||
ISP's data centre, using the measured RTTs, it was calculated that a | ISP's data centre, using the measured RTTs, it was calculated that a | |||
value of k = 8 would achieve throughput equivalence, and experiments | value of k = 8 would achieve throughput equivalence, and experiments | |||
verified the formula very closely.</t> | verified the formula very closely.</t> | |||
<t>But, for a typical mix of RTTs across the general Internet, a value | <t>But, for a typical mix of RTTs across the general Internet, a value | |||
of k=2 is recommended as a good workable compromise.</t> | of k = 2 is recommended as a good workable compromise.</t> | |||
</section> | </section> | |||
</section> | </section> | |||
<!-- <section title="Open Issues"> | ||||
<t>Minor open issues are tagged '{ToDo}' at the appropriate point in the | ||||
document. Major open issues are listed below:<list> | ||||
<t>None</t> | ||||
</list></t> | ||||
</section> | ||||
<section title="Change Log (to be Deleted before Publication)"> | ||||
<t>A detailed version history can be accessed at | ||||
<http://datatracker.ietf.org/doc/draft-briscoe-aqm-ecn-roadmap/history/ | ||||
></t> | ||||
<t><list style="hanging"> | ||||
<t hangText="From briscoe-...-00 to briscoe-...-01:">Technical | ||||
changes:<list style="symbols"> | ||||
<t/> | ||||
</list>Editorial changes:<list style="symbols"> | ||||
<t/> | ||||
</list></t> | ||||
</list></t> | ||||
</section> | ||||
<section numbered="false" toc="default"> | <section numbered="false" toc="default"> | |||
<name>Acknowledgements</name> | <name>Acknowledgements</name> | |||
<t>Thanks to Anil Agarwal, Sowmini Varadhan, Gabi Bracha, Nicolas Kuhn, | <t>Thanks to <contact fullname="Anil Agarwal"/>, <contact | |||
Greg Skinner, Tom Henderson, David Pullen, Mirja Kuehlewind, Gorry | fullname="Sowmini Varadhan"/>, <contact fullname="Gabi Bracha"/>, | |||
Fairhurst, Pete Heist, Ermin Sakic and Martin Duke for detailed review | <contact fullname="Nicolas Kuhn"/>, <contact fullname="Greg Skinner"/>, | |||
comments particularly of the appendices and suggestions on how to make | <contact fullname="Tom Henderson"/>, <contact fullname="David Pullen"/>, | |||
the explanations clearer. Thanks also to Tom Henderson for insights on | <contact fullname="Mirja Kühlewind"/>, <contact fullname="Gorry | |||
the choice of schedulers and queue delay measurement techniques. And | Fairhurst"/>, <contact fullname="Pete Heist"/>, <contact fullname="Ermin | |||
thanks to the area reviewers Christer Holmberg, Lars Eggert and Roman | Sakic"/>, and <contact fullname="Martin Duke"/> for detailed review | |||
Danyliw.</t> | comments, particularly of the appendices, and suggestions on how to make | |||
<t>The early contributions of Koen De Schepper, Bob Briscoe, Olga | the explanations clearer. Thanks also to <contact fullname="Tom | |||
Bondarenko and Inton Tsang were part-funded by the European Community | Henderson"/> for insight on the choice of schedulers and queue delay | |||
measurement techniques. And thanks to the area reviewers <contact | ||||
fullname="Christer Holmberg"/>, <contact fullname="Lars Eggert"/>, and | ||||
<contact fullname="Roman Danyliw"/>.</t> | ||||
<t>The early contributions of <contact fullname="Koen De Schepper"/>, | ||||
<contact fullname="Bob Briscoe"/>, <contact fullname="Olga | ||||
Bondarenko"/>, and <contact fullname="Inton Tsang"/> were partly funded by | ||||
the European Community | ||||
under its Seventh Framework Programme through the Reducing Internet | under its Seventh Framework Programme through the Reducing Internet | |||
Transport Latency (RITE) project (ICT-317700). Contributions of Koen De | Transport Latency (RITE) project (ICT-317700). Contributions of <contact | |||
Schepper and Olivier Tilmans were also part-funded by the 5Growth and | fullname="Koen De | |||
DAEMON EU H2020 projects. Bob Briscoe's contribution was also | Schepper"/> and <contact fullname="Olivier Tilmans"/> were also partly | |||
part-funded by the Comcast Innovation Fund and the Research Council of | funded by the 5Growth and | |||
DAEMON EU H2020 projects. <contact fullname="Bob Briscoe"/>'s contribution | ||||
was also | ||||
partly funded by the Comcast Innovation Fund and the Research Council of | ||||
Norway through the TimeIn project. The views expressed here are solely | Norway through the TimeIn project. The views expressed here are solely | |||
those of the authors.</t> | those of the authors.</t> | |||
</section> | </section> | |||
<section numbered="false" toc="default"> | <section numbered="false" toc="default"> | |||
<name>Contributors</name> | <name>Contributors</name> | |||
<t>The following contributed implementations and evaluations that | <t>The following contributed implementations and evaluations that | |||
validated and helped to improve this specification:</t> | validated and helped to improve this specification:</t> | |||
<ul empty="true" spacing="normal"> | <t><contact fullname="Olga Albisser"/> <olga@albisser.org> of | |||
<li>Olga Albisser <olga@albisser.org> of Simula Research Lab, | Simula Research Lab, | |||
Norway (Olga Bondarenko during early drafts) implemented the | Norway (Olga Bondarenko during early draft versions) implemented the | |||
prototype DualPI2 AQM for Linux with Koen De Schepper and conducted | prototype DualPI2 AQM for Linux with Koen De Schepper and conducted | |||
extensive evaluations as well as implementing the live performance | extensive evaluations as well as implementing the live performance | |||
visualization GUI <xref target="L4Sdemo16" format="default"/>.</li> | visualization GUI <xref target="L4Sdemo16" format="default"/>.</t> | |||
<li>Olivier Tilmans <olivier.tilmans@nokia-bell-labs.com> of | <t><contact fullname="Olivier Tilmans"/> <olivier.tilmans@nokia-bell- | |||
labs.com> of | ||||
Nokia Bell Labs, Belgium prepared and maintains the Linux | Nokia Bell Labs, Belgium prepared and maintains the Linux | |||
implementation of DualPI2 for upstreaming.</li> | implementation of DualPI2 for upstreaming.</t> | |||
<li>Shravya K.S. wrote a model for the ns-3 simulator based on the | <t><contact fullname="Shravya K.S."/> wrote a model for the ns-3 simulator | |||
-01 version of this Internet-Draft. Based on this initial work, Tom | based on draft-ietf-tsvwg-aqm-dualq-coupled-01 (a draft version of this | |||
Henderson <tomh@tomh.org> updated that earlier model and | document). Based on this initial work, <contact fullname="Tom | |||
created a model for the DualQ variant specified as part of the Low | Henderson"/> <tomh@tomh.org> updated that earlier model and | |||
Latency DOCSIS specification, as well as conducting extensive | created a model for the DualQ variant specified as part of the Low | |||
evaluations.</li> | Latency | |||
<li>Ing Jyh (Inton) Tsang of Nokia, Belgium built the End-to-End Data | DOCSIS specification, as well as conducting extensive | |||
evaluations.</t> | ||||
<t><contact fullname="Ing Jyh (Inton) Tsang"/> of Nokia, Belgium built | ||||
the End-to-End Data | ||||
Centre to the Home broadband testbed on which DualQ Coupled AQM | Centre to the Home broadband testbed on which DualQ Coupled AQM | |||
implementations were tested.</li> | implementations were tested.</t> | |||
</ul> | ||||
</section> | </section> | |||
</back> | </back> | |||
</rfc> | </rfc> | |||
End of changes. 551 change blocks. | ||||
2127 lines changed or deleted | 1308 lines changed or added | |||
This html diff was produced by rfcdiff 1.48. |