[PATCH] lib: Non-configurable delayed acks in FRCP

  • From: Dimitri Staessens <dimitri@ouroboros.rocks>
  • To: ouroboros@xxxxxxxxxxxxx
  • Date: Sat, 19 Mar 2022 21:47:01 +0100

It doesn't really make sense to manually and one-sidedly configure the
timeout of delayed acknowledgements, as setting it too high upsets the
peer's sRTT estimates. Even worse, it also causes a lot of spurious
retransmissions if it exceeds the sRTT mean deviation calculated by
the receiver. Compensating on bare acknowledgment for the ack delay
could improve the RTT estimate deviation, but not the spurious
retransmissions if it was set too high. This sets the delayed ack to
wait for a single RTT mean deviation. Probably needs more tweaking to
further reduce differences between the RTT estimates at the sender and
receiver, e.g. compensate the RTT estimate for delayed acks, or
increase the RTO to add 8 mdevs to sRTT instead of 4. However, it
looks like the mdev estimate is the trickiest one to get to sync, not
the RTT average. Linux reduces the sample weight for mdev from 1/4 to
1/32 in some cases, will give that a shot some day too to see if that
further align sRTT estimates. In any case, this patch already improves
things a lot.

Also fixes a bug where the sender was sending acknowlegments on the
first packets in flight for the 0 sequence number. The receiver
activity was measured in seconds but compared to a timeout value in
nanoseconds.

There's still a lot of spurious retransmissions that start after
actual packet loss occurs, I'm still investigating what causes it.

Signed-off-by: Dimitri Staessens <dimitri@ouroboros.rocks>
---
 src/lib/CMakeLists.txt |  8 +++-----
 src/lib/config.h.in    |  2 --
 src/lib/frct.c         | 28 ++++++++++++----------------
 src/lib/timerwheel.c   | 20 +++++++++++---------
 4 files changed, 26 insertions(+), 32 deletions(-)

diff --git a/src/lib/CMakeLists.txt b/src/lib/CMakeLists.txt
index 022c5cca..63b3da8d 100644
--- a/src/lib/CMakeLists.txt
+++ b/src/lib/CMakeLists.txt
@@ -192,15 +192,13 @@ set(DELTA_T_ACK 10 CACHE STRING
   "Maximum time to acknowledge a packet (s)")
 set(DELTA_T_RTX 120 CACHE STRING
   "Maximum time to retransmit a packet (s)")
-set(DELTA_T_ACK_DELAY 10 CACHE STRING
-  "Maximum time to wait before acknowledging a packet (ms)")
 set(FRCT_REORDER_QUEUE_SIZE 256 CACHE STRING
   "Size of the reordering queue, must be a power of 2")
 set(FRCT_START_WINDOW 64 CACHE STRING
   "Start window, must be a power of 2")
 set(FRCT_RTO_MIN 250 CACHE STRING
   "Minimum Retransmission Timeout (RTO) for FRCT (us)")
-set(FRCT_TICK_TIME 5000 CACHE STRING
+set(FRCT_TICK_TIME 250 CACHE STRING
   "Tick time for FRCT activity (retransmission, acknowledgments) (us)")
 set(RXM_BUFFER_ON_HEAP FALSE CACHE BOOL
   "Store packets for retransmission on the heap instead of in packet buffer")
@@ -214,9 +212,9 @@ set(RXM_WHEEL_LEVELS 3 CACHE STRING
   "Number of levels in the retransmission wheel")
 set(RXM_WHEEL_SLOTS_PER_LEVEL 256 CACHE STRING
   "Number of slots per level in the retransmission wheel, must be a power of 
2")
-set(ACK_WHEEL_SLOTS 128 CACHE STRING
+set(ACK_WHEEL_SLOTS 256 CACHE STRING
   "Number of slots in the acknowledgment wheel, must be a power of 2")
-set(ACK_WHEEL_RESOLUTION 20 CACHE STRING
+set(ACK_WHEEL_RESOLUTION 18 CACHE STRING
   "Minimum acknowledgment delay (ns), as a power to 2")
 
 if (HAVE_FUSE)
diff --git a/src/lib/config.h.in b/src/lib/config.h.in
index d534cf77..a51c2791 100644
--- a/src/lib/config.h.in
+++ b/src/lib/config.h.in
@@ -72,8 +72,6 @@
 #define DELT_A              (@DELTA_T_ACK@)                  /* ns */
 #define DELT_R              (@DELTA_T_RTX@)                  /* ns */
 
-#define DELT_ACK            (@DELTA_T_ACK_DELAY@ * MILLION)  /* ns */
-
 #define RQ_SIZE             (@FRCT_REORDER_QUEUE_SIZE@)
 #define START_WINDOW        (@FRCT_START_WINDOW@)
 #define RTO_MIN             (@FRCT_RTO_MIN@ * 1000)
diff --git a/src/lib/frct.c b/src/lib/frct.c
index e7193da0..99962868 100644
--- a/src/lib/frct.c
+++ b/src/lib/frct.c
@@ -279,9 +279,11 @@ static void send_frct_pkt(struct frcti * frcti)
 
         assert(frcti);
 
-        pthread_rwlock_rdlock(&frcti->lock);
+        clock_gettime(PTHREAD_COND_CLOCK, &now);
 
-        if (frcti->rcv_cr.lwe == frcti->rcv_cr.seqno) {
+        pthread_rwlock_wrlock(&frcti->lock);
+
+        if (!after(frcti->rcv_cr.lwe, frcti->rcv_cr.seqno)) {
                 pthread_rwlock_unlock(&frcti->lock);
                 return;
         }
@@ -290,24 +292,19 @@ static void send_frct_pkt(struct frcti * frcti)
         ackno = frcti->rcv_cr.lwe;
         rwe   = frcti->rcv_cr.rwe;
 
-        clock_gettime(PTHREAD_COND_CLOCK, &now);
-
         diff = ts_diff_ns(&frcti->rcv_cr.act, &now);
 
-        pthread_rwlock_unlock(&frcti->lock);
-
-        if (diff > frcti->a || diff < DELT_ACK)
+        if (diff > frcti->a || diff < frcti->mdev) {
+                pthread_rwlock_unlock(&frcti->lock);
                 return;
+        }
+
+        frcti->rcv_cr.seqno = frcti->rcv_cr.lwe;
+
+        pthread_rwlock_unlock(&frcti->lock);
 
         __send_frct_pkt(fd, FRCT_ACK | FRCT_FC, ackno, rwe);
 
-        pthread_rwlock_wrlock(&frcti->lock);
-
-        if (after(frcti->rcv_cr.lwe, frcti->rcv_cr.seqno))
-                frcti->rcv_cr.seqno = frcti->rcv_cr.lwe;
-
-        pthread_rwlock_unlock(&frcti->lock);
-
 }
 
 static void __send_rdv(int fd)
@@ -734,8 +731,7 @@ static int __frcti_snd(struct frcti *       frcti,
                         frcti->t_probe = now;
                         frcti->probe   = true;
                 }
-
-                if (now.tv_sec - rcv_cr->act.tv_sec <= frcti->a) {
+                if ((now.tv_sec - rcv_cr->act.tv_sec) * BILLION <= frcti->a) {
                         pci->flags |= FRCT_ACK;
                         pci->ackno = hton32(rcv_cr->lwe);
                         rcv_cr->seqno = rcv_cr->lwe;
diff --git a/src/lib/timerwheel.c b/src/lib/timerwheel.c
index eb25758b..580f838d 100644
--- a/src/lib/timerwheel.c
+++ b/src/lib/timerwheel.c
@@ -218,8 +218,6 @@ static void timerwheel_move(void)
 
                                 if (ts_to_ns(now) - act > (rto << 2))
                                         rto <<= r->mul++;
-                                else
-                                        r->mul = 0;
 
                                 /* Schedule at least in the next time slot. */
                                 slot = ts_to_ns(now) >> RXMQ_RES;
@@ -234,7 +232,7 @@ static void timerwheel_move(void)
                                 if (lvl >= RXMQ_LVLS) /* Can't reschedule */
                                         goto flow_down;
 
-                                rslot = (rslot + slot) & (RXMQ_SLOTS - 1);
+                                rslot = (rslot + slot + 1) & (RXMQ_SLOTS - 1);
 
 #ifdef RXM_BLOCKING
                                 if (ipcp_sdb_reserve(&sdb, r->len) < 0)
@@ -256,6 +254,7 @@ static void timerwheel_move(void)
 
                                 /* Retransmit the copy. */
                                 pci->ackno = hton32(rcv_lwe);
+
 #ifdef RXM_BLOCKING
                                 if (shm_rbuff_write_b(f->tx_rb, idx, NULL) < 0)
 #else
@@ -264,15 +263,14 @@ static void timerwheel_move(void)
                                         goto flow_down;
                                 shm_flow_set_notify(f->set, f->flow_id,
                                                     FLOW_PKT);
-
-                        reschedule:
+                         reschedule:
                                 list_add(&r->next, &rw.rxms[lvl][rslot]);
                                 continue;
 
-                        flow_down:
+                         flow_down:
                                 shm_rbuff_set_acl(f->tx_rb, ACL_FLOWDOWN);
                                 shm_rbuff_set_acl(f->rx_rb, ACL_FLOWDOWN);
-                        cleanup:
+                         cleanup:
 #ifdef RXM_BUFFER_ON_HEAP
                                 free(r->pkt);
 #else
@@ -375,7 +373,7 @@ static int timerwheel_rxm(struct frcti *       frcti,
                 return -EPERM;
         }
 
-        slot = (slot + rto_slot) & (RXMQ_SLOTS - 1);
+        slot = (slot + rto_slot + 1) & (RXMQ_SLOTS - 1);
 
         pthread_mutex_lock(&rw.lock);
 
@@ -403,9 +401,13 @@ static int timerwheel_ack(int            fd,
 
         clock_gettime(PTHREAD_COND_CLOCK, &now);
 
-        slot = (((ts_to_ns(now) + DELT_ACK) >> ACKQ_RES) + 1)
+        pthread_rwlock_rdlock(&frcti->lock);
+
+        slot = (((ts_to_ns(now) + frcti->mdev) >> ACKQ_RES) + 1)
                 & (ACKQ_SLOTS - 1);
 
+        pthread_rwlock_unlock(&frcti->lock);
+
         a->fd    = fd;
         a->frcti = frcti;
         a->flow_id = ai.flows[fd].flow_id;
-- 
2.35.1


Other related posts:

  • » [PATCH] lib: Non-configurable delayed acks in FRCP - Dimitri Staessens