Refactor Federation Destination Queues (#2807)

This is a refactor of the federation destination queues.
It fixes a few things, namely:
- actually retry outgoing events with backoff behaviour
- obtain enough events from the database to fill messages as much as
possible
- minimize the amount of running goroutines
  - use pure timers for backoff
  - don't restart queue unless necessary
  - close the background task when backing off
- increase max edus in a transaction to match the spec
- cleanup timers more aggresively to reduce memory usage
- add jitter to backoff timers to reduce resource spikes
- add a bunch of tests (with real and fake databases) to ensure
everything is working
This commit is contained in:
devonh 2022-10-19 10:03:16 +00:00 committed by GitHub
parent 3aa92efaa3
commit 241d5c47df
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 1410 additions and 202 deletions

View file

@ -162,23 +162,25 @@ func (oqs *OutgoingQueues) getQueue(destination gomatrixserverlib.ServerName) *d
if !ok || oq == nil {
destinationQueueTotal.Inc()
oq = &destinationQueue{
queues: oqs,
db: oqs.db,
process: oqs.process,
rsAPI: oqs.rsAPI,
origin: oqs.origin,
destination: destination,
client: oqs.client,
statistics: oqs.statistics.ForServer(destination),
notify: make(chan struct{}, 1),
interruptBackoff: make(chan bool),
signing: oqs.signing,
queues: oqs,
db: oqs.db,
process: oqs.process,
rsAPI: oqs.rsAPI,
origin: oqs.origin,
destination: destination,
client: oqs.client,
statistics: oqs.statistics.ForServer(destination),
notify: make(chan struct{}, 1),
signing: oqs.signing,
}
oq.statistics.AssignBackoffNotifier(oq.handleBackoffNotifier)
oqs.queues[destination] = oq
}
return oq
}
// clearQueue removes the queue for the provided destination from the
// set of destination queues.
func (oqs *OutgoingQueues) clearQueue(oq *destinationQueue) {
oqs.queuesMutex.Lock()
defer oqs.queuesMutex.Unlock()
@ -332,7 +334,9 @@ func (oqs *OutgoingQueues) RetryServer(srv gomatrixserverlib.ServerName) {
if oqs.disabled {
return
}
oqs.statistics.ForServer(srv).RemoveBlacklist()
if queue := oqs.getQueue(srv); queue != nil {
queue.statistics.ClearBackoff()
queue.wakeQueueIfNeeded()
}
}