Federation sender refactor (#1621)

* Refactor federation sender, again

* Clean up better

* Missing operators

* Try to get overflowed events from database

* Fix queries

* Log less

* Comments

* nil PDUs/EDUs shouldn't happen but guard against them for safety

* Tweak logging

* Fix transaction coalescing

* Update comments

* Check nils more

* Remove channels as they add extra complexity and possibly will deadlock

* Don't hold lock while sending transaction

* Less spam about sleeping queues

* Comments

* Bug-fixing

* Don't try to rehydrate twice

* Don't queue in memory for blacklisted destinations

* Don't queue in memory for blacklisted destinations

* Fix a couple of bugs

* Check for duplicates when pulling things out of the database

* Durable transactions, some more refactoring

* Revert "Durable transactions, some more refactoring"

This reverts commit 5daf924eaaefec5e4f7c12c16ca24e898de4adbb.

* Fix deadlock
This commit is contained in:
Neil Alexander 2020-12-09 10:03:22 +00:00 committed by GitHub
parent e1d32e2ff1
commit f64c8822bc
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 294 additions and 267 deletions

View file

@ -17,7 +17,6 @@ package shared
import (
"context"
"database/sql"
"encoding/json"
"fmt"
"github.com/matrix-org/dendrite/federationsender/storage/tables"
@ -44,16 +43,11 @@ type Database struct {
// to pass them back so that we can clean up if the transaction sends
// successfully.
type Receipt struct {
nids []int64
nid int64
}
func (e *Receipt) Empty() bool {
return len(e.nids) == 0
}
func (e *Receipt) String() string {
j, _ := json.Marshal(e.nids)
return string(j)
func (r *Receipt) String() string {
return fmt.Sprintf("%d", r.nid)
}
// UpdateRoom updates the joined hosts for a room and returns what the joined
@ -146,7 +140,7 @@ func (d *Database) StoreJSON(
return nil, fmt.Errorf("d.insertQueueJSON: %w", err)
}
return &Receipt{
nids: []int64{nid},
nid: nid,
}, nil
}

View file

@ -33,16 +33,14 @@ func (d *Database) AssociateEDUWithDestination(
receipt *Receipt,
) error {
return d.Writer.Do(d.DB, nil, func(txn *sql.Tx) error {
for _, nid := range receipt.nids {
if err := d.FederationSenderQueueEDUs.InsertQueueEDU(
ctx, // context
txn, // SQL transaction
"", // TODO: EDU type for coalescing
serverName, // destination server name
nid, // NID from the federationsender_queue_json table
); err != nil {
return fmt.Errorf("InsertQueueEDU: %w", err)
}
if err := d.FederationSenderQueueEDUs.InsertQueueEDU(
ctx, // context
txn, // SQL transaction
"", // TODO: EDU type for coalescing
serverName, // destination server name
receipt.nid, // NID from the federationsender_queue_json table
); err != nil {
return fmt.Errorf("InsertQueueEDU: %w", err)
}
return nil
})
@ -50,29 +48,25 @@ func (d *Database) AssociateEDUWithDestination(
// GetNextTransactionEDUs retrieves events from the database for
// the next pending transaction, up to the limit specified.
func (d *Database) GetNextTransactionEDUs(
func (d *Database) GetPendingEDUs(
ctx context.Context,
serverName gomatrixserverlib.ServerName,
limit int,
) (
edus []*gomatrixserverlib.EDU,
receipt *Receipt,
edus map[*Receipt]*gomatrixserverlib.EDU,
err error,
) {
edus = make(map[*Receipt]*gomatrixserverlib.EDU)
err = d.Writer.Do(d.DB, nil, func(txn *sql.Tx) error {
nids, err := d.FederationSenderQueueEDUs.SelectQueueEDUs(ctx, txn, serverName, limit)
if err != nil {
return fmt.Errorf("SelectQueueEDUs: %w", err)
}
receipt = &Receipt{
nids: nids,
}
retrieve := make([]int64, 0, len(nids))
for _, nid := range nids {
if edu, ok := d.Cache.GetFederationSenderQueuedEDU(nid); ok {
edus = append(edus, edu)
edus[&Receipt{nid}] = edu
} else {
retrieve = append(retrieve, nid)
}
@ -83,12 +77,12 @@ func (d *Database) GetNextTransactionEDUs(
return fmt.Errorf("SelectQueueJSON: %w", err)
}
for _, blob := range blobs {
for nid, blob := range blobs {
var event gomatrixserverlib.EDU
if err := json.Unmarshal(blob, &event); err != nil {
return fmt.Errorf("json.Unmarshal: %w", err)
}
edus = append(edus, &event)
edus[&Receipt{nid}] = &event
}
return nil
@ -101,19 +95,24 @@ func (d *Database) GetNextTransactionEDUs(
func (d *Database) CleanEDUs(
ctx context.Context,
serverName gomatrixserverlib.ServerName,
receipt *Receipt,
receipts []*Receipt,
) error {
if receipt == nil {
if len(receipts) == 0 {
return errors.New("expected receipt")
}
nids := make([]int64, len(receipts))
for i := range receipts {
nids[i] = receipts[i].nid
}
return d.Writer.Do(d.DB, nil, func(txn *sql.Tx) error {
if err := d.FederationSenderQueueEDUs.DeleteQueueEDUs(ctx, txn, serverName, receipt.nids); err != nil {
if err := d.FederationSenderQueueEDUs.DeleteQueueEDUs(ctx, txn, serverName, nids); err != nil {
return err
}
var deleteNIDs []int64
for _, nid := range receipt.nids {
for _, nid := range nids {
count, err := d.FederationSenderQueueEDUs.SelectQueueEDUReferenceJSONCount(ctx, txn, nid)
if err != nil {
return fmt.Errorf("SelectQueueEDUReferenceJSONCount: %w", err)

View file

@ -34,16 +34,14 @@ func (d *Database) AssociatePDUWithDestination(
receipt *Receipt,
) error {
return d.Writer.Do(d.DB, nil, func(txn *sql.Tx) error {
for _, nid := range receipt.nids {
if err := d.FederationSenderQueuePDUs.InsertQueuePDU(
ctx, // context
txn, // SQL transaction
transactionID, // transaction ID
serverName, // destination server name
nid, // NID from the federationsender_queue_json table
); err != nil {
return fmt.Errorf("InsertQueuePDU: %w", err)
}
if err := d.FederationSenderQueuePDUs.InsertQueuePDU(
ctx, // context
txn, // SQL transaction
transactionID, // transaction ID
serverName, // destination server name
receipt.nid, // NID from the federationsender_queue_json table
); err != nil {
return fmt.Errorf("InsertQueuePDU: %w", err)
}
return nil
})
@ -51,14 +49,12 @@ func (d *Database) AssociatePDUWithDestination(
// GetNextTransactionPDUs retrieves events from the database for
// the next pending transaction, up to the limit specified.
func (d *Database) GetNextTransactionPDUs(
func (d *Database) GetPendingPDUs(
ctx context.Context,
serverName gomatrixserverlib.ServerName,
limit int,
) (
transactionID gomatrixserverlib.TransactionID,
events []*gomatrixserverlib.HeaderedEvent,
receipt *Receipt,
events map[*Receipt]*gomatrixserverlib.HeaderedEvent,
err error,
) {
// Strictly speaking this doesn't need to be using the writer
@ -66,29 +62,17 @@ func (d *Database) GetNextTransactionPDUs(
// a guarantee of transactional isolation, it's actually useful
// to know in SQLite mode that nothing else is trying to modify
// the database.
events = make(map[*Receipt]*gomatrixserverlib.HeaderedEvent)
err = d.Writer.Do(d.DB, nil, func(txn *sql.Tx) error {
transactionID, err = d.FederationSenderQueuePDUs.SelectQueuePDUNextTransactionID(ctx, txn, serverName)
if err != nil {
return fmt.Errorf("SelectQueuePDUNextTransactionID: %w", err)
}
if transactionID == "" {
return nil
}
nids, err := d.FederationSenderQueuePDUs.SelectQueuePDUs(ctx, txn, serverName, transactionID, limit)
nids, err := d.FederationSenderQueuePDUs.SelectQueuePDUs(ctx, txn, serverName, limit)
if err != nil {
return fmt.Errorf("SelectQueuePDUs: %w", err)
}
receipt = &Receipt{
nids: nids,
}
retrieve := make([]int64, 0, len(nids))
for _, nid := range nids {
if event, ok := d.Cache.GetFederationSenderQueuedPDU(nid); ok {
events = append(events, event)
events[&Receipt{nid}] = event
} else {
retrieve = append(retrieve, nid)
}
@ -104,7 +88,7 @@ func (d *Database) GetNextTransactionPDUs(
if err := json.Unmarshal(blob, &event); err != nil {
return fmt.Errorf("json.Unmarshal: %w", err)
}
events = append(events, &event)
events[&Receipt{nid}] = &event
d.Cache.StoreFederationSenderQueuedPDU(nid, &event)
}
@ -119,19 +103,24 @@ func (d *Database) GetNextTransactionPDUs(
func (d *Database) CleanPDUs(
ctx context.Context,
serverName gomatrixserverlib.ServerName,
receipt *Receipt,
receipts []*Receipt,
) error {
if receipt == nil {
if len(receipts) == 0 {
return errors.New("expected receipt")
}
nids := make([]int64, len(receipts))
for i := range receipts {
nids[i] = receipts[i].nid
}
return d.Writer.Do(d.DB, nil, func(txn *sql.Tx) error {
if err := d.FederationSenderQueuePDUs.DeleteQueuePDUs(ctx, txn, serverName, receipt.nids); err != nil {
if err := d.FederationSenderQueuePDUs.DeleteQueuePDUs(ctx, txn, serverName, nids); err != nil {
return err
}
var deleteNIDs []int64
for _, nid := range receipt.nids {
for _, nid := range nids {
count, err := d.FederationSenderQueuePDUs.SelectQueuePDUReferenceJSONCount(ctx, txn, nid)
if err != nil {
return fmt.Errorf("SelectQueuePDUReferenceJSONCount: %w", err)