mirror of
https://github.com/hoernschen/dendrite.git
synced 2025-08-02 14:12:47 +00:00
Add NATS JetStream support (#1866)
* Add NATS JetStream support Update shopify/sarama * Fix addresses * Don't change Addresses in Defaults * Update saramajetstream * Add missing error check Keep typing events for at least one minute * Use all configured NATS addresses * Update saramajetstream * Try setting up with NATS * Make sure NATS uses own persistent directory (TODO: make this configurable) * Update go.mod/go.sum * Jetstream package * Various other refactoring * Build fixes * Config tweaks, make random jetstream storage path for CI * Disable interest policies * Try to sane default on jetstream base path * Try to use in-memory for CI * Restore storage/retention * Update nats.go dependency * Adapt changes to config * Remove unneeded TopicFor * Dep update * Revert "Remove unneeded TopicFor" This reverts commitf5a4e4a339
. * Revert changes made to streams * Fix build problems * Update nats-server * Update go.mod/go.sum * Roomserver input API queuing using NATS * Fix topic naming * Prometheus metrics * More refactoring to remove saramajetstream * Add missing topic * Don't try to populate map that doesn't exist * Roomserver output topic * Update go.mod/go.sum * Message acknowledgements * Ack tweaks * Try to resume transaction re-sends * Try to resume transaction re-sends * Update to matrix-org/gomatrixserverlib@91dadfb * Remove internal.PartitionStorer from components that don't consume keychanges * Try to reduce re-allocations a bit in resolveConflictsV2 * Tweak delivery options on RS input * Publish send-to-device messages into correct JetStream subject * Async and sync roomserver input * Update dendrite-config.yaml * Remove roomserver tests for now (they need rewriting) * Remove roomserver test again (was merged back in) * Update documentation * Docker updates * More Docker updates * Update Docker readme again * Fix lint issues * Send final event in `processEvent` synchronously (since this might stop Sytest from being so upset) * Don't report event rejection errors via `/send`, since apparently this is upsetting tests that don't expect that * Go 1.16 instead of Go 1.13 for upgrade tests and Complement * Revert "Don't report event rejection errors via `/send`, since apparently this is upsetting tests that don't expect that" This reverts commit368675283f
. * Don't report any errors on `/send` to see what fun that creates * Fix panics on closed channel sends * Enforce state key matches sender * Do the same for leave * Various tweaks to make tests happier Squashed commit of the following: commit13f9028e7a
Author: Neil Alexander <neilalexander@users.noreply.github.com> Date: Tue Jan 4 15:47:14 2022 +0000 Do the same for leave commite6be7f05c3
Author: Neil Alexander <neilalexander@users.noreply.github.com> Date: Tue Jan 4 15:33:42 2022 +0000 Enforce state key matches sender commit85ede6d64b
Author: Neil Alexander <neilalexander@users.noreply.github.com> Date: Tue Jan 4 14:07:04 2022 +0000 Fix panics on closed channel sends commit9755494a98
Author: Neil Alexander <neilalexander@users.noreply.github.com> Date: Tue Jan 4 13:38:22 2022 +0000 Don't report any errors on `/send` to see what fun that creates commit3bb4f87b5d
Author: Neil Alexander <neilalexander@users.noreply.github.com> Date: Tue Jan 4 13:00:26 2022 +0000 Revert "Don't report event rejection errors via `/send`, since apparently this is upsetting tests that don't expect that" This reverts commit368675283f
. commitfe2673ed7b
Author: Neil Alexander <neilalexander@users.noreply.github.com> Date: Tue Jan 4 12:09:34 2022 +0000 Go 1.16 instead of Go 1.13 for upgrade tests and Complement commit368675283f
Author: Neil Alexander <neilalexander@users.noreply.github.com> Date: Tue Jan 4 11:51:45 2022 +0000 Don't report event rejection errors via `/send`, since apparently this is upsetting tests that don't expect that commitb028dfc085
Author: Neil Alexander <neilalexander@users.noreply.github.com> Date: Tue Jan 4 10:29:08 2022 +0000 Send final event in `processEvent` synchronously (since this might stop Sytest from being so upset) * Merge in NATS Server v2.6.6 and nats.go v1.13 into the in-process connection fork * Add `jetstream.WithJetStreamMessage` to make ack/nak-ing less messy, use process context in consumers * Fix consumer component name in federation API * Add comment explaining where streams are defined * Tweaks to roomserver input with comments * Finish that sentence that I apparently forgot to finish in INSTALL.md * Bump version number of config to 2 * Add comments around asynchronous sends to roomserver in processEventWithMissingState * More useful error message when the config version does not match * Set version in generate-config * Fix version in config.Defaults Co-authored-by: Neil Alexander <neilalexander@users.noreply.github.com>
This commit is contained in:
parent
a47b12dc7d
commit
161f145176
75 changed files with 1317 additions and 1696 deletions
|
@ -19,117 +19,115 @@ import (
|
|||
"encoding/json"
|
||||
"fmt"
|
||||
|
||||
"github.com/Shopify/sarama"
|
||||
"github.com/matrix-org/dendrite/federationapi/queue"
|
||||
"github.com/matrix-org/dendrite/federationapi/storage"
|
||||
"github.com/matrix-org/dendrite/federationapi/types"
|
||||
"github.com/matrix-org/dendrite/internal"
|
||||
"github.com/matrix-org/dendrite/roomserver/api"
|
||||
"github.com/matrix-org/dendrite/setup/config"
|
||||
"github.com/matrix-org/dendrite/setup/jetstream"
|
||||
"github.com/matrix-org/dendrite/setup/process"
|
||||
"github.com/matrix-org/gomatrixserverlib"
|
||||
"github.com/nats-io/nats.go"
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// OutputRoomEventConsumer consumes events that originated in the room server.
|
||||
type OutputRoomEventConsumer struct {
|
||||
cfg *config.FederationAPI
|
||||
rsAPI api.RoomserverInternalAPI
|
||||
rsConsumer *internal.ContinualConsumer
|
||||
db storage.Database
|
||||
queues *queue.OutgoingQueues
|
||||
ctx context.Context
|
||||
cfg *config.FederationAPI
|
||||
rsAPI api.RoomserverInternalAPI
|
||||
jetstream nats.JetStreamContext
|
||||
db storage.Database
|
||||
queues *queue.OutgoingQueues
|
||||
topic string
|
||||
}
|
||||
|
||||
// NewOutputRoomEventConsumer creates a new OutputRoomEventConsumer. Call Start() to begin consuming from room servers.
|
||||
func NewOutputRoomEventConsumer(
|
||||
process *process.ProcessContext,
|
||||
cfg *config.FederationAPI,
|
||||
kafkaConsumer sarama.Consumer,
|
||||
js nats.JetStreamContext,
|
||||
queues *queue.OutgoingQueues,
|
||||
store storage.Database,
|
||||
rsAPI api.RoomserverInternalAPI,
|
||||
) *OutputRoomEventConsumer {
|
||||
consumer := internal.ContinualConsumer{
|
||||
Process: process,
|
||||
ComponentName: "federationapi/roomserver",
|
||||
Topic: string(cfg.Matrix.Kafka.TopicFor(config.TopicOutputRoomEvent)),
|
||||
Consumer: kafkaConsumer,
|
||||
PartitionStore: store,
|
||||
return &OutputRoomEventConsumer{
|
||||
ctx: process.Context(),
|
||||
cfg: cfg,
|
||||
jetstream: js,
|
||||
db: store,
|
||||
queues: queues,
|
||||
rsAPI: rsAPI,
|
||||
topic: cfg.Matrix.JetStream.TopicFor(jetstream.OutputRoomEvent),
|
||||
}
|
||||
s := &OutputRoomEventConsumer{
|
||||
cfg: cfg,
|
||||
rsConsumer: &consumer,
|
||||
db: store,
|
||||
queues: queues,
|
||||
rsAPI: rsAPI,
|
||||
}
|
||||
consumer.ProcessMessage = s.onMessage
|
||||
|
||||
return s
|
||||
}
|
||||
|
||||
// Start consuming from room servers
|
||||
func (s *OutputRoomEventConsumer) Start() error {
|
||||
return s.rsConsumer.Start()
|
||||
_, err := s.jetstream.Subscribe(s.topic, s.onMessage)
|
||||
return err
|
||||
}
|
||||
|
||||
// onMessage is called when the federation server receives a new event from the room server output log.
|
||||
// It is unsafe to call this with messages for the same room in multiple gorountines
|
||||
// because updates it will likely fail with a types.EventIDMismatchError when it
|
||||
// realises that it cannot update the room state using the deltas.
|
||||
func (s *OutputRoomEventConsumer) onMessage(msg *sarama.ConsumerMessage) error {
|
||||
// Parse out the event JSON
|
||||
var output api.OutputEvent
|
||||
if err := json.Unmarshal(msg.Value, &output); err != nil {
|
||||
// If the message was invalid, log it and move on to the next message in the stream
|
||||
log.WithError(err).Errorf("roomserver output log: message parse failure")
|
||||
return nil
|
||||
}
|
||||
|
||||
switch output.Type {
|
||||
case api.OutputTypeNewRoomEvent:
|
||||
ev := output.NewRoomEvent.Event
|
||||
|
||||
if output.NewRoomEvent.RewritesState {
|
||||
if err := s.db.PurgeRoomState(context.TODO(), ev.RoomID()); err != nil {
|
||||
return fmt.Errorf("s.db.PurgeRoom: %w", err)
|
||||
}
|
||||
func (s *OutputRoomEventConsumer) onMessage(msg *nats.Msg) {
|
||||
jetstream.WithJetStreamMessage(msg, func(msg *nats.Msg) bool {
|
||||
// Parse out the event JSON
|
||||
var output api.OutputEvent
|
||||
if err := json.Unmarshal(msg.Data, &output); err != nil {
|
||||
// If the message was invalid, log it and move on to the next message in the stream
|
||||
log.WithError(err).Errorf("roomserver output log: message parse failure")
|
||||
return true
|
||||
}
|
||||
|
||||
if err := s.processMessage(*output.NewRoomEvent); err != nil {
|
||||
switch err.(type) {
|
||||
case *queue.ErrorFederationDisabled:
|
||||
log.WithField("error", output.Type).Info(
|
||||
err.Error(),
|
||||
)
|
||||
default:
|
||||
// panic rather than continue with an inconsistent database
|
||||
switch output.Type {
|
||||
case api.OutputTypeNewRoomEvent:
|
||||
ev := output.NewRoomEvent.Event
|
||||
|
||||
if output.NewRoomEvent.RewritesState {
|
||||
if err := s.db.PurgeRoomState(s.ctx, ev.RoomID()); err != nil {
|
||||
log.WithError(err).Errorf("roomserver output log: purge room state failure")
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
if err := s.processMessage(*output.NewRoomEvent); err != nil {
|
||||
switch err.(type) {
|
||||
case *queue.ErrorFederationDisabled:
|
||||
log.WithField("error", output.Type).Info(
|
||||
err.Error(),
|
||||
)
|
||||
default:
|
||||
// panic rather than continue with an inconsistent database
|
||||
log.WithFields(log.Fields{
|
||||
"event_id": ev.EventID(),
|
||||
"event": string(ev.JSON()),
|
||||
"add": output.NewRoomEvent.AddsStateEventIDs,
|
||||
"del": output.NewRoomEvent.RemovesStateEventIDs,
|
||||
log.ErrorKey: err,
|
||||
}).Panicf("roomserver output log: write room event failure")
|
||||
}
|
||||
}
|
||||
|
||||
case api.OutputTypeNewInboundPeek:
|
||||
if err := s.processInboundPeek(*output.NewInboundPeek); err != nil {
|
||||
log.WithFields(log.Fields{
|
||||
"event_id": ev.EventID(),
|
||||
"event": string(ev.JSON()),
|
||||
"add": output.NewRoomEvent.AddsStateEventIDs,
|
||||
"del": output.NewRoomEvent.RemovesStateEventIDs,
|
||||
"event": output.NewInboundPeek,
|
||||
log.ErrorKey: err,
|
||||
}).Panicf("roomserver output log: write room event failure")
|
||||
}).Panicf("roomserver output log: remote peek event failure")
|
||||
return false
|
||||
}
|
||||
return nil
|
||||
}
|
||||
case api.OutputTypeNewInboundPeek:
|
||||
if err := s.processInboundPeek(*output.NewInboundPeek); err != nil {
|
||||
log.WithFields(log.Fields{
|
||||
"event": output.NewInboundPeek,
|
||||
log.ErrorKey: err,
|
||||
}).Panicf("roomserver output log: remote peek event failure")
|
||||
return nil
|
||||
}
|
||||
default:
|
||||
log.WithField("type", output.Type).Debug(
|
||||
"roomserver output log: ignoring unknown output type",
|
||||
)
|
||||
return nil
|
||||
}
|
||||
|
||||
return nil
|
||||
default:
|
||||
log.WithField("type", output.Type).Debug(
|
||||
"roomserver output log: ignoring unknown output type",
|
||||
)
|
||||
}
|
||||
|
||||
return true
|
||||
})
|
||||
}
|
||||
|
||||
// processInboundPeek starts tracking a new federated inbound peek (replacing the existing one if any)
|
||||
|
@ -146,7 +144,7 @@ func (s *OutputRoomEventConsumer) processInboundPeek(orp api.OutputNewInboundPee
|
|||
//
|
||||
// This is making the tests flakey.
|
||||
|
||||
return s.db.AddInboundPeek(context.TODO(), orp.ServerName, orp.RoomID, orp.PeekID, orp.RenewalInterval)
|
||||
return s.db.AddInboundPeek(s.ctx, orp.ServerName, orp.RoomID, orp.PeekID, orp.RenewalInterval)
|
||||
}
|
||||
|
||||
// processMessage updates the list of currently joined hosts in the room
|
||||
|
@ -162,7 +160,7 @@ func (s *OutputRoomEventConsumer) processMessage(ore api.OutputNewRoomEvent) err
|
|||
// TODO(#290): handle EventIDMismatchError and recover the current state by
|
||||
// talking to the roomserver
|
||||
oldJoinedHosts, err := s.db.UpdateRoom(
|
||||
context.TODO(),
|
||||
s.ctx,
|
||||
ore.Event.RoomID(),
|
||||
ore.LastSentEventID,
|
||||
ore.Event.EventID(),
|
||||
|
@ -255,7 +253,7 @@ func (s *OutputRoomEventConsumer) joinedHostsAtEvent(
|
|||
}
|
||||
|
||||
// handle peeking hosts
|
||||
inboundPeeks, err := s.db.GetInboundPeeks(context.TODO(), ore.Event.Event.RoomID())
|
||||
inboundPeeks, err := s.db.GetInboundPeeks(s.ctx, ore.Event.Event.RoomID())
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
@ -373,7 +371,7 @@ func (s *OutputRoomEventConsumer) lookupStateEvents(
|
|||
// from the roomserver using the query API.
|
||||
eventReq := api.QueryEventsByIDRequest{EventIDs: missing}
|
||||
var eventResp api.QueryEventsByIDResponse
|
||||
if err := s.rsAPI.QueryEventsByID(context.TODO(), &eventReq, &eventResp); err != nil {
|
||||
if err := s.rsAPI.QueryEventsByID(s.ctx, &eventReq, &eventResp); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue