Peeking over federation via MSC2444 (#1391)

* a very very WIP first cut of peeking via MSC2753.

doesn't yet compile or work.
needs to actually add the peeking block into the sync response.
checking in now before it gets any bigger, and to gather any initial feedback on the vague shape of it.

* make PeekingDeviceSet private

* add server_name param

* blind stab at adding a `peek` section to /sync

* make it build

* make it launch

* add peeking to getResponseWithPDUsForCompleteSync

* cancel any peeks when we join a room

* spell out how to run outside of docker if you want speed

* fix SQL

* remove unnecessary txn for SelectPeeks

* fix s/join/peek/ cargocult fail

* HACK: Track goroutine IDs to determine when we write by the wrong thread

To use: set `DENDRITE_TRACE_SQL=1` then grep for `unsafe`

* Track partition offsets and only log unsafe for non-selects

* Put redactions in the writer goroutine

* Update filters on writer goroutine

* wrap peek storage in goid hack

* use exclusive writer, and MarkPeeksAsOld more efficiently

* don't log ascii in binary at sql trace...

* strip out empty roomd deltas

* re-add txn to SelectPeeks

* re-add accidentally deleted field

* reject peeks for non-worldreadable rooms

* move perform_peek

* fix package

* correctly refactor perform_peek

* WIP of implementing MSC2444

* typo

* Revert "Merge branch 'kegan/HACK-goid-sqlite-db-is-locked' into matthew/peeking"

This reverts commit 3cebd8dbfb, reversing
changes made to ed4b3a58a7.

* (almost) make it build

* clean up bad merge

* support SendEventWithState with optional event

* fix build & lint

* fix build & lint

* reinstate federated peeks in the roomserver (doh)

* fix sql thinko

* todo for authenticating state returned by /peek

* support returning current state from QueryStateAndAuthChain

* handle SS /peek

* reimplement SS /peek to prod the RS to tell the FS about the peek

* rename RemotePeeks as OutboundPeeks

* rename remote_peeks_table as outbound_peeks_table

* add perform_handle_remote_peek.go

* flesh out federation doc

* add inbound peeks table and hook it up

* rename ambiguous RemotePeek as InboundPeek

* rename FSAPI's PerformPeek as PerformOutboundPeek

* setup inbound peeks db correctly

* fix api.SendEventWithState with no event

* track latestevent on /peek

* go fmt

* document the peek send stream race better

* fix SendEventWithRewrite not to bail if handed a non-state event

* add fixme

* switch SS /peek to use SendEventWithRewrite

* fix comment

* use reverse topo ordering to find latest extrem

* support postgres for federated peeking

* go fmt

* back out bogus go.mod change

* Fix performOutboundPeekUsingServer

* Fix getAuthChain -> GetAuthChain

* Fix build issues

* Fix build again

* Fix getAuthChain -> GetAuthChain

* Don't repeat outbound peeks for the same room ID to the same servers

* Fix lint

* Don't omitempty to appease sytest

Co-authored-by: Kegan Dougal <kegan@matrix.org>
Co-authored-by: Neil Alexander <neilalexander@users.noreply.github.com>
This commit is contained in:
Matthew Hodgson 2021-01-22 14:55:08 +00:00 committed by GitHub
parent 5d8ec0ff1a
commit 0571d395b5
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
34 changed files with 1501 additions and 66 deletions

View file

@ -111,6 +111,14 @@ func (s *OutputRoomEventConsumer) onMessage(msg *sarama.ConsumerMessage) error {
}
return nil
}
case api.OutputTypeNewInboundPeek:
if err := s.processInboundPeek(*output.NewInboundPeek); err != nil {
log.WithFields(log.Fields{
"event": output.NewInboundPeek,
log.ErrorKey: err,
}).Panicf("roomserver output log: remote peek event failure")
return nil
}
default:
log.WithField("type", output.Type).Debug(
"roomserver output log: ignoring unknown output type",
@ -121,6 +129,23 @@ func (s *OutputRoomEventConsumer) onMessage(msg *sarama.ConsumerMessage) error {
return nil
}
// processInboundPeek records a federated inbound peek announced by the
// roomserver. The peek is persisted through the database's AddInboundPeek
// using the server name, room ID, peek ID and renewal interval carried by orp;
// per the original note, this replaces any existing peek (that behaviour lives
// in the storage layer and is not visible here). Once stored, the peeking
// server is picked up when working out which hosts should be sent events.
//
// Any error from the storage layer is returned to the caller unchanged.
func (s *OutputRoomEventConsumer) processInboundPeek(orp api.OutputNewInboundPeek) error {
	// FIXME: this is racy. Newly peeked events ought to start being /sent
	// atomically after orp.LatestEventID, so that nothing falls into a gap
	// between the peek starting and the send stream starting.
	//
	// Tracking orp.LatestEventID on the inbound peek is probably required,
	// but it is not yet clear how that would be used to close the race at
	// the point the send stream is started.
	//
	// This is making the tests flaky.
	ctx := context.TODO()
	if err := s.db.AddInboundPeek(ctx, orp.ServerName, orp.RoomID, orp.PeekID, orp.RenewalInterval); err != nil {
		return err
	}
	return nil
}
// processMessage updates the list of currently joined hosts in the room
// and then sends the event to the hosts that were joined before the event.
func (s *OutputRoomEventConsumer) processMessage(ore api.OutputNewRoomEvent) error {
@ -164,6 +189,10 @@ func (s *OutputRoomEventConsumer) processMessage(ore api.OutputNewRoomEvent) err
return err
}
// TODO: do housekeeping to evict unrenewed peeking hosts
// TODO: implement query to let the fedapi check whether a given peek is live or not
// Send the event.
return s.queues.SendEvent(
ore.Event, gomatrixserverlib.ServerName(ore.SendAsServer), joinedHostsAtEvent,
@ -171,7 +200,7 @@ func (s *OutputRoomEventConsumer) processMessage(ore api.OutputNewRoomEvent) err
}
// joinedHostsAtEvent works out a list of matrix servers that were joined to
// the room at the event.
// the room at the event (including peeking ones)
// It is important to use the state at the event for sending messages because:
// 1) We shouldn't send messages to servers that weren't in the room.
// 2) If a server is kicked from the room it should still be told about the
@ -222,6 +251,15 @@ func (s *OutputRoomEventConsumer) joinedHostsAtEvent(
joined[joinedHost.ServerName] = true
}
// handle peeking hosts
inboundPeeks, err := s.db.GetInboundPeeks(context.TODO(), ore.Event.Event.RoomID())
if err != nil {
return nil, err
}
for _, inboundPeek := range inboundPeeks {
joined[inboundPeek.ServerName] = true
}
var result []gomatrixserverlib.ServerName
for serverName, include := range joined {
if include {