Roomserver/federation input refactor (#2104)

* Put federation client functions into their own file

* Look for missing auth events in RS input

* Remove retrieveMissingAuthEvents from federation API

* Logging

* Sorta transplanted the code over

* Use event origin failing all else

* Don't get stuck on mutexes:

* Add verifier

* Don't mark state events with zero snapshot NID as not existing

* Check missing state if not an outlier before storing the event

* Reject instead of soft-fail, don't copy roominfo so much

* Use synchronous contexts, limit time to fetch missing events

* Clean up some commented out bits

* Simplify `/send` endpoint significantly

* Submit async

* Report errors on sending to RS input

* Set max payload in NATS to 16MB

* Tweak metrics

* Add `workerForRoom` for tidiness

* Try skipping unmarshalling errors for RespMissingEvents

* Track missing prev events separately to avoid calculating state when not possible

* Tweak logic around checking missing state

* Care about state when checking missing prev events

* Don't check missing state for create events

* Try that again

* Handle create events better

* Send create room events as new

* Use given event kind when sending auth/state events

* Revert "Use given event kind when sending auth/state events"

This reverts commit 089d64d271.

* Only search for missing prev events or state for new events

* Tweaks

* We only have missing prev if we don't supply state

* Room version tweaks

* Allow async inputs again

* Apply backpressure to consumers/synchronous requests to hopefully stop things being overwhelmed

* Set timeouts on roomserver input tasks (need to decide what timeout makes sense)

* Use work queue policy, deliver all on restart

* Reduce chance of duplicates being sent by NATS

* Limit the number of servers we attempt to reduce backpressure

* Some review comment fixes

* Tidy up a couple things

* Don't limit servers, randomise order using map

* Some context refactoring

* Update gmsl

* Don't resend create events

* Set stateIDs length correctly or else the roomserver thinks there are missing events when there aren't

* Exclude our own servername

* Try backing off servers

* Make excluding self behaviour optional

* Exclude self from g_m_e

* Update sytest-whitelist

* Update consumers for the roomserver output stream

* Remember to send outliers for state returned from /gme

* Make full HTTP tests less upsetti

* Remove 'If a device list update goes missing, the server resyncs on the next one' from the sytest blacklist

* Remove debugging test

* Fix blacklist again, remove unnecessary duplicate context

* Clearer contexts, don't use background in case there's something happening there

* Don't queue up events more than once in memory

* Correctly identify create events when checking for state

* Fill in gaps again in /gme code

* Remove `AuthEventIDs` from `InputRoomEvent`

* Remove stray field

Co-authored-by: Kegan Dougal <kegan@matrix.org>
This commit is contained in:
Neil Alexander 2022-01-27 14:29:14 +00:00 committed by GitHub
parent 5b4999afa9
commit a763cbb0e1
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
46 changed files with 1549 additions and 1285 deletions

View file

@ -63,7 +63,7 @@ func (r *Queryer) QueryStateAfterEvents(
return nil
}
roomState := state.NewStateResolution(r.DB, *info)
roomState := state.NewStateResolution(r.DB, info)
response.RoomExists = true
response.RoomVersion = info.RoomVersion
@ -294,7 +294,7 @@ func (r *Queryer) QueryMembershipsForRoom(
events, err = r.DB.Events(ctx, eventNIDs)
} else {
stateEntries, err = helpers.StateBeforeEvent(ctx, r.DB, *info, membershipEventNID)
stateEntries, err = helpers.StateBeforeEvent(ctx, r.DB, info, membershipEventNID)
if err != nil {
logrus.WithField("membership_event_nid", membershipEventNID).WithError(err).Error("failed to load state before event")
return err
@ -377,7 +377,7 @@ func (r *Queryer) QueryServerAllowedToSeeEvent(
return fmt.Errorf("QueryServerAllowedToSeeEvent: no room info for room %s", roomID)
}
response.AllowedToSeeEvent, err = helpers.CheckServerAllowedToSeeEvent(
ctx, r.DB, *info, request.EventID, request.ServerName, inRoomRes.IsInRoom,
ctx, r.DB, info, request.EventID, request.ServerName, inRoomRes.IsInRoom,
)
return
}
@ -416,7 +416,7 @@ func (r *Queryer) QueryMissingEvents(
return fmt.Errorf("missing RoomInfo for room %s", events[0].RoomID())
}
resultNIDs, err := helpers.ScanEventTree(ctx, r.DB, *info, front, visited, request.Limit, request.ServerName)
resultNIDs, err := helpers.ScanEventTree(ctx, r.DB, info, front, visited, request.Limit, request.ServerName)
if err != nil {
return err
}
@ -473,7 +473,7 @@ func (r *Queryer) QueryStateAndAuthChain(
}
var stateEvents []*gomatrixserverlib.Event
stateEvents, err = r.loadStateAtEventIDs(ctx, *info, request.PrevEventIDs)
stateEvents, err = r.loadStateAtEventIDs(ctx, info, request.PrevEventIDs)
if err != nil {
return err
}
@ -512,7 +512,7 @@ func (r *Queryer) QueryStateAndAuthChain(
return err
}
func (r *Queryer) loadStateAtEventIDs(ctx context.Context, roomInfo types.RoomInfo, eventIDs []string) ([]*gomatrixserverlib.Event, error) {
func (r *Queryer) loadStateAtEventIDs(ctx context.Context, roomInfo *types.RoomInfo, eventIDs []string) ([]*gomatrixserverlib.Event, error) {
roomState := state.NewStateResolution(r.DB, roomInfo)
prevStates, err := r.DB.StateAtEventIDs(ctx, eventIDs)
if err != nil {