Protect processEventWithMissingState with per-room mutex, to prevent mass CPU burn/RAM usage

Squashed commit of the following:

commit 7fad77c10e
Author: Neil Alexander <neilalexander@users.noreply.github.com>
Date:   Mon Jun 28 15:06:52 2021 +0100

    Fix processEventWithMissingStateMutexes

commit 138cddcac7
Author: Neil Alexander <neilalexander@users.noreply.github.com>
Date:   Mon Jun 28 13:59:44 2021 +0100

    Use internal.MutexByRoom

commit 6e6f026cfa
Author: Neil Alexander <neilalexander@users.noreply.github.com>
Date:   Mon Jun 28 13:50:18 2021 +0100

    Try to slow things down per room

commit b97d406dff
Author: Neil Alexander <neilalexander@users.noreply.github.com>
Date:   Mon Jun 28 13:41:27 2021 +0100

    Try to slow things down

commit 8866120ebf
Merge: 9f2de8a2 4a37b19a
Author: Neil Alexander <neilalexander@users.noreply.github.com>
Date:   Mon Jun 28 13:40:33 2021 +0100

    Merge branch 'neilalexander/rsinputfifo' into neilalexander/rsinputfifo2

commit 4a37b19a8f
Author: Neil Alexander <neilalexander@users.noreply.github.com>
Date:   Mon Jun 28 13:34:54 2021 +0100

    Add comments

commit f9ab3f4b81
Author: Neil Alexander <neilalexander@users.noreply.github.com>
Date:   Mon Jun 28 13:31:21 2021 +0100

    Tweaks

commit 9f2de8a29c
Author: Neil Alexander <neilalexander@users.noreply.github.com>
Date:   Mon Jun 28 13:15:59 2021 +0100

    Ask origin only for missing things for now

commit 8fd878c75a
Author: Neil Alexander <neilalexander@users.noreply.github.com>
Date:   Mon Jun 28 11:18:11 2021 +0100

    Make sure someone wakes up

commit b63f699f1b
Author: Neil Alexander <neilalexander@users.noreply.github.com>
Date:   Mon Jun 28 11:12:58 2021 +0100

    Use a FIFO queue instead of a channel to reduce backpressure
This commit is contained in:
Neil Alexander 2021-06-28 15:11:59 +01:00
parent 7c3991ee2f
commit 4417f24678
No known key found for this signature in database
GPG key ID: A02A2019A2BB0944

View file

@ -486,6 +486,7 @@ func (t *txnReq) getServers(ctx context.Context, roomID string) []gomatrixserver
return t.servers return t.servers
} }
t.servers = []gomatrixserverlib.ServerName{t.Origin} t.servers = []gomatrixserverlib.ServerName{t.Origin}
/*
serverReq := &api.QueryServerJoinedToRoomRequest{ serverReq := &api.QueryServerJoinedToRoomRequest{
RoomID: roomID, RoomID: roomID,
} }
@ -494,6 +495,7 @@ func (t *txnReq) getServers(ctx context.Context, roomID string) []gomatrixserver
t.servers = append(t.servers, serverRes.ServerNames...) t.servers = append(t.servers, serverRes.ServerNames...)
util.GetLogger(ctx).Infof("Found %d server(s) to query for missing events in %q", len(t.servers), roomID) util.GetLogger(ctx).Infof("Found %d server(s) to query for missing events in %q", len(t.servers), roomID)
} }
*/
return t.servers return t.servers
} }
@ -618,13 +620,18 @@ func checkAllowedByState(e *gomatrixserverlib.Event, stateEvents []*gomatrixserv
return gomatrixserverlib.Allowed(e, &authUsingState) return gomatrixserverlib.Allowed(e, &authUsingState)
} }
var processEventWithMissingStateMutexes = internal.NewMutexByRoom()
func (t *txnReq) processEventWithMissingState( func (t *txnReq) processEventWithMissingState(
ctx context.Context, e *gomatrixserverlib.Event, roomVersion gomatrixserverlib.RoomVersion, ctx context.Context, e *gomatrixserverlib.Event, roomVersion gomatrixserverlib.RoomVersion,
) error { ) error {
processEventWithMissingStateMutexes.Lock(e.RoomID())
defer processEventWithMissingStateMutexes.Unlock(e.RoomID())
// Do this with a fresh context, so that we keep working even if the // Do this with a fresh context, so that we keep working even if the
// original request times out. With any luck, by the time the remote // original request times out. With any luck, by the time the remote
// side retries, we'll have fetched the missing state. // side retries, we'll have fetched the missing state.
gmectx, cancel := context.WithTimeout(context.Background(), time.Minute*5) gmectx, cancel := context.WithTimeout(context.Background(), time.Minute)
defer cancel() defer cancel()
// We are missing the previous events for this events. // We are missing the previous events for this events.
// This means that there is a gap in our view of the history of the // This means that there is a gap in our view of the history of the