Set timeouts on roomserver input tasks (need to decide what timeout makes sense)

This commit is contained in:
Neil Alexander 2022-01-24 11:15:20 +00:00
parent c68037b3e8
commit 03a989d5c9
No known key found for this signature in database
GPG key ID: A02A2019A2BB0944
2 changed files with 22 additions and 4 deletions

View file

@ -19,6 +19,7 @@ import (
"context" "context"
"encoding/json" "encoding/json"
"sync" "sync"
"time"
"github.com/Arceliar/phony" "github.com/Arceliar/phony"
"github.com/getsentry/sentry-go" "github.com/getsentry/sentry-go"
@ -42,6 +43,9 @@ var keyContentFields = map[string]string{
"m.room.member": "membership", "m.room.member": "membership",
} }
// MaximumProcessingTime is the timeout placed on the context that is passed
// to processRoomEvent for each roomserver input task.
// TODO: Does this value make sense?
const MaximumProcessingTime = time.Minute
type Inputer struct { type Inputer struct {
DB storage.Database DB storage.Database
JetStream nats.JetStreamContext JetStream nats.JetStreamContext
@ -78,9 +82,11 @@ func (r *Inputer) Start() error {
} }
roomserverInputBackpressure.With(prometheus.Labels{"room_id": roomID}).Inc() roomserverInputBackpressure.With(prometheus.Labels{"room_id": roomID}).Inc()
worker := r.workerForRoom(roomID) worker := r.workerForRoom(roomID)
worker.Act(worker, func() { worker.Act(nil, func() {
ctx, cancel := context.WithTimeout(context.Background(), MaximumProcessingTime)
defer cancel()
defer roomserverInputBackpressure.With(prometheus.Labels{"room_id": roomID}).Dec() defer roomserverInputBackpressure.With(prometheus.Labels{"room_id": roomID}).Dec()
if err := r.processRoomEvent(context.TODO(), &inputRoomEvent); err != nil { if err := r.processRoomEvent(ctx, &inputRoomEvent); err != nil {
sentry.CaptureException(err) sentry.CaptureException(err)
} else { } else {
hooks.Run(hooks.KindNewEventPersisted, inputRoomEvent.Event) hooks.Run(hooks.KindNewEventPersisted, inputRoomEvent.Event)
@ -99,6 +105,9 @@ func (r *Inputer) Start() error {
nats.MaxDeliver(0), nats.MaxDeliver(0),
// Use a durable named consumer. // Use a durable named consumer.
r.Durable, r.Durable,
// Only process one message at a time, rather than have NATS flood us with
// more messages when we're still busy working on the last one.
nats.MaxAckPending(1),
) )
return err return err
} }
@ -135,9 +144,11 @@ func (r *Inputer) InputRoomEvents(
roomID := inputRoomEvent.Event.RoomID() roomID := inputRoomEvent.Event.RoomID()
roomserverInputBackpressure.With(prometheus.Labels{"room_id": roomID}).Inc() roomserverInputBackpressure.With(prometheus.Labels{"room_id": roomID}).Inc()
worker := r.workerForRoom(roomID) worker := r.workerForRoom(roomID)
worker.Act(worker, func() { worker.Act(nil, func() {
reqctx, cancel := context.WithTimeout(ctx, MaximumProcessingTime)
defer cancel()
defer roomserverInputBackpressure.With(prometheus.Labels{"room_id": roomID}).Dec() defer roomserverInputBackpressure.With(prometheus.Labels{"room_id": roomID}).Dec()
err := r.processRoomEvent(ctx, &inputRoomEvent) err := r.processRoomEvent(reqctx, &inputRoomEvent)
if err != nil { if err != nil {
sentry.CaptureException(err) sentry.CaptureException(err)
} else { } else {

View file

@ -65,6 +65,13 @@ func (r *Inputer) processRoomEvent(
ctx context.Context, ctx context.Context,
input *api.InputRoomEvent, input *api.InputRoomEvent,
) (err error) { ) (err error) {
// Before we do anything, make sure the context hasn't expired for this pending task.
select {
case <-ctx.Done():
return context.DeadlineExceeded
default:
}
// Measure how long it takes to process this event. // Measure how long it takes to process this event.
started := time.Now() started := time.Now()
defer func() { defer func() {