Full roomserver input transactional isolation (#2141)

* Add transaction to all database tables in roomserver, rename latest events updater to room updater, use room updater for all RS input

* Better transaction management

* Tweak order

* Handle cases where the room does not exist

* Other fixes

* More tweaks

* Fill some gaps

* Fill in the gaps

* good lord it gets worse

* Don't roll back transactions when events rejected

* Pass through errors properly

* Fix bugs

* Fix incorrect error check

* Don't panic on nil txns

* Tweaks

* Hopefully fix panics for good in SQLite this time

* Fix rollback

* Minor bug fixes with latest event updater

* Some review comments

* Revert "Some review comments"

This reverts commit 0caf8cf53e62c33f7b83c52e9df1d963871f751e.

* Fix a couple of bugs

* Clearer commit and rollback results

* Remove unnecessary prepares
This commit is contained in:
Neil Alexander 2022-02-04 10:39:34 +00:00 committed by GitHub
parent 4d9f5b2e57
commit eb352a5f6b
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
35 changed files with 867 additions and 499 deletions

View file

@ -19,6 +19,7 @@ import (
"context"
"encoding/json"
"errors"
"fmt"
"sync"
"time"
@ -38,6 +39,19 @@ import (
"github.com/tidwall/gjson"
)
type (
	// retryAction tells the caller of the input pipeline whether a message
	// should be handed back for another delivery attempt or treated as done.
	retryAction int

	// commitAction tells the caller what to do with the room updater's
	// transaction once an event has been processed.
	commitAction int
)

// Values of retryAction.
const (
	// doNotRetry means the message is finished with and should be
	// positively acknowledged.
	doNotRetry retryAction = iota
	// retryLater means the message should be negatively acknowledged so
	// that it can be attempted again.
	retryLater retryAction = iota
)

// Values of commitAction.
const (
	// commitTransaction means the updater's transaction should be committed.
	commitTransaction commitAction = iota
	// rollbackTransaction means the updater's transaction should be rolled back.
	rollbackTransaction commitAction = iota
)
var keyContentFields = map[string]string{
"m.room.join_rules": "join_rule",
"m.room.history_visibility": "history_visibility",
@ -101,7 +115,8 @@ func (r *Inputer) Start() error {
_ = msg.InProgress() // resets the acknowledgement wait timer
defer eventsInProgress.Delete(index)
defer roomserverInputBackpressure.With(prometheus.Labels{"room_id": roomID}).Dec()
if err := r.processRoomEvent(context.Background(), &inputRoomEvent); err != nil {
action, err := r.processRoomEventUsingUpdater(context.Background(), roomID, &inputRoomEvent)
if err != nil {
if !errors.Is(err, context.DeadlineExceeded) && !errors.Is(err, context.Canceled) {
sentry.CaptureException(err)
}
@ -111,7 +126,12 @@ func (r *Inputer) Start() error {
"type": inputRoomEvent.Event.Type(),
}).Warn("Roomserver failed to process async event")
}
_ = msg.Ack()
switch action {
case retryLater:
_ = msg.Nak()
case doNotRetry:
_ = msg.Ack()
}
})
},
// NATS wants to acknowledge automatically by default when the message is
@ -131,6 +151,37 @@ func (r *Inputer) Start() error {
return err
}
// processRoomEventUsingUpdater looks up the room, obtains a room updater
// for it and runs the input event through processRoomEvent using that
// updater. Depending on the commitAction that processRoomEvent reports,
// the updater is then committed or rolled back.
//
// The returned retryAction says whether the event should be negatively
// acknowledged and retried (retryLater) or positively acknowledged
// (doNotRetry), e.g. for NATS. retryLater is returned when the updater
// cannot be obtained or when the commit/rollback itself fails; in every
// other case doNotRetry is returned together with whatever error
// processRoomEvent produced (possibly nil).
func (r *Inputer) processRoomEventUsingUpdater(
	ctx context.Context,
	roomID string,
	inputRoomEvent *api.InputRoomEvent,
) (retryAction, error) {
	info, infoErr := r.DB.RoomInfo(ctx, roomID)
	if infoErr != nil {
		return doNotRetry, fmt.Errorf("r.DB.RoomInfo: %w", infoErr)
	}

	roomUpdater, updaterErr := r.DB.GetRoomUpdater(ctx, info)
	if updaterErr != nil {
		return retryLater, fmt.Errorf("r.DB.GetRoomUpdater: %w", updaterErr)
	}

	// The processing error is held back until the transaction has been
	// finalised, because the requested action is honoured regardless of
	// whether processing reported an error.
	txnAction, processErr := r.processRoomEvent(ctx, roomUpdater, inputRoomEvent)

	if txnAction == commitTransaction {
		if commitErr := roomUpdater.Commit(); commitErr != nil {
			return retryLater, fmt.Errorf("updater.Commit: %w", commitErr)
		}
	} else if txnAction == rollbackTransaction {
		if rollbackErr := roomUpdater.Rollback(); rollbackErr != nil {
			return retryLater, fmt.Errorf("updater.Rollback: %w", rollbackErr)
		}
	}

	return doNotRetry, processErr
}
// InputRoomEvents implements api.RoomserverInternalAPI
func (r *Inputer) InputRoomEvents(
ctx context.Context,
@ -177,7 +228,7 @@ func (r *Inputer) InputRoomEvents(
worker.Act(nil, func() {
defer eventsInProgress.Delete(index)
defer roomserverInputBackpressure.With(prometheus.Labels{"room_id": roomID}).Dec()
err := r.processRoomEvent(ctx, &inputRoomEvent)
_, err := r.processRoomEventUsingUpdater(ctx, roomID, &inputRoomEvent)
if err != nil {
if !errors.Is(err, context.DeadlineExceeded) && !errors.Is(err, context.Canceled) {
sentry.CaptureException(err)