dendrite/federationapi/routing/send.go

465 lines
16 KiB
Go
Raw Normal View History

// Copyright 2017 Vector Creations Ltd
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package routing
import (
"context"
"encoding/json"
"fmt"
"net/http"
"sync"
"time"
"github.com/matrix-org/dendrite/clientapi/jsonerror"
eduserverAPI "github.com/matrix-org/dendrite/eduserver/api"
federationAPI "github.com/matrix-org/dendrite/federationapi/api"
"github.com/matrix-org/dendrite/internal"
keyapi "github.com/matrix-org/dendrite/keyserver/api"
"github.com/matrix-org/dendrite/roomserver/api"
"github.com/matrix-org/dendrite/setup/config"
"github.com/matrix-org/gomatrixserverlib"
"github.com/matrix-org/util"
"github.com/prometheus/client_golang/prometheus"
"github.com/sirupsen/logrus"
)
const (
// Event was passed to the roomserver
MetricsOutcomeOK = "ok"
// Event failed to be processed
MetricsOutcomeFail = "fail"
// Event failed auth checks
MetricsOutcomeRejected = "rejected"
// Terminated the transaction
MetricsOutcomeFatal = "fatal"
// The event has missing auth_events we need to fetch
MetricsWorkMissingAuthEvents = "missing_auth_events"
// No work had to be done as we had all prev/auth events
MetricsWorkDirect = "direct"
// The event has missing prev_events we need to call /g_m_e for
MetricsWorkMissingPrevEvents = "missing_prev_events"
)
var (
pduCountTotal = prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: "dendrite",
Subsystem: "federationapi",
Name: "recv_pdus",
Help: "Number of incoming PDUs from remote servers with labels for success",
},
[]string{"status"}, // 'success' or 'total'
)
eduCountTotal = prometheus.NewCounter(
prometheus.CounterOpts{
Namespace: "dendrite",
Subsystem: "federationapi",
Name: "recv_edus",
Help: "Number of incoming EDUs from remote servers",
},
)
)
func init() {
prometheus.MustRegister(
Roomserver/federation input refactor (#2104) * Put federation client functions into their own file * Look for missing auth events in RS input * Remove retrieveMissingAuthEvents from federation API * Logging * Sorta transplanted the code over * Use event origin failing all else * Don't get stuck on mutexes: * Add verifier * Don't mark state events with zero snapshot NID as not existing * Check missing state if not an outlier before storing the event * Reject instead of soft-fail, don't copy roominfo so much * Use synchronous contexts, limit time to fetch missing events * Clean up some commented out bits * Simplify `/send` endpoint significantly * Submit async * Report errors on sending to RS input * Set max payload in NATS to 16MB * Tweak metrics * Add `workerForRoom` for tidiness * Try skipping unmarshalling errors for RespMissingEvents * Track missing prev events separately to avoid calculating state when not possible * Tweak logic around checking missing state * Care about state when checking missing prev events * Don't check missing state for create events * Try that again * Handle create events better * Send create room events as new * Use given event kind when sending auth/state events * Revert "Use given event kind when sending auth/state events" This reverts commit 089d64d271b5fca8c104e1554711187420dbebca. * Only search for missing prev events or state for new events * Tweaks * We only have missing prev if we don't supply state * Room version tweaks * Allow async inputs again * Apply backpressure to consumers/synchronous requests to hopefully stop things being overwhelmed * Set timeouts on roomserver input tasks (need to decide what timeout makes sense) * Use work queue policy, deliver all on restart * Reduce chance of duplicates being sent by NATS * Limit the number of servers we attempt to reduce backpressure * Some review comment fixes * Tidy up a couple things * Don't limit servers, randomise order using map * Some context refactoring * Update gmsl * Don't resend create events * Set stateIDs length correctly or else the roomserver thinks there are missing events when there aren't * Exclude our own servername * Try backing off servers * Make excluding self behaviour optional * Exclude self from g_m_e * Update sytest-whitelist * Update consumers for the roomserver output stream * Remember to send outliers for state returned from /gme * Make full HTTP tests less upsetti * Remove 'If a device list update goes missing, the server resyncs on the next one' from the sytest blacklist * Remove debugging test * Fix blacklist again, remove unnecessary duplicate context * Clearer contexts, don't use background in case there's something happening there * Don't queue up events more than once in memory * Correctly identify create events when checking for state * Fill in gaps again in /gme code * Remove `AuthEventIDs` from `InputRoomEvent` * Remove stray field Co-authored-by: Kegan Dougal <kegan@matrix.org>
2022-01-27 14:29:14 +00:00
pduCountTotal, eduCountTotal,
)
}
var inFlightTxnsPerOrigin sync.Map // transaction ID -> chan util.JSONResponse
// Send implements /_matrix/federation/v1/send/{txnID}
func Send(
httpReq *http.Request,
request *gomatrixserverlib.FederationRequest,
txnID gomatrixserverlib.TransactionID,
cfg *config.FederationAPI,
rsAPI api.RoomserverInternalAPI,
eduAPI eduserverAPI.EDUServerInputAPI,
keyAPI keyapi.KeyInternalAPI,
keys gomatrixserverlib.JSONVerifier,
federation *gomatrixserverlib.FederationClient,
mu *internal.MutexByRoom,
servers federationAPI.ServersInRoomProvider,
) util.JSONResponse {
// First we should check if this origin has already submitted this
// txn ID to us. If they have and the txnIDs map contains an entry,
// the transaction is still being worked on. The new client can wait
// for it to complete rather than creating more work.
index := string(request.Origin()) + "\000" + string(txnID)
v, ok := inFlightTxnsPerOrigin.LoadOrStore(index, make(chan util.JSONResponse, 1))
ch := v.(chan util.JSONResponse)
if ok {
// This origin already submitted this txn ID to us, and the work
// is still taking place, so we'll just wait for it to finish.
ctx, cancel := context.WithTimeout(httpReq.Context(), time.Minute*5)
defer cancel()
select {
case <-ctx.Done():
// If the caller gives up then return straight away. We don't
// want to attempt to process what they sent us any further.
return util.JSONResponse{Code: http.StatusRequestTimeout}
case res := <-ch:
// The original task just finished processing so let's return
// the result of it.
if res.Code == 0 {
return util.JSONResponse{Code: http.StatusAccepted}
}
return res
}
}
// Otherwise, store that we're currently working on this txn from
// this origin. When we're done processing, close the channel.
defer close(ch)
defer inFlightTxnsPerOrigin.Delete(index)
t := txnReq{
rsAPI: rsAPI,
eduAPI: eduAPI,
keys: keys,
federation: federation,
servers: servers,
keyAPI: keyAPI,
roomsMu: mu,
}
Further room version wiring (#936) * Room version 2 by default, other wiring updates, update gomatrixserverlib * Fix nil pointer exception * Fix some more nil pointer exceptions hopefully * Update gomatrixserverlib * Send all room versions when joining, not just stable ones * Remove room version cquery * Get room version when getting events from the roomserver database * Reset default back to room version 2 * Don't generate event IDs unless needed * Revert "Remove room version cquery" This reverts commit a170d5873360dd059614460acc8b21ab2cda9767. * Query room version in federation API, client API as needed * Improvements to make_join send_join dance * Make room server producers use headered events * Lint tweaks * Update gomatrixserverlib * Versioned SendJoin * Query room version in syncapi backfill * Handle transaction marshalling/unmarshalling within Dendrite * Sorta fix federation (kinda) * whoops commit federation API too * Use NewEventFromTrustedJSON when getting events from the database * Update gomatrixserverlib * Strip headers on federationapi endpoints * Fix bug in clientapi profile room version query * Update gomatrixserverlib * Return more useful error if room version query doesn't find the room * Update gomatrixserverlib * Update gomatrixserverlib * Maybe fix federation * Fix formatting directive * Update sytest whitelist and blacklist * Temporarily disable room versions 3 and 4 until gmsl is fixed * Fix count of EDUs in logging * Update gomatrixserverlib * Update gomatrixserverlib * Update gomatrixserverlib * Rely on EventBuilder in gmsl to generate the event IDs for us * Some review comments fixed * Move function out of common and into gmsl * Comment in federationsender destinationqueue * Update gomatrixserverlib
2020-03-27 16:28:22 +00:00
var txnEvents struct {
PDUs []json.RawMessage `json:"pdus"`
EDUs []gomatrixserverlib.EDU `json:"edus"`
Further room version wiring (#936) * Room version 2 by default, other wiring updates, update gomatrixserverlib * Fix nil pointer exception * Fix some more nil pointer exceptions hopefully * Update gomatrixserverlib * Send all room versions when joining, not just stable ones * Remove room version cquery * Get room version when getting events from the roomserver database * Reset default back to room version 2 * Don't generate event IDs unless needed * Revert "Remove room version cquery" This reverts commit a170d5873360dd059614460acc8b21ab2cda9767. * Query room version in federation API, client API as needed * Improvements to make_join send_join dance * Make room server producers use headered events * Lint tweaks * Update gomatrixserverlib * Versioned SendJoin * Query room version in syncapi backfill * Handle transaction marshalling/unmarshalling within Dendrite * Sorta fix federation (kinda) * whoops commit federation API too * Use NewEventFromTrustedJSON when getting events from the database * Update gomatrixserverlib * Strip headers on federationapi endpoints * Fix bug in clientapi profile room version query * Update gomatrixserverlib * Return more useful error if room version query doesn't find the room * Update gomatrixserverlib * Update gomatrixserverlib * Maybe fix federation * Fix formatting directive * Update sytest whitelist and blacklist * Temporarily disable room versions 3 and 4 until gmsl is fixed * Fix count of EDUs in logging * Update gomatrixserverlib * Update gomatrixserverlib * Update gomatrixserverlib * Rely on EventBuilder in gmsl to generate the event IDs for us * Some review comments fixed * Move function out of common and into gmsl * Comment in federationsender destinationqueue * Update gomatrixserverlib
2020-03-27 16:28:22 +00:00
}
if err := json.Unmarshal(request.Content(), &txnEvents); err != nil {
return util.JSONResponse{
Code: http.StatusBadRequest,
JSON: jsonerror.NotJSON("The request body could not be decoded into valid JSON. " + err.Error()),
}
}
// Transactions are limited in size; they can have at most 50 PDUs and 100 EDUs.
// https://matrix.org/docs/spec/server_server/latest#transactions
if len(txnEvents.PDUs) > 50 || len(txnEvents.EDUs) > 100 {
return util.JSONResponse{
Code: http.StatusBadRequest,
JSON: jsonerror.BadJSON("max 50 pdus / 100 edus"),
}
}
// TODO: Really we should have a function to convert FederationRequest to txnReq
Further room version wiring (#936) * Room version 2 by default, other wiring updates, update gomatrixserverlib * Fix nil pointer exception * Fix some more nil pointer exceptions hopefully * Update gomatrixserverlib * Send all room versions when joining, not just stable ones * Remove room version cquery * Get room version when getting events from the roomserver database * Reset default back to room version 2 * Don't generate event IDs unless needed * Revert "Remove room version cquery" This reverts commit a170d5873360dd059614460acc8b21ab2cda9767. * Query room version in federation API, client API as needed * Improvements to make_join send_join dance * Make room server producers use headered events * Lint tweaks * Update gomatrixserverlib * Versioned SendJoin * Query room version in syncapi backfill * Handle transaction marshalling/unmarshalling within Dendrite * Sorta fix federation (kinda) * whoops commit federation API too * Use NewEventFromTrustedJSON when getting events from the database * Update gomatrixserverlib * Strip headers on federationapi endpoints * Fix bug in clientapi profile room version query * Update gomatrixserverlib * Return more useful error if room version query doesn't find the room * Update gomatrixserverlib * Update gomatrixserverlib * Maybe fix federation * Fix formatting directive * Update sytest whitelist and blacklist * Temporarily disable room versions 3 and 4 until gmsl is fixed * Fix count of EDUs in logging * Update gomatrixserverlib * Update gomatrixserverlib * Update gomatrixserverlib * Rely on EventBuilder in gmsl to generate the event IDs for us * Some review comments fixed * Move function out of common and into gmsl * Comment in federationsender destinationqueue * Update gomatrixserverlib
2020-03-27 16:28:22 +00:00
t.PDUs = txnEvents.PDUs
t.EDUs = txnEvents.EDUs
t.Origin = request.Origin()
t.TransactionID = txnID
t.Destination = cfg.Matrix.ServerName
util.GetLogger(httpReq.Context()).Debugf("Received transaction %q from %q containing %d PDUs, %d EDUs", txnID, request.Origin(), len(t.PDUs), len(t.EDUs))
Further room version wiring (#936) * Room version 2 by default, other wiring updates, update gomatrixserverlib * Fix nil pointer exception * Fix some more nil pointer exceptions hopefully * Update gomatrixserverlib * Send all room versions when joining, not just stable ones * Remove room version cquery * Get room version when getting events from the roomserver database * Reset default back to room version 2 * Don't generate event IDs unless needed * Revert "Remove room version cquery" This reverts commit a170d5873360dd059614460acc8b21ab2cda9767. * Query room version in federation API, client API as needed * Improvements to make_join send_join dance * Make room server producers use headered events * Lint tweaks * Update gomatrixserverlib * Versioned SendJoin * Query room version in syncapi backfill * Handle transaction marshalling/unmarshalling within Dendrite * Sorta fix federation (kinda) * whoops commit federation API too * Use NewEventFromTrustedJSON when getting events from the database * Update gomatrixserverlib * Strip headers on federationapi endpoints * Fix bug in clientapi profile room version query * Update gomatrixserverlib * Return more useful error if room version query doesn't find the room * Update gomatrixserverlib * Update gomatrixserverlib * Maybe fix federation * Fix formatting directive * Update sytest whitelist and blacklist * Temporarily disable room versions 3 and 4 until gmsl is fixed * Fix count of EDUs in logging * Update gomatrixserverlib * Update gomatrixserverlib * Update gomatrixserverlib * Rely on EventBuilder in gmsl to generate the event IDs for us * Some review comments fixed * Move function out of common and into gmsl * Comment in federationsender destinationqueue * Update gomatrixserverlib
2020-03-27 16:28:22 +00:00
Roomserver/federation input refactor (#2104) * Put federation client functions into their own file * Look for missing auth events in RS input * Remove retrieveMissingAuthEvents from federation API * Logging * Sorta transplanted the code over * Use event origin failing all else * Don't get stuck on mutexes: * Add verifier * Don't mark state events with zero snapshot NID as not existing * Check missing state if not an outlier before storing the event * Reject instead of soft-fail, don't copy roominfo so much * Use synchronous contexts, limit time to fetch missing events * Clean up some commented out bits * Simplify `/send` endpoint significantly * Submit async * Report errors on sending to RS input * Set max payload in NATS to 16MB * Tweak metrics * Add `workerForRoom` for tidiness * Try skipping unmarshalling errors for RespMissingEvents * Track missing prev events separately to avoid calculating state when not possible * Tweak logic around checking missing state * Care about state when checking missing prev events * Don't check missing state for create events * Try that again * Handle create events better * Send create room events as new * Use given event kind when sending auth/state events * Revert "Use given event kind when sending auth/state events" This reverts commit 089d64d271b5fca8c104e1554711187420dbebca. * Only search for missing prev events or state for new events * Tweaks * We only have missing prev if we don't supply state * Room version tweaks * Allow async inputs again * Apply backpressure to consumers/synchronous requests to hopefully stop things being overwhelmed * Set timeouts on roomserver input tasks (need to decide what timeout makes sense) * Use work queue policy, deliver all on restart * Reduce chance of duplicates being sent by NATS * Limit the number of servers we attempt to reduce backpressure * Some review comment fixes * Tidy up a couple things * Don't limit servers, randomise order using map * Some context refactoring * Update gmsl * Don't resend create events * Set stateIDs length correctly or else the roomserver thinks there are missing events when there aren't * Exclude our own servername * Try backing off servers * Make excluding self behaviour optional * Exclude self from g_m_e * Update sytest-whitelist * Update consumers for the roomserver output stream * Remember to send outliers for state returned from /gme * Make full HTTP tests less upsetti * Remove 'If a device list update goes missing, the server resyncs on the next one' from the sytest blacklist * Remove debugging test * Fix blacklist again, remove unnecessary duplicate context * Clearer contexts, don't use background in case there's something happening there * Don't queue up events more than once in memory * Correctly identify create events when checking for state * Fill in gaps again in /gme code * Remove `AuthEventIDs` from `InputRoomEvent` * Remove stray field Co-authored-by: Kegan Dougal <kegan@matrix.org>
2022-01-27 14:29:14 +00:00
resp, jsonErr := t.processTransaction(httpReq.Context())
if jsonErr != nil {
util.GetLogger(httpReq.Context()).WithField("jsonErr", jsonErr).Error("t.processTransaction failed")
return *jsonErr
}
// https://matrix.org/docs/spec/server_server/r0.1.3#put-matrix-federation-v1-send-txnid
// Status code 200:
// The result of processing the transaction. The server is to use this response
// even in the event of one or more PDUs failing to be processed.
res := util.JSONResponse{
Code: http.StatusOK,
JSON: resp,
}
ch <- res
return res
}
type txnReq struct {
gomatrixserverlib.Transaction
rsAPI api.RoomserverInternalAPI
eduAPI eduserverAPI.EDUServerInputAPI
keyAPI keyapi.KeyInternalAPI
keys gomatrixserverlib.JSONVerifier
federation txnFederationClient
roomsMu *internal.MutexByRoom
Roomserver/federation input refactor (#2104) * Put federation client functions into their own file * Look for missing auth events in RS input * Remove retrieveMissingAuthEvents from federation API * Logging * Sorta transplanted the code over * Use event origin failing all else * Don't get stuck on mutexes: * Add verifier * Don't mark state events with zero snapshot NID as not existing * Check missing state if not an outlier before storing the event * Reject instead of soft-fail, don't copy roominfo so much * Use synchronous contexts, limit time to fetch missing events * Clean up some commented out bits * Simplify `/send` endpoint significantly * Submit async * Report errors on sending to RS input * Set max payload in NATS to 16MB * Tweak metrics * Add `workerForRoom` for tidiness * Try skipping unmarshalling errors for RespMissingEvents * Track missing prev events separately to avoid calculating state when not possible * Tweak logic around checking missing state * Care about state when checking missing prev events * Don't check missing state for create events * Try that again * Handle create events better * Send create room events as new * Use given event kind when sending auth/state events * Revert "Use given event kind when sending auth/state events" This reverts commit 089d64d271b5fca8c104e1554711187420dbebca. * Only search for missing prev events or state for new events * Tweaks * We only have missing prev if we don't supply state * Room version tweaks * Allow async inputs again * Apply backpressure to consumers/synchronous requests to hopefully stop things being overwhelmed * Set timeouts on roomserver input tasks (need to decide what timeout makes sense) * Use work queue policy, deliver all on restart * Reduce chance of duplicates being sent by NATS * Limit the number of servers we attempt to reduce backpressure * Some review comment fixes * Tidy up a couple things * Don't limit servers, randomise order using map * Some context refactoring * Update gmsl * Don't resend create events * Set stateIDs length correctly or else the roomserver thinks there are missing events when there aren't * Exclude our own servername * Try backing off servers * Make excluding self behaviour optional * Exclude self from g_m_e * Update sytest-whitelist * Update consumers for the roomserver output stream * Remember to send outliers for state returned from /gme * Make full HTTP tests less upsetti * Remove 'If a device list update goes missing, the server resyncs on the next one' from the sytest blacklist * Remove debugging test * Fix blacklist again, remove unnecessary duplicate context * Clearer contexts, don't use background in case there's something happening there * Don't queue up events more than once in memory * Correctly identify create events when checking for state * Fill in gaps again in /gme code * Remove `AuthEventIDs` from `InputRoomEvent` * Remove stray field Co-authored-by: Kegan Dougal <kegan@matrix.org>
2022-01-27 14:29:14 +00:00
servers federationAPI.ServersInRoomProvider
}
// A subset of FederationClient functionality that txn requires. Useful for testing.
type txnFederationClient interface {
LookupState(ctx context.Context, s gomatrixserverlib.ServerName, roomID string, eventID string, roomVersion gomatrixserverlib.RoomVersion) (
res gomatrixserverlib.RespState, err error,
)
LookupStateIDs(ctx context.Context, s gomatrixserverlib.ServerName, roomID string, eventID string) (res gomatrixserverlib.RespStateIDs, err error)
GetEvent(ctx context.Context, s gomatrixserverlib.ServerName, eventID string) (res gomatrixserverlib.Transaction, err error)
LookupMissingEvents(ctx context.Context, s gomatrixserverlib.ServerName, roomID string, missing gomatrixserverlib.MissingEvents,
roomVersion gomatrixserverlib.RoomVersion) (res gomatrixserverlib.RespMissingEvents, err error)
}
func (t *txnReq) processTransaction(ctx context.Context) (*gomatrixserverlib.RespSend, *util.JSONResponse) {
var wg sync.WaitGroup
Roomserver/federation input refactor (#2104) * Put federation client functions into their own file * Look for missing auth events in RS input * Remove retrieveMissingAuthEvents from federation API * Logging * Sorta transplanted the code over * Use event origin failing all else * Don't get stuck on mutexes: * Add verifier * Don't mark state events with zero snapshot NID as not existing * Check missing state if not an outlier before storing the event * Reject instead of soft-fail, don't copy roominfo so much * Use synchronous contexts, limit time to fetch missing events * Clean up some commented out bits * Simplify `/send` endpoint significantly * Submit async * Report errors on sending to RS input * Set max payload in NATS to 16MB * Tweak metrics * Add `workerForRoom` for tidiness * Try skipping unmarshalling errors for RespMissingEvents * Track missing prev events separately to avoid calculating state when not possible * Tweak logic around checking missing state * Care about state when checking missing prev events * Don't check missing state for create events * Try that again * Handle create events better * Send create room events as new * Use given event kind when sending auth/state events * Revert "Use given event kind when sending auth/state events" This reverts commit 089d64d271b5fca8c104e1554711187420dbebca. * Only search for missing prev events or state for new events * Tweaks * We only have missing prev if we don't supply state * Room version tweaks * Allow async inputs again * Apply backpressure to consumers/synchronous requests to hopefully stop things being overwhelmed * Set timeouts on roomserver input tasks (need to decide what timeout makes sense) * Use work queue policy, deliver all on restart * Reduce chance of duplicates being sent by NATS * Limit the number of servers we attempt to reduce backpressure * Some review comment fixes * Tidy up a couple things * Don't limit servers, randomise order using map * Some context refactoring * Update gmsl * Don't resend create events * Set stateIDs length correctly or else the roomserver thinks there are missing events when there aren't * Exclude our own servername * Try backing off servers * Make excluding self behaviour optional * Exclude self from g_m_e * Update sytest-whitelist * Update consumers for the roomserver output stream * Remember to send outliers for state returned from /gme * Make full HTTP tests less upsetti * Remove 'If a device list update goes missing, the server resyncs on the next one' from the sytest blacklist * Remove debugging test * Fix blacklist again, remove unnecessary duplicate context * Clearer contexts, don't use background in case there's something happening there * Don't queue up events more than once in memory * Correctly identify create events when checking for state * Fill in gaps again in /gme code * Remove `AuthEventIDs` from `InputRoomEvent` * Remove stray field Co-authored-by: Kegan Dougal <kegan@matrix.org>
2022-01-27 14:29:14 +00:00
wg.Add(1)
go func() {
defer wg.Done()
t.processEDUs(ctx)
}()
results := make(map[string]gomatrixserverlib.PDUResult)
roomVersions := make(map[string]gomatrixserverlib.RoomVersion)
getRoomVersion := func(roomID string) gomatrixserverlib.RoomVersion {
if v, ok := roomVersions[roomID]; ok {
return v
}
verReq := api.QueryRoomVersionForRoomRequest{RoomID: roomID}
verRes := api.QueryRoomVersionForRoomResponse{}
if err := t.rsAPI.QueryRoomVersionForRoom(ctx, &verReq, &verRes); err != nil {
util.GetLogger(ctx).WithError(err).Debug("Transaction: Failed to query room version for room", verReq.RoomID)
Roomserver/federation input refactor (#2104) * Put federation client functions into their own file * Look for missing auth events in RS input * Remove retrieveMissingAuthEvents from federation API * Logging * Sorta transplanted the code over * Use event origin failing all else * Don't get stuck on mutexes: * Add verifier * Don't mark state events with zero snapshot NID as not existing * Check missing state if not an outlier before storing the event * Reject instead of soft-fail, don't copy roominfo so much * Use synchronous contexts, limit time to fetch missing events * Clean up some commented out bits * Simplify `/send` endpoint significantly * Submit async * Report errors on sending to RS input * Set max payload in NATS to 16MB * Tweak metrics * Add `workerForRoom` for tidiness * Try skipping unmarshalling errors for RespMissingEvents * Track missing prev events separately to avoid calculating state when not possible * Tweak logic around checking missing state * Care about state when checking missing prev events * Don't check missing state for create events * Try that again * Handle create events better * Send create room events as new * Use given event kind when sending auth/state events * Revert "Use given event kind when sending auth/state events" This reverts commit 089d64d271b5fca8c104e1554711187420dbebca. * Only search for missing prev events or state for new events * Tweaks * We only have missing prev if we don't supply state * Room version tweaks * Allow async inputs again * Apply backpressure to consumers/synchronous requests to hopefully stop things being overwhelmed * Set timeouts on roomserver input tasks (need to decide what timeout makes sense) * Use work queue policy, deliver all on restart * Reduce chance of duplicates being sent by NATS * Limit the number of servers we attempt to reduce backpressure * Some review comment fixes * Tidy up a couple things * Don't limit servers, randomise order using map * Some context refactoring * Update gmsl * Don't resend create events * Set stateIDs length correctly or else the roomserver thinks there are missing events when there aren't * Exclude our own servername * Try backing off servers * Make excluding self behaviour optional * Exclude self from g_m_e * Update sytest-whitelist * Update consumers for the roomserver output stream * Remember to send outliers for state returned from /gme * Make full HTTP tests less upsetti * Remove 'If a device list update goes missing, the server resyncs on the next one' from the sytest blacklist * Remove debugging test * Fix blacklist again, remove unnecessary duplicate context * Clearer contexts, don't use background in case there's something happening there * Don't queue up events more than once in memory * Correctly identify create events when checking for state * Fill in gaps again in /gme code * Remove `AuthEventIDs` from `InputRoomEvent` * Remove stray field Co-authored-by: Kegan Dougal <kegan@matrix.org>
2022-01-27 14:29:14 +00:00
return ""
}
roomVersions[roomID] = verRes.RoomVersion
return verRes.RoomVersion
}
Further room version wiring (#936) * Room version 2 by default, other wiring updates, update gomatrixserverlib * Fix nil pointer exception * Fix some more nil pointer exceptions hopefully * Update gomatrixserverlib * Send all room versions when joining, not just stable ones * Remove room version cquery * Get room version when getting events from the roomserver database * Reset default back to room version 2 * Don't generate event IDs unless needed * Revert "Remove room version cquery" This reverts commit a170d5873360dd059614460acc8b21ab2cda9767. * Query room version in federation API, client API as needed * Improvements to make_join send_join dance * Make room server producers use headered events * Lint tweaks * Update gomatrixserverlib * Versioned SendJoin * Query room version in syncapi backfill * Handle transaction marshalling/unmarshalling within Dendrite * Sorta fix federation (kinda) * whoops commit federation API too * Use NewEventFromTrustedJSON when getting events from the database * Update gomatrixserverlib * Strip headers on federationapi endpoints * Fix bug in clientapi profile room version query * Update gomatrixserverlib * Return more useful error if room version query doesn't find the room * Update gomatrixserverlib * Update gomatrixserverlib * Maybe fix federation * Fix formatting directive * Update sytest whitelist and blacklist * Temporarily disable room versions 3 and 4 until gmsl is fixed * Fix count of EDUs in logging * Update gomatrixserverlib * Update gomatrixserverlib * Update gomatrixserverlib * Rely on EventBuilder in gmsl to generate the event IDs for us * Some review comments fixed * Move function out of common and into gmsl * Comment in federationsender destinationqueue * Update gomatrixserverlib
2020-03-27 16:28:22 +00:00
for _, pdu := range t.PDUs {
pduCountTotal.WithLabelValues("total").Inc()
Further room version wiring (#936) * Room version 2 by default, other wiring updates, update gomatrixserverlib * Fix nil pointer exception * Fix some more nil pointer exceptions hopefully * Update gomatrixserverlib * Send all room versions when joining, not just stable ones * Remove room version cquery * Get room version when getting events from the roomserver database * Reset default back to room version 2 * Don't generate event IDs unless needed * Revert "Remove room version cquery" This reverts commit a170d5873360dd059614460acc8b21ab2cda9767. * Query room version in federation API, client API as needed * Improvements to make_join send_join dance * Make room server producers use headered events * Lint tweaks * Update gomatrixserverlib * Versioned SendJoin * Query room version in syncapi backfill * Handle transaction marshalling/unmarshalling within Dendrite * Sorta fix federation (kinda) * whoops commit federation API too * Use NewEventFromTrustedJSON when getting events from the database * Update gomatrixserverlib * Strip headers on federationapi endpoints * Fix bug in clientapi profile room version query * Update gomatrixserverlib * Return more useful error if room version query doesn't find the room * Update gomatrixserverlib * Update gomatrixserverlib * Maybe fix federation * Fix formatting directive * Update sytest whitelist and blacklist * Temporarily disable room versions 3 and 4 until gmsl is fixed * Fix count of EDUs in logging * Update gomatrixserverlib * Update gomatrixserverlib * Update gomatrixserverlib * Rely on EventBuilder in gmsl to generate the event IDs for us * Some review comments fixed * Move function out of common and into gmsl * Comment in federationsender destinationqueue * Update gomatrixserverlib
2020-03-27 16:28:22 +00:00
var header struct {
RoomID string `json:"room_id"`
}
if err := json.Unmarshal(pdu, &header); err != nil {
util.GetLogger(ctx).WithError(err).Debug("Transaction: Failed to extract room ID from event")
// We don't know the event ID at this point so we can't return the
// failure in the PDU results
continue
Further room version wiring (#936) * Room version 2 by default, other wiring updates, update gomatrixserverlib * Fix nil pointer exception * Fix some more nil pointer exceptions hopefully * Update gomatrixserverlib * Send all room versions when joining, not just stable ones * Remove room version cquery * Get room version when getting events from the roomserver database * Reset default back to room version 2 * Don't generate event IDs unless needed * Revert "Remove room version cquery" This reverts commit a170d5873360dd059614460acc8b21ab2cda9767. * Query room version in federation API, client API as needed * Improvements to make_join send_join dance * Make room server producers use headered events * Lint tweaks * Update gomatrixserverlib * Versioned SendJoin * Query room version in syncapi backfill * Handle transaction marshalling/unmarshalling within Dendrite * Sorta fix federation (kinda) * whoops commit federation API too * Use NewEventFromTrustedJSON when getting events from the database * Update gomatrixserverlib * Strip headers on federationapi endpoints * Fix bug in clientapi profile room version query * Update gomatrixserverlib * Return more useful error if room version query doesn't find the room * Update gomatrixserverlib * Update gomatrixserverlib * Maybe fix federation * Fix formatting directive * Update sytest whitelist and blacklist * Temporarily disable room versions 3 and 4 until gmsl is fixed * Fix count of EDUs in logging * Update gomatrixserverlib * Update gomatrixserverlib * Update gomatrixserverlib * Rely on EventBuilder in gmsl to generate the event IDs for us * Some review comments fixed * Move function out of common and into gmsl * Comment in federationsender destinationqueue * Update gomatrixserverlib
2020-03-27 16:28:22 +00:00
}
Roomserver/federation input refactor (#2104) * Put federation client functions into their own file * Look for missing auth events in RS input * Remove retrieveMissingAuthEvents from federation API * Logging * Sorta transplanted the code over * Use event origin failing all else * Don't get stuck on mutexes: * Add verifier * Don't mark state events with zero snapshot NID as not existing * Check missing state if not an outlier before storing the event * Reject instead of soft-fail, don't copy roominfo so much * Use synchronous contexts, limit time to fetch missing events * Clean up some commented out bits * Simplify `/send` endpoint significantly * Submit async * Report errors on sending to RS input * Set max payload in NATS to 16MB * Tweak metrics * Add `workerForRoom` for tidiness * Try skipping unmarshalling errors for RespMissingEvents * Track missing prev events separately to avoid calculating state when not possible * Tweak logic around checking missing state * Care about state when checking missing prev events * Don't check missing state for create events * Try that again * Handle create events better * Send create room events as new * Use given event kind when sending auth/state events * Revert "Use given event kind when sending auth/state events" This reverts commit 089d64d271b5fca8c104e1554711187420dbebca. * Only search for missing prev events or state for new events * Tweaks * We only have missing prev if we don't supply state * Room version tweaks * Allow async inputs again * Apply backpressure to consumers/synchronous requests to hopefully stop things being overwhelmed * Set timeouts on roomserver input tasks (need to decide what timeout makes sense) * Use work queue policy, deliver all on restart * Reduce chance of duplicates being sent by NATS * Limit the number of servers we attempt to reduce backpressure * Some review comment fixes * Tidy up a couple things * Don't limit servers, randomise order using map * Some context refactoring * Update gmsl * Don't resend create events * Set stateIDs length correctly or else the roomserver thinks there are missing events when there aren't * Exclude our own servername * Try backing off servers * Make excluding self behaviour optional * Exclude self from g_m_e * Update sytest-whitelist * Update consumers for the roomserver output stream * Remember to send outliers for state returned from /gme * Make full HTTP tests less upsetti * Remove 'If a device list update goes missing, the server resyncs on the next one' from the sytest blacklist * Remove debugging test * Fix blacklist again, remove unnecessary duplicate context * Clearer contexts, don't use background in case there's something happening there * Don't queue up events more than once in memory * Correctly identify create events when checking for state * Fill in gaps again in /gme code * Remove `AuthEventIDs` from `InputRoomEvent` * Remove stray field Co-authored-by: Kegan Dougal <kegan@matrix.org>
2022-01-27 14:29:14 +00:00
roomVersion := getRoomVersion(header.RoomID)
event, err := gomatrixserverlib.NewEventFromUntrustedJSON(pdu, roomVersion)
Further room version wiring (#936) * Room version 2 by default, other wiring updates, update gomatrixserverlib * Fix nil pointer exception * Fix some more nil pointer exceptions hopefully * Update gomatrixserverlib * Send all room versions when joining, not just stable ones * Remove room version cquery * Get room version when getting events from the roomserver database * Reset default back to room version 2 * Don't generate event IDs unless needed * Revert "Remove room version cquery" This reverts commit a170d5873360dd059614460acc8b21ab2cda9767. * Query room version in federation API, client API as needed * Improvements to make_join send_join dance * Make room server producers use headered events * Lint tweaks * Update gomatrixserverlib * Versioned SendJoin * Query room version in syncapi backfill * Handle transaction marshalling/unmarshalling within Dendrite * Sorta fix federation (kinda) * whoops commit federation API too * Use NewEventFromTrustedJSON when getting events from the database * Update gomatrixserverlib * Strip headers on federationapi endpoints * Fix bug in clientapi profile room version query * Update gomatrixserverlib * Return more useful error if room version query doesn't find the room * Update gomatrixserverlib * Update gomatrixserverlib * Maybe fix federation * Fix formatting directive * Update sytest whitelist and blacklist * Temporarily disable room versions 3 and 4 until gmsl is fixed * Fix count of EDUs in logging * Update gomatrixserverlib * Update gomatrixserverlib * Update gomatrixserverlib * Rely on EventBuilder in gmsl to generate the event IDs for us * Some review comments fixed * Move function out of common and into gmsl * Comment in federationsender destinationqueue * Update gomatrixserverlib
2020-03-27 16:28:22 +00:00
if err != nil {
if _, ok := err.(gomatrixserverlib.BadJSONError); ok {
// Room version 6 states that homeservers should strictly enforce canonical JSON
// on PDUs.
//
// This enforces that the entire transaction is rejected if a single bad PDU is
// sent. It is unclear if this is the correct behaviour or not.
//
// See https://github.com/matrix-org/synapse/issues/7543
return nil, &util.JSONResponse{
Code: 400,
JSON: jsonerror.BadJSON("PDU contains bad JSON"),
}
}
util.GetLogger(ctx).WithError(err).Debugf("Transaction: Failed to parse event JSON of event %s", string(pdu))
continue
Further room version wiring (#936) * Room version 2 by default, other wiring updates, update gomatrixserverlib * Fix nil pointer exception * Fix some more nil pointer exceptions hopefully * Update gomatrixserverlib * Send all room versions when joining, not just stable ones * Remove room version cquery * Get room version when getting events from the roomserver database * Reset default back to room version 2 * Don't generate event IDs unless needed * Revert "Remove room version cquery" This reverts commit a170d5873360dd059614460acc8b21ab2cda9767. * Query room version in federation API, client API as needed * Improvements to make_join send_join dance * Make room server producers use headered events * Lint tweaks * Update gomatrixserverlib * Versioned SendJoin * Query room version in syncapi backfill * Handle transaction marshalling/unmarshalling within Dendrite * Sorta fix federation (kinda) * whoops commit federation API too * Use NewEventFromTrustedJSON when getting events from the database * Update gomatrixserverlib * Strip headers on federationapi endpoints * Fix bug in clientapi profile room version query * Update gomatrixserverlib * Return more useful error if room version query doesn't find the room * Update gomatrixserverlib * Update gomatrixserverlib * Maybe fix federation * Fix formatting directive * Update sytest whitelist and blacklist * Temporarily disable room versions 3 and 4 until gmsl is fixed * Fix count of EDUs in logging * Update gomatrixserverlib * Update gomatrixserverlib * Update gomatrixserverlib * Rely on EventBuilder in gmsl to generate the event IDs for us * Some review comments fixed * Move function out of common and into gmsl * Comment in federationsender destinationqueue * Update gomatrixserverlib
2020-03-27 16:28:22 +00:00
}
if event.Type() == gomatrixserverlib.MRoomCreate && event.StateKeyEquals("") {
continue
}
if api.IsServerBannedFromRoom(ctx, t.rsAPI, event.RoomID(), t.Origin) {
results[event.EventID()] = gomatrixserverlib.PDUResult{
Error: "Forbidden by server ACLs",
}
continue
}
if err = event.VerifyEventSignatures(ctx, t.keys); err != nil {
util.GetLogger(ctx).WithError(err).Debugf("Transaction: Couldn't validate signature of event %q", event.EventID())
results[event.EventID()] = gomatrixserverlib.PDUResult{
Error: err.Error(),
}
continue
Further room version wiring (#936) * Room version 2 by default, other wiring updates, update gomatrixserverlib * Fix nil pointer exception * Fix some more nil pointer exceptions hopefully * Update gomatrixserverlib * Send all room versions when joining, not just stable ones * Remove room version cquery * Get room version when getting events from the roomserver database * Reset default back to room version 2 * Don't generate event IDs unless needed * Revert "Remove room version cquery" This reverts commit a170d5873360dd059614460acc8b21ab2cda9767. * Query room version in federation API, client API as needed * Improvements to make_join send_join dance * Make room server producers use headered events * Lint tweaks * Update gomatrixserverlib * Versioned SendJoin * Query room version in syncapi backfill * Handle transaction marshalling/unmarshalling within Dendrite * Sorta fix federation (kinda) * whoops commit federation API too * Use NewEventFromTrustedJSON when getting events from the database * Update gomatrixserverlib * Strip headers on federationapi endpoints * Fix bug in clientapi profile room version query * Update gomatrixserverlib * Return more useful error if room version query doesn't find the room * Update gomatrixserverlib * Update gomatrixserverlib * Maybe fix federation * Fix formatting directive * Update sytest whitelist and blacklist * Temporarily disable room versions 3 and 4 until gmsl is fixed * Fix count of EDUs in logging * Update gomatrixserverlib * Update gomatrixserverlib * Update gomatrixserverlib * Rely on EventBuilder in gmsl to generate the event IDs for us * Some review comments fixed * Move function out of common and into gmsl * Comment in federationsender destinationqueue * Update gomatrixserverlib
2020-03-27 16:28:22 +00:00
}
Roomserver/federation input refactor (#2104) * Put federation client functions into their own file * Look for missing auth events in RS input * Remove retrieveMissingAuthEvents from federation API * Logging * Sorta transplanted the code over * Use event origin failing all else * Don't get stuck on mutexes: * Add verifier * Don't mark state events with zero snapshot NID as not existing * Check missing state if not an outlier before storing the event * Reject instead of soft-fail, don't copy roominfo so much * Use synchronous contexts, limit time to fetch missing events * Clean up some commented out bits * Simplify `/send` endpoint significantly * Submit async * Report errors on sending to RS input * Set max payload in NATS to 16MB * Tweak metrics * Add `workerForRoom` for tidiness * Try skipping unmarshalling errors for RespMissingEvents * Track missing prev events separately to avoid calculating state when not possible * Tweak logic around checking missing state * Care about state when checking missing prev events * Don't check missing state for create events * Try that again * Handle create events better * Send create room events as new * Use given event kind when sending auth/state events * Revert "Use given event kind when sending auth/state events" This reverts commit 089d64d271b5fca8c104e1554711187420dbebca. * Only search for missing prev events or state for new events * Tweaks * We only have missing prev if we don't supply state * Room version tweaks * Allow async inputs again * Apply backpressure to consumers/synchronous requests to hopefully stop things being overwhelmed * Set timeouts on roomserver input tasks (need to decide what timeout makes sense) * Use work queue policy, deliver all on restart * Reduce chance of duplicates being sent by NATS * Limit the number of servers we attempt to reduce backpressure * Some review comment fixes * Tidy up a couple things * Don't limit servers, randomise order using map * Some context refactoring * Update gmsl * Don't resend create events * Set stateIDs length correctly or else the roomserver thinks there are missing events when there aren't * Exclude our own servername * Try backing off servers * Make excluding self behaviour optional * Exclude self from g_m_e * Update sytest-whitelist * Update consumers for the roomserver output stream * Remember to send outliers for state returned from /gme * Make full HTTP tests less upsetti * Remove 'If a device list update goes missing, the server resyncs on the next one' from the sytest blacklist * Remove debugging test * Fix blacklist again, remove unnecessary duplicate context * Clearer contexts, don't use background in case there's something happening there * Don't queue up events more than once in memory * Correctly identify create events when checking for state * Fill in gaps again in /gme code * Remove `AuthEventIDs` from `InputRoomEvent` * Remove stray field Co-authored-by: Kegan Dougal <kegan@matrix.org>
2022-01-27 14:29:14 +00:00
// pass the event to the roomserver which will do auth checks
// If the event fail auth checks, gmsl.NotAllowed error will be returned which we be silently
// discarded by the caller of this function
if err = api.SendEvents(
ctx,
t.rsAPI,
api.KindNew,
[]*gomatrixserverlib.HeaderedEvent{
event.Headered(roomVersion),
},
t.Origin,
api.DoNotSendToOtherServers,
nil,
true,
); err != nil {
util.GetLogger(ctx).WithError(err).Errorf("Transaction: Couldn't submit event %q to input queue: %s", event.EventID(), err)
Roomserver/federation input refactor (#2104) * Put federation client functions into their own file * Look for missing auth events in RS input * Remove retrieveMissingAuthEvents from federation API * Logging * Sorta transplanted the code over * Use event origin failing all else * Don't get stuck on mutexes: * Add verifier * Don't mark state events with zero snapshot NID as not existing * Check missing state if not an outlier before storing the event * Reject instead of soft-fail, don't copy roominfo so much * Use synchronous contexts, limit time to fetch missing events * Clean up some commented out bits * Simplify `/send` endpoint significantly * Submit async * Report errors on sending to RS input * Set max payload in NATS to 16MB * Tweak metrics * Add `workerForRoom` for tidiness * Try skipping unmarshalling errors for RespMissingEvents * Track missing prev events separately to avoid calculating state when not possible * Tweak logic around checking missing state * Care about state when checking missing prev events * Don't check missing state for create events * Try that again * Handle create events better * Send create room events as new * Use given event kind when sending auth/state events * Revert "Use given event kind when sending auth/state events" This reverts commit 089d64d271b5fca8c104e1554711187420dbebca. * Only search for missing prev events or state for new events * Tweaks * We only have missing prev if we don't supply state * Room version tweaks * Allow async inputs again * Apply backpressure to consumers/synchronous requests to hopefully stop things being overwhelmed * Set timeouts on roomserver input tasks (need to decide what timeout makes sense) * Use work queue policy, deliver all on restart * Reduce chance of duplicates being sent by NATS * Limit the number of servers we attempt to reduce backpressure * Some review comment fixes * Tidy up a couple things * Don't limit servers, randomise order using map * Some context refactoring * Update gmsl * Don't resend create events * Set stateIDs length correctly or else the roomserver thinks there are missing events when there aren't * Exclude our own servername * Try backing off servers * Make excluding self behaviour optional * Exclude self from g_m_e * Update sytest-whitelist * Update consumers for the roomserver output stream * Remember to send outliers for state returned from /gme * Make full HTTP tests less upsetti * Remove 'If a device list update goes missing, the server resyncs on the next one' from the sytest blacklist * Remove debugging test * Fix blacklist again, remove unnecessary duplicate context * Clearer contexts, don't use background in case there's something happening there * Don't queue up events more than once in memory * Correctly identify create events when checking for state * Fill in gaps again in /gme code * Remove `AuthEventIDs` from `InputRoomEvent` * Remove stray field Co-authored-by: Kegan Dougal <kegan@matrix.org>
2022-01-27 14:29:14 +00:00
results[event.EventID()] = gomatrixserverlib.PDUResult{
Error: err.Error(),
}
continue
}
Roomserver/federation input refactor (#2104) * Put federation client functions into their own file * Look for missing auth events in RS input * Remove retrieveMissingAuthEvents from federation API * Logging * Sorta transplanted the code over * Use event origin failing all else * Don't get stuck on mutexes: * Add verifier * Don't mark state events with zero snapshot NID as not existing * Check missing state if not an outlier before storing the event * Reject instead of soft-fail, don't copy roominfo so much * Use synchronous contexts, limit time to fetch missing events * Clean up some commented out bits * Simplify `/send` endpoint significantly * Submit async * Report errors on sending to RS input * Set max payload in NATS to 16MB * Tweak metrics * Add `workerForRoom` for tidiness * Try skipping unmarshalling errors for RespMissingEvents * Track missing prev events separately to avoid calculating state when not possible * Tweak logic around checking missing state * Care about state when checking missing prev events * Don't check missing state for create events * Try that again * Handle create events better * Send create room events as new * Use given event kind when sending auth/state events * Revert "Use given event kind when sending auth/state events" This reverts commit 089d64d271b5fca8c104e1554711187420dbebca. * Only search for missing prev events or state for new events * Tweaks * We only have missing prev if we don't supply state * Room version tweaks * Allow async inputs again * Apply backpressure to consumers/synchronous requests to hopefully stop things being overwhelmed * Set timeouts on roomserver input tasks (need to decide what timeout makes sense) * Use work queue policy, deliver all on restart * Reduce chance of duplicates being sent by NATS * Limit the number of servers we attempt to reduce backpressure * Some review comment fixes * Tidy up a couple things * Don't limit servers, randomise order using map * Some context refactoring * Update gmsl * Don't resend create events * Set stateIDs length correctly or else the roomserver thinks there are missing events when there aren't * Exclude our own servername * Try backing off servers * Make excluding self behaviour optional * Exclude self from g_m_e * Update sytest-whitelist * Update consumers for the roomserver output stream * Remember to send outliers for state returned from /gme * Make full HTTP tests less upsetti * Remove 'If a device list update goes missing, the server resyncs on the next one' from the sytest blacklist * Remove debugging test * Fix blacklist again, remove unnecessary duplicate context * Clearer contexts, don't use background in case there's something happening there * Don't queue up events more than once in memory * Correctly identify create events when checking for state * Fill in gaps again in /gme code * Remove `AuthEventIDs` from `InputRoomEvent` * Remove stray field Co-authored-by: Kegan Dougal <kegan@matrix.org>
2022-01-27 14:29:14 +00:00
results[event.EventID()] = gomatrixserverlib.PDUResult{}
pduCountTotal.WithLabelValues("success").Inc()
}
Roomserver/federation input refactor (#2104) * Put federation client functions into their own file * Look for missing auth events in RS input * Remove retrieveMissingAuthEvents from federation API * Logging * Sorta transplanted the code over * Use event origin failing all else * Don't get stuck on mutexes: * Add verifier * Don't mark state events with zero snapshot NID as not existing * Check missing state if not an outlier before storing the event * Reject instead of soft-fail, don't copy roominfo so much * Use synchronous contexts, limit time to fetch missing events * Clean up some commented out bits * Simplify `/send` endpoint significantly * Submit async * Report errors on sending to RS input * Set max payload in NATS to 16MB * Tweak metrics * Add `workerForRoom` for tidiness * Try skipping unmarshalling errors for RespMissingEvents * Track missing prev events separately to avoid calculating state when not possible * Tweak logic around checking missing state * Care about state when checking missing prev events * Don't check missing state for create events * Try that again * Handle create events better * Send create room events as new * Use given event kind when sending auth/state events * Revert "Use given event kind when sending auth/state events" This reverts commit 089d64d271b5fca8c104e1554711187420dbebca. * Only search for missing prev events or state for new events * Tweaks * We only have missing prev if we don't supply state * Room version tweaks * Allow async inputs again * Apply backpressure to consumers/synchronous requests to hopefully stop things being overwhelmed * Set timeouts on roomserver input tasks (need to decide what timeout makes sense) * Use work queue policy, deliver all on restart * Reduce chance of duplicates being sent by NATS * Limit the number of servers we attempt to reduce backpressure * Some review comment fixes * Tidy up a couple things * Don't limit servers, randomise order using map * Some context refactoring * Update gmsl * Don't resend create events * Set stateIDs length correctly or else the roomserver thinks there are missing events when there aren't * Exclude our own servername * Try backing off servers * Make excluding self behaviour optional * Exclude self from g_m_e * Update sytest-whitelist * Update consumers for the roomserver output stream * Remember to send outliers for state returned from /gme * Make full HTTP tests less upsetti * Remove 'If a device list update goes missing, the server resyncs on the next one' from the sytest blacklist * Remove debugging test * Fix blacklist again, remove unnecessary duplicate context * Clearer contexts, don't use background in case there's something happening there * Don't queue up events more than once in memory * Correctly identify create events when checking for state * Fill in gaps again in /gme code * Remove `AuthEventIDs` from `InputRoomEvent` * Remove stray field Co-authored-by: Kegan Dougal <kegan@matrix.org>
2022-01-27 14:29:14 +00:00
wg.Wait()
return &gomatrixserverlib.RespSend{PDUs: results}, nil
}
func (t *txnReq) processEDUs(ctx context.Context) {
for _, e := range t.EDUs {
eduCountTotal.Inc()
switch e.Type {
case gomatrixserverlib.MTyping:
// https://matrix.org/docs/spec/server_server/latest#typing-notifications
var typingPayload struct {
RoomID string `json:"room_id"`
UserID string `json:"user_id"`
Typing bool `json:"typing"`
}
if err := json.Unmarshal(e.Content, &typingPayload); err != nil {
util.GetLogger(ctx).WithError(err).Debug("Failed to unmarshal typing event")
continue
}
_, domain, err := gomatrixserverlib.SplitID('@', typingPayload.UserID)
if err != nil {
util.GetLogger(ctx).WithError(err).Debug("Failed to split domain from typing event sender")
continue
}
if domain != t.Origin {
util.GetLogger(ctx).Debugf("Dropping typing event where sender domain (%q) doesn't match origin (%q)", domain, t.Origin)
continue
}
if err := eduserverAPI.SendTyping(ctx, t.eduAPI, typingPayload.UserID, typingPayload.RoomID, typingPayload.Typing, 30*1000); err != nil {
util.GetLogger(ctx).WithError(err).Error("Failed to send typing event to edu server")
}
Send-to-device support (#1072) * Groundwork for send-to-device messaging * Update sample config * Add unstable routing for now * Send to device consumer in sync API * Start the send-to-device consumer * fix indentation in dendrite-config.yaml * Create send-to-device database tables, other tweaks * Add some logic for send-to-device messages, add them into sync stream * Handle incoming send-to-device messages, count them with EDU stream pos * Undo changes to test * pq.Array * Fix sync * Logging * Fix a couple of transaction things, fix client API * Add send-to-device test, hopefully fix bugs * Comments * Refactor a bit * Fix schema * Fix queries * Debug logging * Fix storing and retrieving of send-to-device messages * Try to avoid database locks * Update sync position * Use latest sync position * Jiggle about sync a bit * Fix tests * Break out the retrieval from the update/delete behaviour * Comments * nolint on getResponseWithPDUsForCompleteSync * Try to line up sync tokens again * Implement wildcard * Add all send-to-device tests to whitelist, what could possibly go wrong? * Only care about wildcard when targeted locally * Deduplicate transactions * Handle tokens properly, return immediately if waiting send-to-device messages * Fix sync * Update sytest-whitelist * Fix copyright notice (need to do more of this) * Comments, copyrights * Return errors from Do, fix dendritejs * Review comments * Comments * Constructor for TransactionWriter * defletions * Update gomatrixserverlib, sytest-blacklist
2020-06-01 16:50:19 +00:00
case gomatrixserverlib.MDirectToDevice:
// https://matrix.org/docs/spec/server_server/r0.1.3#m-direct-to-device-schema
var directPayload gomatrixserverlib.ToDeviceMessage
if err := json.Unmarshal(e.Content, &directPayload); err != nil {
util.GetLogger(ctx).WithError(err).Debug("Failed to unmarshal send-to-device events")
Send-to-device support (#1072) * Groundwork for send-to-device messaging * Update sample config * Add unstable routing for now * Send to device consumer in sync API * Start the send-to-device consumer * fix indentation in dendrite-config.yaml * Create send-to-device database tables, other tweaks * Add some logic for send-to-device messages, add them into sync stream * Handle incoming send-to-device messages, count them with EDU stream pos * Undo changes to test * pq.Array * Fix sync * Logging * Fix a couple of transaction things, fix client API * Add send-to-device test, hopefully fix bugs * Comments * Refactor a bit * Fix schema * Fix queries * Debug logging * Fix storing and retrieving of send-to-device messages * Try to avoid database locks * Update sync position * Use latest sync position * Jiggle about sync a bit * Fix tests * Break out the retrieval from the update/delete behaviour * Comments * nolint on getResponseWithPDUsForCompleteSync * Try to line up sync tokens again * Implement wildcard * Add all send-to-device tests to whitelist, what could possibly go wrong? * Only care about wildcard when targeted locally * Deduplicate transactions * Handle tokens properly, return immediately if waiting send-to-device messages * Fix sync * Update sytest-whitelist * Fix copyright notice (need to do more of this) * Comments, copyrights * Return errors from Do, fix dendritejs * Review comments * Comments * Constructor for TransactionWriter * defletions * Update gomatrixserverlib, sytest-blacklist
2020-06-01 16:50:19 +00:00
continue
}
for userID, byUser := range directPayload.Messages {
for deviceID, message := range byUser {
// TODO: check that the user and the device actually exist here
if err := eduserverAPI.SendToDevice(ctx, t.eduAPI, directPayload.Sender, userID, deviceID, directPayload.Type, message); err != nil {
util.GetLogger(ctx).WithError(err).WithFields(logrus.Fields{
Send-to-device support (#1072) * Groundwork for send-to-device messaging * Update sample config * Add unstable routing for now * Send to device consumer in sync API * Start the send-to-device consumer * fix indentation in dendrite-config.yaml * Create send-to-device database tables, other tweaks * Add some logic for send-to-device messages, add them into sync stream * Handle incoming send-to-device messages, count them with EDU stream pos * Undo changes to test * pq.Array * Fix sync * Logging * Fix a couple of transaction things, fix client API * Add send-to-device test, hopefully fix bugs * Comments * Refactor a bit * Fix schema * Fix queries * Debug logging * Fix storing and retrieving of send-to-device messages * Try to avoid database locks * Update sync position * Use latest sync position * Jiggle about sync a bit * Fix tests * Break out the retrieval from the update/delete behaviour * Comments * nolint on getResponseWithPDUsForCompleteSync * Try to line up sync tokens again * Implement wildcard * Add all send-to-device tests to whitelist, what could possibly go wrong? * Only care about wildcard when targeted locally * Deduplicate transactions * Handle tokens properly, return immediately if waiting send-to-device messages * Fix sync * Update sytest-whitelist * Fix copyright notice (need to do more of this) * Comments, copyrights * Return errors from Do, fix dendritejs * Review comments * Comments * Constructor for TransactionWriter * defletions * Update gomatrixserverlib, sytest-blacklist
2020-06-01 16:50:19 +00:00
"sender": directPayload.Sender,
"user_id": userID,
"device_id": deviceID,
}).Error("Failed to send send-to-device event to edu server")
}
}
}
case gomatrixserverlib.MDeviceListUpdate:
t.processDeviceListUpdate(ctx, e)
Implement read receipts (#1528) * fix conversion from int to string yields a string of one rune, not a string of digits * Add receipts table to syncapi * Use StreamingToken as the since value * Add required method to testEDUProducer * Make receipt json creation "easier" to read * Add receipts api to the eduserver * Add receipts endpoint * Add eduserver kafka consumer * Add missing kafka config * Add passing tests to whitelist Signed-off-by: Till Faelligen <tfaelligen@gmail.com> * Fix copy & paste error * Fix column count error * Make outbound federation receipts pass * Make "Inbound federation rejects receipts from wrong remote" pass * Don't use errors package * - Add TODO for batching requests - Rename variable * Return a better error message * - Use OutputReceiptEvent instead of InputReceiptEvent as result - Don't use the errors package for errors - Defer CloseAndLogIfError to close rows - Fix Copyright * Better creation/usage of JoinResponse * Query all joined rooms instead of just one * Update gomatrixserverlib * Add sqlite3 migration * Add postgres migration * Ensure required sequence exists before running migrations * Clarification on comment * - Fix a bug when creating client receipts - Use concrete types instead of interface{} * Remove dead code Use key for timestamp * Fix postgres query... * Remove single purpose struct * Use key/value directly * Only apply receipts on initial sync or if edu positions differ, otherwise we'll be sending the same receipts over and over again. * Actually update the id, so it is correctly send in syncs * Set receipt on request to /read_markers * Fix issue with receipts getting overwritten * Use fmt.Errorf instead of pkg/errors * Revert "Add postgres migration" This reverts commit 722fe5a04628882b787d096942459961db159b06. * Revert "Add sqlite3 migration" This reverts commit d113b03f6495a4b8f8bcf158a3d00b510b4240cc. * Fix selectRoomReceipts query * Make golangci-lint happy Co-authored-by: Neil Alexander <neilalexander@users.noreply.github.com>
2020-11-09 18:46:11 +00:00
case gomatrixserverlib.MReceipt:
// https://matrix.org/docs/spec/server_server/r0.1.4#receipts
payload := map[string]eduserverAPI.FederationReceiptMRead{}
if err := json.Unmarshal(e.Content, &payload); err != nil {
util.GetLogger(ctx).WithError(err).Debug("Failed to unmarshal receipt event")
Implement read receipts (#1528) * fix conversion from int to string yields a string of one rune, not a string of digits * Add receipts table to syncapi * Use StreamingToken as the since value * Add required method to testEDUProducer * Make receipt json creation "easier" to read * Add receipts api to the eduserver * Add receipts endpoint * Add eduserver kafka consumer * Add missing kafka config * Add passing tests to whitelist Signed-off-by: Till Faelligen <tfaelligen@gmail.com> * Fix copy & paste error * Fix column count error * Make outbound federation receipts pass * Make "Inbound federation rejects receipts from wrong remote" pass * Don't use errors package * - Add TODO for batching requests - Rename variable * Return a better error message * - Use OutputReceiptEvent instead of InputReceiptEvent as result - Don't use the errors package for errors - Defer CloseAndLogIfError to close rows - Fix Copyright * Better creation/usage of JoinResponse * Query all joined rooms instead of just one * Update gomatrixserverlib * Add sqlite3 migration * Add postgres migration * Ensure required sequence exists before running migrations * Clarification on comment * - Fix a bug when creating client receipts - Use concrete types instead of interface{} * Remove dead code Use key for timestamp * Fix postgres query... * Remove single purpose struct * Use key/value directly * Only apply receipts on initial sync or if edu positions differ, otherwise we'll be sending the same receipts over and over again. * Actually update the id, so it is correctly send in syncs * Set receipt on request to /read_markers * Fix issue with receipts getting overwritten * Use fmt.Errorf instead of pkg/errors * Revert "Add postgres migration" This reverts commit 722fe5a04628882b787d096942459961db159b06. * Revert "Add sqlite3 migration" This reverts commit d113b03f6495a4b8f8bcf158a3d00b510b4240cc. * Fix selectRoomReceipts query * Make golangci-lint happy Co-authored-by: Neil Alexander <neilalexander@users.noreply.github.com>
2020-11-09 18:46:11 +00:00
continue
}
for roomID, receipt := range payload {
for userID, mread := range receipt.User {
_, domain, err := gomatrixserverlib.SplitID('@', userID)
if err != nil {
util.GetLogger(ctx).WithError(err).Debug("Failed to split domain from receipt event sender")
Implement read receipts (#1528) * fix conversion from int to string yields a string of one rune, not a string of digits * Add receipts table to syncapi * Use StreamingToken as the since value * Add required method to testEDUProducer * Make receipt json creation "easier" to read * Add receipts api to the eduserver * Add receipts endpoint * Add eduserver kafka consumer * Add missing kafka config * Add passing tests to whitelist Signed-off-by: Till Faelligen <tfaelligen@gmail.com> * Fix copy & paste error * Fix column count error * Make outbound federation receipts pass * Make "Inbound federation rejects receipts from wrong remote" pass * Don't use errors package * - Add TODO for batching requests - Rename variable * Return a better error message * - Use OutputReceiptEvent instead of InputReceiptEvent as result - Don't use the errors package for errors - Defer CloseAndLogIfError to close rows - Fix Copyright * Better creation/usage of JoinResponse * Query all joined rooms instead of just one * Update gomatrixserverlib * Add sqlite3 migration * Add postgres migration * Ensure required sequence exists before running migrations * Clarification on comment * - Fix a bug when creating client receipts - Use concrete types instead of interface{} * Remove dead code Use key for timestamp * Fix postgres query... * Remove single purpose struct * Use key/value directly * Only apply receipts on initial sync or if edu positions differ, otherwise we'll be sending the same receipts over and over again. * Actually update the id, so it is correctly send in syncs * Set receipt on request to /read_markers * Fix issue with receipts getting overwritten * Use fmt.Errorf instead of pkg/errors * Revert "Add postgres migration" This reverts commit 722fe5a04628882b787d096942459961db159b06. * Revert "Add sqlite3 migration" This reverts commit d113b03f6495a4b8f8bcf158a3d00b510b4240cc. * Fix selectRoomReceipts query * Make golangci-lint happy Co-authored-by: Neil Alexander <neilalexander@users.noreply.github.com>
2020-11-09 18:46:11 +00:00
continue
}
if t.Origin != domain {
util.GetLogger(ctx).Debugf("Dropping receipt event where sender domain (%q) doesn't match origin (%q)", domain, t.Origin)
Implement read receipts (#1528) * fix conversion from int to string yields a string of one rune, not a string of digits * Add receipts table to syncapi * Use StreamingToken as the since value * Add required method to testEDUProducer * Make receipt json creation "easier" to read * Add receipts api to the eduserver * Add receipts endpoint * Add eduserver kafka consumer * Add missing kafka config * Add passing tests to whitelist Signed-off-by: Till Faelligen <tfaelligen@gmail.com> * Fix copy & paste error * Fix column count error * Make outbound federation receipts pass * Make "Inbound federation rejects receipts from wrong remote" pass * Don't use errors package * - Add TODO for batching requests - Rename variable * Return a better error message * - Use OutputReceiptEvent instead of InputReceiptEvent as result - Don't use the errors package for errors - Defer CloseAndLogIfError to close rows - Fix Copyright * Better creation/usage of JoinResponse * Query all joined rooms instead of just one * Update gomatrixserverlib * Add sqlite3 migration * Add postgres migration * Ensure required sequence exists before running migrations * Clarification on comment * - Fix a bug when creating client receipts - Use concrete types instead of interface{} * Remove dead code Use key for timestamp * Fix postgres query... * Remove single purpose struct * Use key/value directly * Only apply receipts on initial sync or if edu positions differ, otherwise we'll be sending the same receipts over and over again. * Actually update the id, so it is correctly send in syncs * Set receipt on request to /read_markers * Fix issue with receipts getting overwritten * Use fmt.Errorf instead of pkg/errors * Revert "Add postgres migration" This reverts commit 722fe5a04628882b787d096942459961db159b06. * Revert "Add sqlite3 migration" This reverts commit d113b03f6495a4b8f8bcf158a3d00b510b4240cc. * Fix selectRoomReceipts query * Make golangci-lint happy Co-authored-by: Neil Alexander <neilalexander@users.noreply.github.com>
2020-11-09 18:46:11 +00:00
continue
}
if err := t.processReceiptEvent(ctx, userID, roomID, "m.read", mread.Data.TS, mread.EventIDs); err != nil {
util.GetLogger(ctx).WithError(err).WithFields(logrus.Fields{
"sender": t.Origin,
"user_id": userID,
"room_id": roomID,
"events": mread.EventIDs,
}).Error("Failed to send receipt event to edu server")
continue
}
}
}
case eduserverAPI.MSigningKeyUpdate:
if err := t.processSigningKeyUpdate(ctx, e); err != nil {
logrus.WithError(err).Errorf("Failed to process signing key update")
}
default:
util.GetLogger(ctx).WithField("type", e.Type).Debug("Unhandled EDU")
}
}
}
func (t *txnReq) processSigningKeyUpdate(ctx context.Context, e gomatrixserverlib.EDU) error {
var updatePayload eduserverAPI.CrossSigningKeyUpdate
if err := json.Unmarshal(e.Content, &updatePayload); err != nil {
util.GetLogger(ctx).WithError(err).WithFields(logrus.Fields{
"user_id": updatePayload.UserID,
}).Debug("Failed to unmarshal signing key update")
return err
}
keys := gomatrixserverlib.CrossSigningKeys{}
if updatePayload.MasterKey != nil {
keys.MasterKey = *updatePayload.MasterKey
}
if updatePayload.SelfSigningKey != nil {
keys.SelfSigningKey = *updatePayload.SelfSigningKey
}
uploadReq := &keyapi.PerformUploadDeviceKeysRequest{
CrossSigningKeys: keys,
UserID: updatePayload.UserID,
}
uploadRes := &keyapi.PerformUploadDeviceKeysResponse{}
t.keyAPI.PerformUploadDeviceKeys(ctx, uploadReq, uploadRes)
if uploadRes.Error != nil {
return uploadRes.Error
}
return nil
}
Implement read receipts (#1528) * fix conversion from int to string yields a string of one rune, not a string of digits * Add receipts table to syncapi * Use StreamingToken as the since value * Add required method to testEDUProducer * Make receipt json creation "easier" to read * Add receipts api to the eduserver * Add receipts endpoint * Add eduserver kafka consumer * Add missing kafka config * Add passing tests to whitelist Signed-off-by: Till Faelligen <tfaelligen@gmail.com> * Fix copy & paste error * Fix column count error * Make outbound federation receipts pass * Make "Inbound federation rejects receipts from wrong remote" pass * Don't use errors package * - Add TODO for batching requests - Rename variable * Return a better error message * - Use OutputReceiptEvent instead of InputReceiptEvent as result - Don't use the errors package for errors - Defer CloseAndLogIfError to close rows - Fix Copyright * Better creation/usage of JoinResponse * Query all joined rooms instead of just one * Update gomatrixserverlib * Add sqlite3 migration * Add postgres migration * Ensure required sequence exists before running migrations * Clarification on comment * - Fix a bug when creating client receipts - Use concrete types instead of interface{} * Remove dead code Use key for timestamp * Fix postgres query... * Remove single purpose struct * Use key/value directly * Only apply receipts on initial sync or if edu positions differ, otherwise we'll be sending the same receipts over and over again. * Actually update the id, so it is correctly send in syncs * Set receipt on request to /read_markers * Fix issue with receipts getting overwritten * Use fmt.Errorf instead of pkg/errors * Revert "Add postgres migration" This reverts commit 722fe5a04628882b787d096942459961db159b06. * Revert "Add sqlite3 migration" This reverts commit d113b03f6495a4b8f8bcf158a3d00b510b4240cc. * Fix selectRoomReceipts query * Make golangci-lint happy Co-authored-by: Neil Alexander <neilalexander@users.noreply.github.com>
2020-11-09 18:46:11 +00:00
// processReceiptEvent sends receipt events to the edu server
func (t *txnReq) processReceiptEvent(ctx context.Context,
userID, roomID, receiptType string,
timestamp gomatrixserverlib.Timestamp,
eventIDs []string,
) error {
// store every event
for _, eventID := range eventIDs {
req := eduserverAPI.InputReceiptEventRequest{
InputReceiptEvent: eduserverAPI.InputReceiptEvent{
UserID: userID,
RoomID: roomID,
EventID: eventID,
Type: receiptType,
Timestamp: timestamp,
},
}
resp := eduserverAPI.InputReceiptEventResponse{}
if err := t.eduAPI.InputReceiptEvent(ctx, &req, &resp); err != nil {
return fmt.Errorf("unable to set receipt event: %w", err)
}
}
return nil
}
func (t *txnReq) processDeviceListUpdate(ctx context.Context, e gomatrixserverlib.EDU) {
var payload gomatrixserverlib.DeviceListUpdateEvent
if err := json.Unmarshal(e.Content, &payload); err != nil {
util.GetLogger(ctx).WithError(err).Error("Failed to unmarshal device list update event")
return
}
var inputRes keyapi.InputDeviceListUpdateResponse
t.keyAPI.InputDeviceListUpdate(context.Background(), &keyapi.InputDeviceListUpdateRequest{
Event: payload,
}, &inputRes)
if inputRes.Error != nil {
util.GetLogger(ctx).WithError(inputRes.Error).WithField("user_id", payload.UserID).Error("failed to InputDeviceListUpdate")
}
}