Deduplicate state block contents

This commit is contained in:
Neil Alexander 2021-04-15 15:51:15 +01:00
parent dbd53fa9ff
commit 6900e0f495
No known key found for this signature in database
GPG key ID: A02A2019A2BB0944
10 changed files with 277 additions and 344 deletions

View file

@ -88,6 +88,11 @@ const bulkSelectStateEventByIDSQL = "" +
" WHERE event_id = ANY($1)" +
" ORDER BY event_type_nid, event_state_key_nid ASC"
const bulkSelectStateEventByNIDSQL = "" +
"SELECT event_type_nid, event_state_key_nid, event_nid FROM roomserver_events" +
" WHERE event_nid = ANY($1)" +
" ORDER BY event_type_nid, event_state_key_nid ASC"
const bulkSelectStateAtEventByIDSQL = "" +
"SELECT event_type_nid, event_state_key_nid, event_nid, state_snapshot_nid, is_rejected FROM roomserver_events" +
" WHERE event_id = ANY($1)"
@ -127,6 +132,7 @@ type eventStatements struct {
insertEventStmt *sql.Stmt
selectEventStmt *sql.Stmt
bulkSelectStateEventByIDStmt *sql.Stmt
bulkSelectStateEventByNIDStmt *sql.Stmt
bulkSelectStateAtEventByIDStmt *sql.Stmt
updateEventStateStmt *sql.Stmt
selectEventSentToOutputStmt *sql.Stmt
@ -151,6 +157,7 @@ func NewPostgresEventsTable(db *sql.DB) (tables.Events, error) {
{&s.insertEventStmt, insertEventSQL},
{&s.selectEventStmt, selectEventSQL},
{&s.bulkSelectStateEventByIDStmt, bulkSelectStateEventByIDSQL},
{&s.bulkSelectStateEventByNIDStmt, bulkSelectStateEventByNIDSQL},
{&s.bulkSelectStateAtEventByIDStmt, bulkSelectStateAtEventByIDSQL},
{&s.updateEventStateStmt, updateEventStateSQL},
{&s.updateEventSentToOutputStmt, updateEventSentToOutputSQL},
@ -238,6 +245,48 @@ func (s *eventStatements) BulkSelectStateEventByID(
return results, nil
}
// bulkSelectStateEventByNID lookups a list of state events by event NID.
// If any of the requested events are missing from the database it returns a types.MissingEventError
func (s *eventStatements) BulkSelectStateEventByNID(
ctx context.Context, eventNIDs []types.EventNID,
) ([]types.StateEntry, error) {
rows, err := s.bulkSelectStateEventByNIDStmt.QueryContext(ctx, eventNIDsAsArray(eventNIDs))
if err != nil {
return nil, err
}
defer internal.CloseAndLogIfError(ctx, rows, "bulkSelectStateEventByID: rows.close() failed")
// We know that we will only get as many results as event IDs
// because of the unique constraint on event IDs.
// So we can allocate an array of the correct size now.
// We might get fewer results than IDs so we adjust the length of the slice before returning it.
results := make([]types.StateEntry, len(eventNIDs))
i := 0
for ; rows.Next(); i++ {
result := &results[i]
if err = rows.Scan(
&result.EventTypeNID,
&result.EventStateKeyNID,
&result.EventNID,
); err != nil {
return nil, err
}
}
if err = rows.Err(); err != nil {
return nil, err
}
if i != len(eventNIDs) {
// If there are fewer rows returned than IDs then we were asked to lookup event IDs we don't have.
// We don't know which ones were missing because we don't return the string IDs in the query.
// However it should be possible debug this by replaying queries or entries from the input kafka logs.
// If this turns out to be impossible and we do need the debug information here, it would be better
// to do it as a separate query rather than slowing down/complicating the internal case.
return nil, types.MissingEventError(
fmt.Sprintf("storage: state event IDs missing from the database (%d != %d)", i, len(eventNIDs)),
)
}
return results, nil
}
// bulkSelectStateAtEventByID lookups the state at a list of events by event ID.
// If any of the requested events are missing from the database it returns a types.MissingEventError.
// If we do not have the state for any of the requested events it returns a types.MissingEventError.

View file

@ -39,53 +39,27 @@ const stateDataSchema = `
-- lookup a specific (type, state_key) pair for an event. It also makes it easy
-- to read the state for a given state_block_nid ordered by (type, state_key)
-- which in turn makes it easier to merge state data blocks.
CREATE SEQUENCE IF NOT EXISTS roomserver_state_block_nid_seq;
CREATE TABLE IF NOT EXISTS roomserver_state_block (
-- Local numeric ID for this state data.
state_block_nid bigint NOT NULL,
event_type_nid bigint NOT NULL,
event_state_key_nid bigint NOT NULL,
event_nid bigint NOT NULL,
UNIQUE (state_block_nid, event_type_nid, event_state_key_nid)
state_block_nid bigserial PRIMARY KEY,
event_nids bigint[] NOT NULL,
UNIQUE (event_nids)
);
`
const insertStateDataSQL = "" +
"INSERT INTO roomserver_state_block (state_block_nid, event_type_nid, event_state_key_nid, event_nid)" +
" VALUES ($1, $2, $3, $4)"
"INSERT INTO roomserver_state_block (event_nids)" +
" VALUES ($1)" +
" ON CONFLICT (event_nids) DO UPDATE SET event_nids=$1" +
" RETURNING state_block_nid"
const selectNextStateBlockNIDSQL = "" +
"SELECT nextval('roomserver_state_block_nid_seq')"
// Bulk state lookup by numeric state block ID.
// Sort by the state_block_nid, event_type_nid, event_state_key_nid
// This means that all the entries for a given state_block_nid will appear
// together in the list and those entries will sorted by event_type_nid
// and event_state_key_nid. This property makes it easier to merge two
// state data blocks together.
const bulkSelectStateBlockEntriesSQL = "" +
"SELECT state_block_nid, event_type_nid, event_state_key_nid, event_nid" +
" FROM roomserver_state_block WHERE state_block_nid = ANY($1)" +
" ORDER BY state_block_nid, event_type_nid, event_state_key_nid"
// Bulk state lookup by numeric state block ID.
// Filters the rows in each block to the requested types and state keys.
// We would like to restrict to particular type state key pairs but we are
// restricted by the query language to pull the cross product of a list
// of types and a list state_keys. So we have to filter the result in the
// application to restrict it to the list of event types and state keys we
// actually wanted.
const bulkSelectFilteredStateBlockEntriesSQL = "" +
"SELECT state_block_nid, event_type_nid, event_state_key_nid, event_nid" +
" FROM roomserver_state_block WHERE state_block_nid = ANY($1)" +
" AND event_type_nid = ANY($2) AND event_state_key_nid = ANY($3)" +
" ORDER BY state_block_nid, event_type_nid, event_state_key_nid"
"SELECT state_block_nid, event_nids" +
" FROM roomserver_state_block WHERE state_block_nid = ANY($1)"
type stateBlockStatements struct {
insertStateDataStmt *sql.Stmt
selectNextStateBlockNIDStmt *sql.Stmt
bulkSelectStateBlockEntriesStmt *sql.Stmt
bulkSelectFilteredStateBlockEntriesStmt *sql.Stmt
insertStateDataStmt *sql.Stmt
bulkSelectStateBlockEntriesStmt *sql.Stmt
}
func NewPostgresStateBlockTable(db *sql.DB) (tables.StateBlock, error) {
@ -97,85 +71,48 @@ func NewPostgresStateBlockTable(db *sql.DB) (tables.StateBlock, error) {
return s, shared.StatementList{
{&s.insertStateDataStmt, insertStateDataSQL},
{&s.selectNextStateBlockNIDStmt, selectNextStateBlockNIDSQL},
{&s.bulkSelectStateBlockEntriesStmt, bulkSelectStateBlockEntriesSQL},
{&s.bulkSelectFilteredStateBlockEntriesStmt, bulkSelectFilteredStateBlockEntriesSQL},
}.Prepare(db)
}
func (s *stateBlockStatements) BulkInsertStateData(
ctx context.Context,
txn *sql.Tx,
entries []types.StateEntry,
) (types.StateBlockNID, error) {
stateBlockNID, err := s.selectNextStateBlockNID(ctx)
if err != nil {
return 0, err
entries types.StateEntries,
) (id types.StateBlockNID, err error) {
entries = entries[:util.SortAndUnique(entries)]
var nids []int64
for _, e := range entries {
nids = append(nids, int64(e.EventNID))
}
for _, entry := range entries {
_, err := s.insertStateDataStmt.ExecContext(
ctx,
int64(stateBlockNID),
int64(entry.EventTypeNID),
int64(entry.EventStateKeyNID),
int64(entry.EventNID),
)
if err != nil {
return 0, err
}
}
return stateBlockNID, nil
}
func (s *stateBlockStatements) selectNextStateBlockNID(
ctx context.Context,
) (types.StateBlockNID, error) {
var stateBlockNID int64
err := s.selectNextStateBlockNIDStmt.QueryRowContext(ctx).Scan(&stateBlockNID)
return types.StateBlockNID(stateBlockNID), err
err = s.insertStateDataStmt.QueryRowContext(
ctx, pq.Int64Array(nids),
).Scan(&id)
return
}
func (s *stateBlockStatements) BulkSelectStateBlockEntries(
ctx context.Context, stateBlockNIDs []types.StateBlockNID,
) ([]types.StateEntryList, error) {
nids := make([]int64, len(stateBlockNIDs))
for i := range stateBlockNIDs {
nids[i] = int64(stateBlockNIDs[i])
}
rows, err := s.bulkSelectStateBlockEntriesStmt.QueryContext(ctx, pq.Int64Array(nids))
ctx context.Context, stateBlockNIDs types.StateBlockNIDs,
) ([][]types.EventNID, error) {
rows, err := s.bulkSelectStateBlockEntriesStmt.QueryContext(ctx, stateBlockNIDsAsArray(stateBlockNIDs))
if err != nil {
return nil, err
}
defer internal.CloseAndLogIfError(ctx, rows, "bulkSelectStateBlockEntries: rows.close() failed")
results := make([]types.StateEntryList, len(stateBlockNIDs))
// current is a pointer to the StateEntryList to append the state entries to.
var current *types.StateEntryList
results := make([][]types.EventNID, len(stateBlockNIDs))
i := 0
for rows.Next() {
var (
stateBlockNID int64
eventTypeNID int64
eventStateKeyNID int64
eventNID int64
entry types.StateEntry
)
if err = rows.Scan(
&stateBlockNID, &eventTypeNID, &eventStateKeyNID, &eventNID,
); err != nil {
for ; rows.Next(); i++ {
var stateBlockNID types.StateBlockNID
var result pq.Int64Array
if err = rows.Scan(&stateBlockNID, &result); err != nil {
return nil, err
}
entry.EventTypeNID = types.EventTypeNID(eventTypeNID)
entry.EventStateKeyNID = types.EventStateKeyNID(eventStateKeyNID)
entry.EventNID = types.EventNID(eventNID)
if current == nil || types.StateBlockNID(stateBlockNID) != current.StateBlockNID {
// The state entry row is for a different state data block to the current one.
// So we start appending to the next entry in the list.
current = &results[i]
current.StateBlockNID = types.StateBlockNID(stateBlockNID)
i++
r := []types.EventNID{}
for _, e := range result {
r = append(r, types.EventNID(e))
}
current.StateEntries = append(current.StateEntries, entry)
results[i] = r
}
if err = rows.Err(); err != nil {
return nil, err
@ -186,71 +123,6 @@ func (s *stateBlockStatements) BulkSelectStateBlockEntries(
return results, err
}
func (s *stateBlockStatements) BulkSelectFilteredStateBlockEntries(
ctx context.Context,
stateBlockNIDs []types.StateBlockNID,
stateKeyTuples []types.StateKeyTuple,
) ([]types.StateEntryList, error) {
tuples := stateKeyTupleSorter(stateKeyTuples)
// Sort the tuples so that we can run binary search against them as we filter the rows returned by the db.
sort.Sort(tuples)
eventTypeNIDArray, eventStateKeyNIDArray := tuples.typesAndStateKeysAsArrays()
rows, err := s.bulkSelectFilteredStateBlockEntriesStmt.QueryContext(
ctx,
stateBlockNIDsAsArray(stateBlockNIDs),
eventTypeNIDArray,
eventStateKeyNIDArray,
)
if err != nil {
return nil, err
}
defer internal.CloseAndLogIfError(ctx, rows, "bulkSelectFilteredStateBlockEntries: rows.close() failed")
var results []types.StateEntryList
var current types.StateEntryList
for rows.Next() {
var (
stateBlockNID int64
eventTypeNID int64
eventStateKeyNID int64
eventNID int64
entry types.StateEntry
)
if err := rows.Scan(
&stateBlockNID, &eventTypeNID, &eventStateKeyNID, &eventNID,
); err != nil {
return nil, err
}
entry.EventTypeNID = types.EventTypeNID(eventTypeNID)
entry.EventStateKeyNID = types.EventStateKeyNID(eventStateKeyNID)
entry.EventNID = types.EventNID(eventNID)
// We can use binary search here because we sorted the tuples earlier
if !tuples.contains(entry.StateKeyTuple) {
// The select will return the cross product of types and state keys.
// So we need to check if type of the entry is in the list.
continue
}
if types.StateBlockNID(stateBlockNID) != current.StateBlockNID {
// The state entry row is for a different state data block to the current one.
// So we append the current entry to the results and start adding to a new one.
// The first time through the loop current will be empty.
if current.StateEntries != nil {
results = append(results, current)
}
current = types.StateEntryList{StateBlockNID: types.StateBlockNID(stateBlockNID)}
}
current.StateEntries = append(current.StateEntries, entry)
}
// Add the last entry to the list if it is not empty.
if current.StateEntries != nil {
results = append(results, current)
}
return results, rows.Err()
}
func stateBlockNIDsAsArray(stateBlockNIDs []types.StateBlockNID) pq.Int64Array {
nids := make([]int64, len(stateBlockNIDs))
for i := range stateBlockNIDs {

View file

@ -25,6 +25,7 @@ import (
"github.com/matrix-org/dendrite/roomserver/storage/shared"
"github.com/matrix-org/dendrite/roomserver/storage/tables"
"github.com/matrix-org/dendrite/roomserver/types"
"github.com/matrix-org/util"
)
const stateSnapshotSchema = `
@ -86,14 +87,16 @@ func NewPostgresStateSnapshotTable(db *sql.DB) (tables.StateSnapshot, error) {
}
func (s *stateSnapshotStatements) InsertState(
ctx context.Context, txn *sql.Tx, roomNID types.RoomNID, stateBlockNIDs []types.StateBlockNID,
ctx context.Context, txn *sql.Tx, roomNID types.RoomNID, stateBlockNIDs types.StateBlockNIDs,
) (stateNID types.StateSnapshotNID, err error) {
nids := make([]int64, len(stateBlockNIDs))
stateBlockNIDs = stateBlockNIDs[:util.SortAndUnique(stateBlockNIDs)]
nids := make(int64Sorter, len(stateBlockNIDs))
for i := range stateBlockNIDs {
nids[i] = int64(stateBlockNIDs[i])
}
nids = nids[:util.SortAndUnique(nids)]
var id int64
err = sqlutil.TxStmt(txn, s.insertStateStmt).QueryRowContext(ctx, int64(roomNID), pq.Int64Array(nids), int64(roomNID)).Scan(&id)
err = sqlutil.TxStmt(txn, s.insertStateStmt).QueryRowContext(ctx, int64(roomNID), pq.Int64Array(nids)).Scan(&id)
if err != nil {
return 0, err
}