State storage refactor (#1839)

* Hash-deduplicated state storage (and migrations) for PostgreSQL and SQLite

* Refactor droomserver database setup for migrations

* Fix conflict statements

* Update migration names

* Set a boundary for old to new block/snapshot IDs so we don't rewrite them more than once accidentally

* Create sequence if not exists

* Fix boundary queries

* Fix boundary queries

* Use Query

* Break out queries a bit

* More sequence tweaks

* Query parameters are not playing the game

* Injection escaping may not work for CREATE SEQUENCE after all

* Fix snapshot sequence name

* Use boundaried IDs in SQLite too

* Use IFNULL for SQLite

* Use COALESCE in PostgreSQL

* Review comments @Kegsay
This commit is contained in:
Neil Alexander 2021-04-26 13:25:57 +01:00 committed by GitHub
parent d6e9b7b307
commit 5ce1fe80de
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
39 changed files with 1076 additions and 554 deletions

View file

@ -25,6 +25,7 @@ import (
"github.com/matrix-org/dendrite/roomserver/storage/shared"
"github.com/matrix-org/dendrite/roomserver/storage/tables"
"github.com/matrix-org/dendrite/roomserver/types"
"github.com/matrix-org/util"
)
const stateSnapshotSchema = `
@ -40,19 +41,29 @@ const stateSnapshotSchema = `
-- the full state under single state_block_nid.
CREATE SEQUENCE IF NOT EXISTS roomserver_state_snapshot_nid_seq;
CREATE TABLE IF NOT EXISTS roomserver_state_snapshots (
-- Local numeric ID for the state.
state_snapshot_nid bigint PRIMARY KEY DEFAULT nextval('roomserver_state_snapshot_nid_seq'),
-- Local numeric ID of the room this state is for.
-- Unused in normal operation, but useful for background work or ad-hoc debugging.
room_nid bigint NOT NULL,
-- List of state_block_nids, stored sorted by state_block_nid.
state_block_nids bigint[] NOT NULL
-- The state snapshot NID that identifies this snapshot.
state_snapshot_nid bigint PRIMARY KEY DEFAULT nextval('roomserver_state_snapshot_nid_seq'),
-- The hash of the state snapshot, which is used to enforce uniqueness. The hash is
-- generated in Dendrite and passed through to the database, as a btree index over
-- this column is cheap and fits within the maximum index size.
state_snapshot_hash BYTEA UNIQUE,
-- The room NID that the snapshot belongs to.
room_nid bigint NOT NULL,
-- The state blocks contained within this snapshot.
state_block_nids bigint[] NOT NULL
);
`
// Insert a new state snapshot. If we conflict on the hash column then
// we must perform an update so that the RETURNING statement returns the
// ID of the row that we conflicted with, so that we can then refer to
// the original snapshot.
const insertStateSQL = "" +
"INSERT INTO roomserver_state_snapshots (room_nid, state_block_nids)" +
" VALUES ($1, $2)" +
"INSERT INTO roomserver_state_snapshots (state_snapshot_hash, room_nid, state_block_nids)" +
" VALUES ($1, $2, $3)" +
" ON CONFLICT (state_snapshot_hash) DO UPDATE SET room_nid=$2" +
// Performing an update, above, ensures that the RETURNING statement
// below will always return a valid state snapshot ID
" RETURNING state_snapshot_nid"
// Bulk state data NID lookup.
@ -67,12 +78,13 @@ type stateSnapshotStatements struct {
bulkSelectStateBlockNIDsStmt *sql.Stmt
}
func NewPostgresStateSnapshotTable(db *sql.DB) (tables.StateSnapshot, error) {
s := &stateSnapshotStatements{}
func createStateSnapshotTable(db *sql.DB) error {
_, err := db.Exec(stateSnapshotSchema)
if err != nil {
return nil, err
}
return err
}
func prepareStateSnapshotTable(db *sql.DB) (tables.StateSnapshot, error) {
s := &stateSnapshotStatements{}
return s, shared.StatementList{
{&s.insertStateStmt, insertStateSQL},
@ -81,13 +93,15 @@ func NewPostgresStateSnapshotTable(db *sql.DB) (tables.StateSnapshot, error) {
}
func (s *stateSnapshotStatements) InsertState(
ctx context.Context, txn *sql.Tx, roomNID types.RoomNID, stateBlockNIDs []types.StateBlockNID,
ctx context.Context, txn *sql.Tx, roomNID types.RoomNID, nids types.StateBlockNIDs,
) (stateNID types.StateSnapshotNID, err error) {
nids := make([]int64, len(stateBlockNIDs))
for i := range stateBlockNIDs {
nids[i] = int64(stateBlockNIDs[i])
nids = nids[:util.SortAndUnique(nids)]
var id int64
err = sqlutil.TxStmt(txn, s.insertStateStmt).QueryRowContext(ctx, nids.Hash(), int64(roomNID), stateBlockNIDsAsArray(nids)).Scan(&id)
if err != nil {
return 0, err
}
err = sqlutil.TxStmt(txn, s.insertStateStmt).QueryRowContext(ctx, int64(roomNID), pq.Int64Array(nids)).Scan(&stateNID)
stateNID = types.StateSnapshotNID(id)
return
}