Graceful shutdowns (#1734)

* Initial graceful stop

* Fix dendritejs

* Use process context for outbound federation requests in destination queues

* Reduce logging

* Fix log level
This commit is contained in:
Neil Alexander 2021-01-26 12:56:20 +00:00 committed by GitHub
parent 64fb6de6d4
commit 9f443317bc
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
26 changed files with 187 additions and 24 deletions

View file

@ -15,22 +15,28 @@
package setup
import (
"context"
"crypto/tls"
"fmt"
"io"
"net"
"net/http"
"net/url"
"os"
"os/signal"
"syscall"
"time"
"github.com/matrix-org/dendrite/internal/caching"
"github.com/matrix-org/dendrite/internal/httputil"
"github.com/matrix-org/gomatrixserverlib"
"github.com/prometheus/client_golang/prometheus/promhttp"
"go.uber.org/atomic"
"golang.org/x/net/http2"
"golang.org/x/net/http2/h2c"
"github.com/matrix-org/dendrite/internal"
"github.com/matrix-org/dendrite/setup/process"
"github.com/matrix-org/dendrite/userapi/storage/accounts"
"github.com/gorilla/mux"
@ -61,6 +67,7 @@ import (
// should only be used during start up.
// Must be closed when shutting down.
type BaseDendrite struct {
*process.ProcessContext
componentName string
tracerCloser io.Closer
PublicClientAPIMux *mux.Router
@ -161,7 +168,9 @@ func NewBaseDendrite(cfg *config.Dendrite, componentName string, useHTTPAPIs boo
// We need to be careful with media APIs if they read from a filesystem to make sure they
// are not inadvertently reading paths without cleaning, else this could introduce a
// directory traversal attack e.g /../../../etc/passwd
return &BaseDendrite{
ProcessContext: process.NewProcessContext(),
componentName: componentName,
UseHTTPAPIs: useHTTPAPIs,
tracerCloser: closer,
@ -354,14 +363,26 @@ func (b *BaseDendrite) SetupAndServeHTTP(
if internalAddr != NoListener && internalAddr != externalAddr {
go func() {
var internalShutdown atomic.Bool // RegisterOnShutdown can be called more than once
logrus.Infof("Starting internal %s listener on %s", b.componentName, internalServ.Addr)
b.ProcessContext.ComponentStarted()
internalServ.RegisterOnShutdown(func() {
if internalShutdown.CAS(false, true) {
b.ProcessContext.ComponentFinished()
logrus.Infof("Stopped internal HTTP listener")
}
})
if certFile != nil && keyFile != nil {
if err := internalServ.ListenAndServeTLS(*certFile, *keyFile); err != nil {
logrus.WithError(err).Fatal("failed to serve HTTPS")
if err != http.ErrServerClosed {
logrus.WithError(err).Fatal("failed to serve HTTPS")
}
}
} else {
if err := internalServ.ListenAndServe(); err != nil {
logrus.WithError(err).Fatal("failed to serve HTTP")
if err != http.ErrServerClosed {
logrus.WithError(err).Fatal("failed to serve HTTP")
}
}
}
logrus.Infof("Stopped internal %s listener on %s", b.componentName, internalServ.Addr)
@ -370,19 +391,52 @@ func (b *BaseDendrite) SetupAndServeHTTP(
if externalAddr != NoListener {
go func() {
var externalShutdown atomic.Bool // RegisterOnShutdown can be called more than once
logrus.Infof("Starting external %s listener on %s", b.componentName, externalServ.Addr)
b.ProcessContext.ComponentStarted()
externalServ.RegisterOnShutdown(func() {
if externalShutdown.CAS(false, true) {
b.ProcessContext.ComponentFinished()
logrus.Infof("Stopped external HTTP listener")
}
})
if certFile != nil && keyFile != nil {
if err := externalServ.ListenAndServeTLS(*certFile, *keyFile); err != nil {
logrus.WithError(err).Fatal("failed to serve HTTPS")
if err != http.ErrServerClosed {
logrus.WithError(err).Fatal("failed to serve HTTPS")
}
}
} else {
if err := externalServ.ListenAndServe(); err != nil {
logrus.WithError(err).Fatal("failed to serve HTTP")
if err != http.ErrServerClosed {
logrus.WithError(err).Fatal("failed to serve HTTP")
}
}
}
logrus.Infof("Stopped external %s listener on %s", b.componentName, externalServ.Addr)
}()
}
select {}
<-b.ProcessContext.WaitForShutdown()
ctx, cancel := context.WithCancel(context.Background())
cancel()
_ = internalServ.Shutdown(ctx)
_ = externalServ.Shutdown(ctx)
logrus.Infof("Stopped HTTP listeners")
}
func (b *BaseDendrite) WaitForShutdown() {
sigs := make(chan os.Signal, 1)
signal.Notify(sigs, syscall.SIGINT, syscall.SIGTERM)
<-sigs
signal.Reset(syscall.SIGINT, syscall.SIGTERM)
logrus.Warnf("Shutdown signal received")
b.ProcessContext.ShutdownDendrite()
b.ProcessContext.WaitForComponentsToFinish()
logrus.Warnf("Dendrite is exiting now")
}

View file

@ -27,6 +27,7 @@ import (
"github.com/matrix-org/dendrite/mediaapi"
roomserverAPI "github.com/matrix-org/dendrite/roomserver/api"
"github.com/matrix-org/dendrite/setup/config"
"github.com/matrix-org/dendrite/setup/process"
serverKeyAPI "github.com/matrix-org/dendrite/signingkeyserver/api"
"github.com/matrix-org/dendrite/syncapi"
userapi "github.com/matrix-org/dendrite/userapi/api"
@ -56,7 +57,7 @@ type Monolith struct {
}
// AddAllPublicRoutes attaches all public paths to the given router
func (m *Monolith) AddAllPublicRoutes(csMux, ssMux, keyMux, mediaMux *mux.Router) {
func (m *Monolith) AddAllPublicRoutes(process *process.ProcessContext, csMux, ssMux, keyMux, mediaMux *mux.Router) {
clientapi.AddPublicRoutes(
csMux, &m.Config.ClientAPI, m.AccountDB,
m.FedClient, m.RoomserverAPI,
@ -71,7 +72,7 @@ func (m *Monolith) AddAllPublicRoutes(csMux, ssMux, keyMux, mediaMux *mux.Router
)
mediaapi.AddPublicRoutes(mediaMux, &m.Config.MediaAPI, m.UserAPI, m.Client)
syncapi.AddPublicRoutes(
csMux, m.UserAPI, m.RoomserverAPI,
process, csMux, m.UserAPI, m.RoomserverAPI,
m.KeyAPI, m.FedClient, &m.Config.SyncAPI,
)
}

45
setup/process/process.go Normal file
View file

@ -0,0 +1,45 @@
package process
import (
"context"
"sync"
)
type ProcessContext struct {
wg *sync.WaitGroup // used to wait for components to shutdown
ctx context.Context // cancelled when Stop is called
shutdown context.CancelFunc // shut down Dendrite
}
func NewProcessContext() *ProcessContext {
ctx, shutdown := context.WithCancel(context.Background())
return &ProcessContext{
ctx: ctx,
shutdown: shutdown,
wg: &sync.WaitGroup{},
}
}
func (b *ProcessContext) Context() context.Context {
return context.WithValue(b.ctx, "scope", "process") // nolint:staticcheck
}
func (b *ProcessContext) ComponentStarted() {
b.wg.Add(1)
}
func (b *ProcessContext) ComponentFinished() {
b.wg.Done()
}
func (b *ProcessContext) ShutdownDendrite() {
b.shutdown()
}
func (b *ProcessContext) WaitForShutdown() <-chan struct{} {
return b.ctx.Done()
}
func (b *ProcessContext) WaitForComponentsToFinish() {
b.wg.Wait()
}