Fulltext implementation incl. config (#2480)

This adds the main component of the fulltext search.
This PR doesn't do anything yet besides creating an empty fulltextindex
folder when fulltext search is enabled. Indexing events is done in a separate PR.
This commit is contained in:
Till 2022-09-07 18:15:54 +02:00 committed by GitHub
parent 31f4ae8997
commit d5876abbe9
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
12 changed files with 647 additions and 2 deletions

View file

@ -38,6 +38,7 @@ import (
"golang.org/x/net/http2/h2c"
"github.com/matrix-org/dendrite/internal/caching"
"github.com/matrix-org/dendrite/internal/fulltext"
"github.com/matrix-org/dendrite/internal/httputil"
"github.com/matrix-org/dendrite/internal/pushgateway"
"github.com/matrix-org/dendrite/internal/sqlutil"
@ -90,6 +91,7 @@ type BaseDendrite struct {
Database *sql.DB
DatabaseWriter sqlutil.Writer
EnableMetrics bool
Fulltext *fulltext.Search
startupLock sync.Mutex
}
@ -150,6 +152,15 @@ func NewBaseDendrite(cfg *config.Dendrite, componentName string, options ...Base
logrus.WithError(err).Panicf("failed to start opentracing")
}
var fts *fulltext.Search
isSyncOrMonolith := componentName == "syncapi" || isMonolith
if cfg.SyncAPI.Fulltext.Enabled && isSyncOrMonolith {
fts, err = fulltext.New(cfg.SyncAPI.Fulltext)
if err != nil {
logrus.WithError(err).Panicf("failed to create full text")
}
}
if cfg.Global.Sentry.Enabled {
logrus.Info("Setting up Sentry for debugging...")
err = sentry.Init(sentry.ClientOptions{
@ -247,6 +258,7 @@ func NewBaseDendrite(cfg *config.Dendrite, componentName string, options ...Base
Database: db, // set if monolith with global connection pool only
DatabaseWriter: writer, // set if monolith with global connection pool only
EnableMetrics: enableMetrics,
Fulltext: fts,
}
}

View file

@ -9,6 +9,8 @@ type SyncAPI struct {
Database DatabaseOptions `yaml:"database,omitempty"`
RealIPHeader string `yaml:"real_ip_header"`
Fulltext Fulltext `yaml:"fulltext"`
}
func (c *SyncAPI) Defaults(opts DefaultOpts) {
@ -18,6 +20,7 @@ func (c *SyncAPI) Defaults(opts DefaultOpts) {
c.ExternalAPI.Listen = "http://localhost:8073"
c.Database.Defaults(20)
}
c.Fulltext.Defaults(opts)
if opts.Generate {
if !opts.Monolithic {
c.Database.ConnectionString = "file:syncapi.db"
@ -26,6 +29,7 @@ func (c *SyncAPI) Defaults(opts DefaultOpts) {
}
func (c *SyncAPI) Verify(configErrs *ConfigErrors, isMonolith bool) {
c.Fulltext.Verify(configErrs, isMonolith)
if isMonolith { // polylith required configs below
return
}
@ -36,3 +40,25 @@ func (c *SyncAPI) Verify(configErrs *ConfigErrors, isMonolith bool) {
checkURL(configErrs, "sync_api.internal_api.connect", string(c.InternalAPI.Connect))
checkURL(configErrs, "sync_api.external_api.listen", string(c.ExternalAPI.Listen))
}
// Fulltext holds the configuration of the fulltext search component. It is
// embedded in the SyncAPI configuration under the "fulltext" YAML key.
type Fulltext struct {
// Enabled switches fulltext search on. NewBaseDendrite only creates the
// search index when this is true and the component is the sync API or a
// monolith.
Enabled bool `yaml:"enabled"`
// IndexPath is presumably the on-disk location of the search index
// (Defaults sets it to "./fulltextindex") — confirm against the fulltext
// package.
IndexPath Path `yaml:"index_path"`
// InMemory presumably keeps the index in memory instead of at IndexPath —
// confirm against the fulltext package.
InMemory bool `yaml:"in_memory"` // only useful in tests
// Language defaults to "en" (see Defaults).
Language string `yaml:"language"` // the language to use when analysing content
}
// Defaults populates the fulltext search configuration with its default
// values: English content analysis and an index located at "./fulltextindex".
//
// Fulltext search stays disabled unless opts.Generate is set, in which case it
// is enabled and switched to an in-memory index. InMemory is left untouched
// when opts.Generate is not set.
func (f *Fulltext) Defaults(opts DefaultOpts) {
	f.Language = "en"
	f.IndexPath = "./fulltextindex"

	if !opts.Generate {
		f.Enabled = false
		return
	}

	// Generated configurations turn the feature on with an in-memory index.
	f.Enabled, f.InMemory = true, true
}
// Verify checks the fulltext search configuration and reports problems through
// checkNotEmpty on configErrs.
//
// Nothing is validated while fulltext search is disabled, so that a
// configuration which never uses the feature cannot fail verification because
// of unused fulltext settings. When enabled, both the index path and the
// analysis language must be non-empty.
//
// The reported keys use the "sync_api." prefix to match the other sync API
// checks (e.g. "sync_api.internal_api.connect").
//
// isMonolith is currently unused; it is kept so the signature matches the
// other Verify methods.
func (f *Fulltext) Verify(configErrs *ConfigErrors, isMonolith bool) {
	if !f.Enabled {
		return
	}
	checkNotEmpty(configErrs, "sync_api.fulltext.index_path", string(f.IndexPath))
	checkNotEmpty(configErrs, "sync_api.fulltext.language", f.Language)
}

View file

@ -29,6 +29,7 @@ var (
OutputReadUpdate = "OutputReadUpdate"
RequestPresence = "GetPresence"
OutputPresenceEvent = "OutputPresenceEvent"
InputFulltextReindex = "InputFulltextReindex"
)
// safeCharacters matches every run of one or more characters that is NOT an
// ASCII letter, an ASCII digit or '$'. Despite the name, a match therefore
// marks the unsafe part of a string; letters, digits and '$' never match.
var safeCharacters = regexp.MustCompile(`[^A-Za-z0-9$]+`)