log less for failed key querys, add counters for incoming pdus/edus (#1801)

* log less for failed key querys, add counters for incoming pdus/edus

* use labels

* Blacklist flakey test

* Fix metrics
This commit is contained in:
Kegsay 2021-03-23 11:33:36 +00:00 committed by GitHub
parent 01267a34b9
commit a1b7e4ef3f
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 41 additions and 9 deletions

View file

@ -330,16 +330,16 @@ func (u *DeviceListUpdater) processServer(serverName gomatrixserverlib.ServerNam
logger.WithError(err).Error("failed to load stale device lists")
return waitTime, true
}
hasFailures := false
failCount := 0
for _, userID := range userIDs {
if ctx.Err() != nil {
// we've timed out, give up and go to the back of the queue to let another server be processed.
hasFailures = true
failCount += 1
break
}
res, err := u.fedClient.GetUserDevices(ctx, serverName, userID)
if err != nil {
logger.WithError(err).WithField("user_id", userID).Error("failed to query device keys for user")
failCount += 1
fcerr, ok := err.(*fedsenderapi.FederationClientError)
if ok {
if fcerr.RetryAfter > 0 {
@ -351,20 +351,22 @@ func (u *DeviceListUpdater) processServer(serverName gomatrixserverlib.ServerNam
waitTime = time.Hour
logger.WithError(err).Warn("GetUserDevices returned unknown error type")
}
hasFailures = true
continue
}
err = u.updateDeviceList(&res)
if err != nil {
logger.WithError(err).WithField("user_id", userID).Error("fetched device list but failed to store/emit it")
hasFailures = true
failCount += 1
}
}
if failCount > 0 {
logger.WithField("total", len(userIDs)).WithField("failed", failCount).Error("failed to query device keys for some users")
}
for _, userID := range userIDs {
// always clear the channel to unblock Update calls regardless of success/failure
u.clearChannel(userID)
}
return waitTime, hasFailures
return waitTime, failCount > 0
}
func (u *DeviceListUpdater) updateDeviceList(res *gomatrixserverlib.RespUserDevices) error {