Add HTTP status code to FederationClientError (#2699)

Also ensures we wait on more HTTP status codes.
This commit is contained in:
Till 2022-09-07 16:14:09 +02:00 committed by GitHub
parent 7e8c605f98
commit 0d697f6754
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 27 additions and 12 deletions

View file

@ -407,10 +407,13 @@ userLoop:
waitTime = e.RetryAfter
} else if e.Blacklisted {
waitTime = time.Hour * 8
} else {
break userLoop
} else if e.Code >= 300 {
// We didn't get a real FederationClientError (e.g. in polylith mode, where gomatrix.HTTPError
// are "converted" to FederationClientError), but we probably shouldn't hit them every $waitTime seconds.
waitTime = time.Hour
break userLoop
}
break userLoop
case net.Error:
// Use the default waitTime, if it's a timeout.
// It probably doesn't make sense to try further users.
@ -420,9 +423,10 @@ userLoop:
break userLoop
}
case gomatrix.HTTPError:
// The remote server returned an error, give it some time to recover
if e.Code >= 500 {
waitTime = time.Minute * 10
// The remote server returned an error, give it some time to recover.
// This is to avoid spamming remote servers, which may not be Matrix servers anymore.
if e.Code >= 300 {
waitTime = time.Hour
logrus.WithError(e).Error("GetUserDevices returned gomatrix.HTTPError")
break userLoop
}
@ -459,9 +463,10 @@ userLoop:
}
if failCount > 0 {
logger.WithFields(logrus.Fields{
"total": len(userIDs),
"failed": failCount,
"skipped": len(userIDs) - failCount,
"total": len(userIDs),
"failed": failCount,
"skipped": len(userIDs) - failCount,
"waittime": waitTime,
}).Warn("Failed to query device keys for some users")
}
for _, userID := range userIDs {