Media filename handling improvements (#1140)

* Derive content ID from hash+filename but preserve dedupe, improve Content-Disposition handling and ASCII handling

* Linter fix

* Some more comments

* Update sytest-whitelist
This commit is contained in:
Neil Alexander 2020-06-17 11:53:26 +01:00 committed by GitHub
parent a66a3b830c
commit 5d5aa0a31d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 80 additions and 13 deletions

View file

@ -28,6 +28,7 @@ import (
"strconv"
"strings"
"sync"
"unicode"
"github.com/matrix-org/dendrite/clientapi/jsonerror"
"github.com/matrix-org/dendrite/internal/config"
@ -54,6 +55,7 @@ type downloadRequest struct {
IsThumbnailRequest bool
ThumbnailSize types.ThumbnailSize
Logger *log.Entry
DownloadFilename string
}
// Download implements GET /download and GET /thumbnail
@ -73,6 +75,7 @@ func Download(
activeRemoteRequests *types.ActiveRemoteRequests,
activeThumbnailGeneration *types.ActiveThumbnailGeneration,
isThumbnailRequest bool,
customFilename string,
) {
dReq := &downloadRequest{
MediaMetadata: &types.MediaMetadata{
@ -84,6 +87,7 @@ func Download(
"Origin": origin,
"MediaID": mediaID,
}),
DownloadFilename: customFilename,
}
if dReq.IsThumbnailRequest {
@ -301,16 +305,8 @@ func (r *downloadRequest) respondFromLocalFile(
}).Info("Responding with file")
responseFile = file
responseMetadata = r.MediaMetadata
if len(responseMetadata.UploadName) > 0 {
uploadName, err := url.PathUnescape(string(responseMetadata.UploadName))
if err != nil {
return nil, fmt.Errorf("url.PathUnescape: %w", err)
}
w.Header().Set("Content-Disposition", fmt.Sprintf(
`inline; filename=utf-8"%s"`,
strings.ReplaceAll(uploadName, `"`, `\"`), // escape quote marks only, as per RFC6266
))
if err := r.addDownloadFilenameToHeaders(w, responseMetadata); err != nil {
return nil, err
}
}
@ -329,6 +325,67 @@ func (r *downloadRequest) respondFromLocalFile(
return responseMetadata, nil
}
func (r *downloadRequest) addDownloadFilenameToHeaders(
w http.ResponseWriter,
responseMetadata *types.MediaMetadata,
) error {
// If the requestor supplied a filename to name the download then
// use that, otherwise use the filename from the response metadata.
filename := string(responseMetadata.UploadName)
if r.DownloadFilename != "" {
filename = r.DownloadFilename
}
if len(filename) == 0 {
return nil
}
unescaped, err := url.PathUnescape(filename)
if err != nil {
return fmt.Errorf("url.PathUnescape: %w", err)
}
isASCII := true // Is the string ASCII or UTF-8?
quote := `` // Encloses the string (ASCII only)
for i := 0; i < len(unescaped); i++ {
if unescaped[i] > unicode.MaxASCII {
isASCII = false
}
if unescaped[i] == 0x20 || unescaped[i] == 0x3B {
// If the filename contains a space or a semicolon, which
// are special characters in Content-Disposition
quote = `"`
}
}
// We don't necessarily want a full escape as the Content-Disposition
// can take many of the characters that PathEscape would otherwise and
// browser support for encoding is a bit wild, so we'll escape only
// the characters that we know will mess up the parsing of the
// Content-Disposition header elements themselves
unescaped = strings.ReplaceAll(unescaped, `\`, `\\"`)
unescaped = strings.ReplaceAll(unescaped, `"`, `\"`)
if isASCII {
// For ASCII filenames, we should only quote the filename if
// it needs to be done, e.g. it contains a space or a character
// that would otherwise be parsed as a control character in the
// Content-Disposition header
w.Header().Set("Content-Disposition", fmt.Sprintf(
`inline; filename=%s%s%s`,
quote, unescaped, quote,
))
} else {
// For UTF-8 filenames, we quote always, as that's the standard
w.Header().Set("Content-Disposition", fmt.Sprintf(
`inline; filename=utf-8"%s"`,
unescaped,
))
}
return nil
}
// Note: Thumbnail generation may be ongoing asynchronously.
// If no thumbnail was found then returns nil, nil, nil
func (r *downloadRequest) getThumbnailFile(

View file

@ -66,7 +66,9 @@ func Setup(
downloadHandler := makeDownloadAPI("download", cfg, db, client, activeRemoteRequests, activeThumbnailGeneration)
r0mux.Handle("/download/{serverName}/{mediaId}", downloadHandler).Methods(http.MethodGet, http.MethodOptions)
v1mux.Handle("/download/{serverName}/{mediaId}", downloadHandler).Methods(http.MethodGet, http.MethodOptions) // TODO: remove when synapse is fixed
r0mux.Handle("/download/{serverName}/{mediaId}/{downloadName}", downloadHandler).Methods(http.MethodGet, http.MethodOptions)
v1mux.Handle("/download/{serverName}/{mediaId}", downloadHandler).Methods(http.MethodGet, http.MethodOptions) // TODO: remove when synapse is fixed
v1mux.Handle("/download/{serverName}/{mediaId}/{downloadName}", downloadHandler).Methods(http.MethodGet, http.MethodOptions) // TODO: remove when synapse is fixed
r0mux.Handle("/thumbnail/{serverName}/{mediaId}",
makeDownloadAPI("thumbnail", cfg, db, client, activeRemoteRequests, activeThumbnailGeneration),
@ -120,6 +122,7 @@ func makeDownloadAPI(
activeRemoteRequests,
activeThumbnailGeneration,
name == "thumbnail",
vars["downloadName"],
)
}
return promhttp.InstrumentHandlerCounter(counterVec, http.HandlerFunc(httpHandler))

View file

@ -16,6 +16,7 @@ package routing
import (
"context"
"encoding/base64"
"fmt"
"io"
"net/http"
@ -123,7 +124,9 @@ func (r *uploadRequest) doUpload(
r.MediaMetadata.FileSizeBytes = bytesWritten
r.MediaMetadata.Base64Hash = hash
r.MediaMetadata.MediaID = types.MediaID(hash)
r.MediaMetadata.MediaID = types.MediaID(base64.RawURLEncoding.EncodeToString(
[]byte(string(r.MediaMetadata.UploadName) + string(r.MediaMetadata.Base64Hash)),
))
r.Logger = r.Logger.WithField("MediaID", r.MediaMetadata.MediaID)