Merge pull request #11267 from DSpace/backport-11260-to-dspace-9_x

[Port dspace-9_x] fix(#11191): Align Content-Disposition with RFC 5987/6266
This commit is contained in:
Tim Donohue
2025-09-05 17:29:28 -05:00
committed by GitHub
2 changed files with 58 additions and 7 deletions

View File

@@ -7,11 +7,13 @@
*/
package org.dspace.app.rest.utils;
import static jakarta.mail.internet.MimeUtility.encodeText;
import static java.util.Objects.isNull;
import static java.util.Objects.nonNull;
import java.io.IOException;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.text.Normalizer;
import java.time.Instant;
import java.util.Arrays;
import java.util.Collections;
@@ -172,9 +174,16 @@ public class HttpHeadersInitializer {
// disposition may be null here if contentType is null
if (!isNullOrEmpty(disposition)) {
httpHeaders.put(CONTENT_DISPOSITION, Collections.singletonList(String.format(CONTENT_DISPOSITION_FORMAT,
disposition,
encodeText(fileName))));
String fallbackAsciiName = createFallbackAsciiName(this.fileName);
String encodedUtf8Name = createEncodedUtf8Name(this.fileName);
String headerValue = String.format(
"%s; filename=\"%s\"; filename*=UTF-8''%s",
disposition,
fallbackAsciiName,
encodedUtf8Name
);
httpHeaders.put(CONTENT_DISPOSITION, Collections.singletonList(headerValue));
}
log.debug("Content-Disposition : {}", disposition);
@@ -262,4 +271,41 @@ public class HttpHeadersInitializer {
return Arrays.binarySearch(matchValues, toMatch) > -1 || Arrays.binarySearch(matchValues, "*") > -1;
}
/**
 * Creates an ASCII-only fallback filename that is safe to embed in the quoted
 * {@code filename="..."} parameter of a Content-Disposition header.
 * <p>
 * The name is decomposed (NFD) so that accented letters split into an ASCII base
 * letter plus combining marks; everything outside printable ASCII is then dropped.
 * That removes the combining marks, characters that have no ASCII base letter, and
 * control characters such as CR/LF, which must never reach a header value.
 * Double quotes and backslashes are backslash-escaped so the value stays a valid
 * quoted-string.
 * E.g., "ä-ö-é.pdf" becomes "a-o-e.pdf".
 *
 * @param originalFilename The original filename, may be null.
 * @return A string containing only printable ASCII characters, with {@code "} and
 *         {@code \} escaped; an empty string if the input is null.
 */
private String createFallbackAsciiName(String originalFilename) {
    if (originalFilename == null) {
        return "";
    }
    String decomposed = Normalizer.normalize(originalFilename, Normalizer.Form.NFD);
    // Keep only 0x20-0x7E: strips combining marks, other non-ASCII characters and control characters.
    String printableAscii = decomposed.replaceAll("[^\\x20-\\x7E]", "");
    // Escape the backslash first so the backslashes added for quotes are not doubled.
    return printableAscii.replace("\\", "\\\\").replace("\"", "\\\"");
}
/**
 * Creates a percent-encoded UTF-8 filename according to RFC 5987.
 * This is for the {@code filename*} parameter.
 * <p>
 * {@link URLEncoder} produces form encoding, which differs from RFC 5987 in two
 * ways that are corrected here: a space is emitted as "+" (rewritten to "%20"),
 * and "*" is left as-is although it is not an RFC 5987 attr-char (rewritten to
 * "%2A"). A literal "+" in the input is already encoded as "%2B" by the encoder,
 * so every "+" in its output stands for a space.
 * E.g., "ä ö é.pdf" becomes "%C3%A4%20%C3%B6%20%C3%A9.pdf".
 *
 * @param originalFilename The original filename, may be null.
 * @return A percent-encoded string; an empty string if the input is null.
 */
private String createEncodedUtf8Name(String originalFilename) {
    if (originalFilename == null) {
        return "";
    }
    // The Charset overload cannot throw UnsupportedEncodingException, so no fallback path is needed.
    return URLEncoder.encode(originalFilename, StandardCharsets.UTF_8)
        .replace("+", "%20")
        .replace("*", "%2A");
}
}

View File

@@ -8,7 +8,6 @@
package org.dspace.app.rest;
import static com.jayway.jsonpath.JsonPath.read;
import static jakarta.mail.internet.MimeUtility.encodeText;
import static java.util.UUID.randomUUID;
import static org.apache.commons.codec.CharEncoding.UTF_8;
import static org.apache.commons.collections.CollectionUtils.isEmpty;
@@ -366,7 +365,11 @@ public class BitstreamRestControllerIT extends AbstractControllerIntegrationTest
//2. A public item with a bitstream
String bitstreamContent = "0123456789";
String bitstreamName = "ภาษาไทย";
String bitstreamName = "ภาษาไทย-com-acentuação.pdf";
String expectedAscii = "-com-acentuacao.pdf";
String expectedUtf8Encoded =
"%E0%B8%A0%E0%B8%B2%E0%B8%A9%E0%B8%B2%E0%B9%84%E0%B8%97%E0%B8%A2-"
+ "com-acentua%C3%A7%C3%A3o.pdf";
try (InputStream is = IOUtils.toInputStream(bitstreamContent, CharEncoding.UTF_8)) {
@@ -390,7 +393,9 @@ public class BitstreamRestControllerIT extends AbstractControllerIntegrationTest
//We expect the content disposition to have the encoded bitstream name
.andExpect(header().string(
"Content-Disposition",
"attachment;filename=\"" + encodeText(bitstreamName) + "\""
String.format("attachment; filename=\"%s\"; filename*=UTF-8''%s",
expectedAscii,
expectedUtf8Encoded)
));
}