Merge pull request #8958 from tdonohue/port_8953

[PORT to 7.x] Add flag Pattern.UNICODE_CHARACTER_CLASS to pattern compilation to recognize Unicode characters
This commit is contained in:
Tim Donohue
2023-07-14 16:01:19 -05:00
committed by GitHub

View File

@@ -845,7 +845,7 @@ public class ItemIndexFactoryImpl extends DSpaceObjectIndexFactoryImpl<Indexable
private void saveFacetPrefixParts(SolrInputDocument doc, DiscoverySearchFilter searchFilter, String value,
String separator, String authority, String preferedLabel) {
value = StringUtils.normalizeSpace(value);
Pattern pattern = Pattern.compile("\\b\\w+\\b", Pattern.CASE_INSENSITIVE);
Pattern pattern = Pattern.compile("\\b\\w+\\b", Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CHARACTER_CLASS);
Matcher matcher = pattern.matcher(value);
while (matcher.find()) {
int index = matcher.start();