mirror of
https://github.com/DSpace/DSpace.git
synced 2025-10-17 15:03:18 +00:00
Merge pull request #2698 from toniprieto/DS-4034-strip-diacritics-master
DS-4034 - The "first few letters" search doesn't work with diacritics (master port)
This commit is contained in:
@@ -11,7 +11,6 @@ import java.sql.SQLException;
|
|||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
import org.apache.commons.lang3.StringUtils;
|
|
||||||
import org.apache.logging.log4j.Logger;
|
import org.apache.logging.log4j.Logger;
|
||||||
import org.dspace.content.Collection;
|
import org.dspace.content.Collection;
|
||||||
import org.dspace.content.Community;
|
import org.dspace.content.Community;
|
||||||
@@ -381,7 +380,7 @@ public class BrowseEngine {
|
|||||||
// this is the distinct table constrained to either community or collection
|
// this is the distinct table constrained to either community or collection
|
||||||
dao.setTable(browseIndex.getDistinctTableName());
|
dao.setTable(browseIndex.getDistinctTableName());
|
||||||
|
|
||||||
dao.setStartsWith(StringUtils.lowerCase(scope.getStartsWith()));
|
dao.setStartsWith(normalizeJumpToValue(scope.getStartsWith()));
|
||||||
// remind the DAO that this is a distinct value browse, so it knows what sort
|
// remind the DAO that this is a distinct value browse, so it knows what sort
|
||||||
// of query to build
|
// of query to build
|
||||||
dao.setDistinct(true);
|
dao.setDistinct(true);
|
||||||
|
@@ -9,6 +9,7 @@ package org.dspace.sort;
|
|||||||
|
|
||||||
import org.dspace.text.filter.DecomposeDiactritics;
|
import org.dspace.text.filter.DecomposeDiactritics;
|
||||||
import org.dspace.text.filter.LowerCaseAndTrim;
|
import org.dspace.text.filter.LowerCaseAndTrim;
|
||||||
|
import org.dspace.text.filter.StripDiacritics;
|
||||||
import org.dspace.text.filter.TextFilter;
|
import org.dspace.text.filter.TextFilter;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -19,6 +20,7 @@ import org.dspace.text.filter.TextFilter;
|
|||||||
public class OrderFormatAuthor extends AbstractTextFilterOFD {
|
public class OrderFormatAuthor extends AbstractTextFilterOFD {
|
||||||
{
|
{
|
||||||
filters = new TextFilter[] {new DecomposeDiactritics(),
|
filters = new TextFilter[] {new DecomposeDiactritics(),
|
||||||
|
new StripDiacritics(),
|
||||||
new LowerCaseAndTrim()};
|
new LowerCaseAndTrim()};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -9,6 +9,7 @@ package org.dspace.sort;
|
|||||||
|
|
||||||
import org.dspace.text.filter.DecomposeDiactritics;
|
import org.dspace.text.filter.DecomposeDiactritics;
|
||||||
import org.dspace.text.filter.LowerCaseAndTrim;
|
import org.dspace.text.filter.LowerCaseAndTrim;
|
||||||
|
import org.dspace.text.filter.StripDiacritics;
|
||||||
import org.dspace.text.filter.TextFilter;
|
import org.dspace.text.filter.TextFilter;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -19,6 +20,7 @@ import org.dspace.text.filter.TextFilter;
|
|||||||
public class OrderFormatText extends AbstractTextFilterOFD {
|
public class OrderFormatText extends AbstractTextFilterOFD {
|
||||||
{
|
{
|
||||||
filters = new TextFilter[] {new DecomposeDiactritics(),
|
filters = new TextFilter[] {new DecomposeDiactritics(),
|
||||||
|
new StripDiacritics(),
|
||||||
new LowerCaseAndTrim()};
|
new LowerCaseAndTrim()};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -827,6 +827,158 @@ public class BrowsesResourceControllerIT extends AbstractControllerIntegrationTe
|
|||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testBrowseByEntriesStartsWithAndDiacritics() throws Exception {
|
||||||
|
context.turnOffAuthorisationSystem();
|
||||||
|
|
||||||
|
//** GIVEN **
|
||||||
|
//1. A community-collection structure with one parent community with sub-community and two collections.
|
||||||
|
parentCommunity = CommunityBuilder.createCommunity(context)
|
||||||
|
.withName("Parent Community")
|
||||||
|
.build();
|
||||||
|
Community child1 = CommunityBuilder.createSubCommunity(context, parentCommunity)
|
||||||
|
.withName("Sub Community")
|
||||||
|
.build();
|
||||||
|
Collection col1 = CollectionBuilder.createCollection(context, child1).withName("Collection 1").build();
|
||||||
|
Collection col2 = CollectionBuilder.createCollection(context, child1).withName("Collection 2").build();
|
||||||
|
|
||||||
|
//2. 4 public items that are readable by Anonymous
|
||||||
|
Item item1 = ItemBuilder.createItem(context, col1)
|
||||||
|
.withTitle("Item1")
|
||||||
|
.withAuthor("Álvarez, Nombre")
|
||||||
|
.withIssueDate("1912-06-23")
|
||||||
|
.withSubject("Teléfono")
|
||||||
|
.build();
|
||||||
|
|
||||||
|
Item item2 = ItemBuilder.createItem(context, col1)
|
||||||
|
.withTitle("Item2")
|
||||||
|
.withAuthor("Ögren, Name")
|
||||||
|
.withIssueDate("1982-06-25")
|
||||||
|
.withSubject("Televisor")
|
||||||
|
.build();
|
||||||
|
|
||||||
|
Item item3 = ItemBuilder.createItem(context, col2)
|
||||||
|
.withTitle("Item3")
|
||||||
|
.withAuthor("Azuaga, Nombre")
|
||||||
|
.withIssueDate("1990")
|
||||||
|
.withSubject("Telecomunicaciones")
|
||||||
|
.build();
|
||||||
|
|
||||||
|
Item item4 = ItemBuilder.createItem(context, col2)
|
||||||
|
.withTitle("Item4")
|
||||||
|
.withAuthor("Alonso, Nombre")
|
||||||
|
.withAuthor("Ortiz, Nombre")
|
||||||
|
.withIssueDate("1995-05-23")
|
||||||
|
.withSubject("Guion")
|
||||||
|
.build();
|
||||||
|
|
||||||
|
|
||||||
|
// ---- BROWSES BY ENTRIES ----
|
||||||
|
|
||||||
|
//** WHEN **
|
||||||
|
//An anonymous user browses the entries in the Browse by Author endpoint
|
||||||
|
//with startsWith set to A
|
||||||
|
getClient().perform(get("/api/discover/browses/author/entries?startsWith=A")
|
||||||
|
.param("size", "4"))
|
||||||
|
|
||||||
|
//** THEN **
|
||||||
|
//The status has to be 200 OK
|
||||||
|
.andExpect(status().isOk())
|
||||||
|
//We expect the content type to be "application/hal+json;charset=UTF-8"
|
||||||
|
.andExpect(content().contentType(contentType))
|
||||||
|
|
||||||
|
//We expect 3 elements
|
||||||
|
.andExpect(jsonPath("$.page.totalElements", is(3)))
|
||||||
|
//As entry browsing works as a filter, we expect to be on page 0
|
||||||
|
.andExpect(jsonPath("$.page.number", is(0)))
|
||||||
|
|
||||||
|
//Verify that the index filters to the "Alonso, Nombre", "Álvarez, Nombre" and "Azuaga, Nombre"
|
||||||
|
// and diacritics are ignored in sorting
|
||||||
|
.andExpect(jsonPath("$._embedded.entries",
|
||||||
|
contains(BrowseEntryResourceMatcher.matchBrowseEntry("Alonso, Nombre", 1),
|
||||||
|
BrowseEntryResourceMatcher.matchBrowseEntry("Álvarez, Nombre", 1),
|
||||||
|
BrowseEntryResourceMatcher.matchBrowseEntry("Azuaga, Nombre", 1)
|
||||||
|
)))
|
||||||
|
|
||||||
|
//Verify startsWith parameter is included in the links
|
||||||
|
.andExpect(jsonPath("$._links.self.href", containsString("?startsWith=A")));
|
||||||
|
|
||||||
|
//** WHEN **
|
||||||
|
//An anonymous user browses the entries in the Browse by Author endpoint
|
||||||
|
//with startsWith set to Ó (accented)
|
||||||
|
getClient().perform(get("/api/discover/browses/author/entries?startsWith=Ó"))
|
||||||
|
|
||||||
|
//** THEN **
|
||||||
|
//The status has to be 200 OK
|
||||||
|
.andExpect(status().isOk())
|
||||||
|
//We expect the content type to be "application/hal+json;charset=UTF-8"
|
||||||
|
.andExpect(content().contentType(contentType))
|
||||||
|
|
||||||
|
//We expect 2 elements
|
||||||
|
.andExpect(jsonPath("$.page.totalElements", is(2)))
|
||||||
|
//As entry browsing works as a filter, we expect to be on page 0
|
||||||
|
.andExpect(jsonPath("$.page.number", is(0)))
|
||||||
|
|
||||||
|
//Verify that the index filters to the "Ögren, Name" and "Ortiz, Nombre"
|
||||||
|
.andExpect(jsonPath("$._embedded.entries",
|
||||||
|
contains(BrowseEntryResourceMatcher.matchBrowseEntry("Ögren, Name", 1),
|
||||||
|
BrowseEntryResourceMatcher.matchBrowseEntry("Ortiz, Nombre", 1)
|
||||||
|
)))
|
||||||
|
//Verify that the startsWith parameter is included in the links
|
||||||
|
.andExpect(jsonPath("$._links.self.href", containsString("?startsWith=Ó")));
|
||||||
|
|
||||||
|
|
||||||
|
//** WHEN **
|
||||||
|
//An anonymous user browses the entries in the Browse by Subject endpoint
|
||||||
|
//with startsWith set to Tele
|
||||||
|
getClient().perform(get("/api/discover/browses/subject/entries?startsWith=Tele"))
|
||||||
|
|
||||||
|
//** THEN **
|
||||||
|
//The status has to be 200 OK
|
||||||
|
.andExpect(status().isOk())
|
||||||
|
//We expect the content type to be "application/hal+json;charset=UTF-8"
|
||||||
|
.andExpect(content().contentType(contentType))
|
||||||
|
|
||||||
|
//We expect 3 elements
|
||||||
|
.andExpect(jsonPath("$.page.totalElements", is(3)))
|
||||||
|
//As entry browsing works as a filter, we expect to be on page 0
|
||||||
|
.andExpect(jsonPath("$.page.number", is(0)))
|
||||||
|
|
||||||
|
//Verify that the index filters to the "Telecomunicaciones", "Teléfono" and "Televisor" and
|
||||||
|
// it is sorted ignoring diacritics
|
||||||
|
.andExpect(jsonPath("$._embedded.entries",
|
||||||
|
contains(BrowseEntryResourceMatcher.matchBrowseEntry("Telecomunicaciones", 1),
|
||||||
|
BrowseEntryResourceMatcher.matchBrowseEntry("Teléfono", 1),
|
||||||
|
BrowseEntryResourceMatcher.matchBrowseEntry("Televisor", 1)
|
||||||
|
)))
|
||||||
|
//Verify that the startsWith parameter is included in the links
|
||||||
|
.andExpect(jsonPath("$._links.self.href", containsString("?startsWith=Tele")));
|
||||||
|
|
||||||
|
//** WHEN **
|
||||||
|
//An anonymous user browses the entries in the Browse by Subject endpoint
|
||||||
|
//with startsWith set to Guión
|
||||||
|
getClient().perform(get("/api/discover/browses/subject/entries?startsWith=Guión"))
|
||||||
|
|
||||||
|
//** THEN **
|
||||||
|
//The status has to be 200 OK
|
||||||
|
.andExpect(status().isOk())
|
||||||
|
//We expect the content type to be "application/hal+json;charset=UTF-8"
|
||||||
|
.andExpect(content().contentType(contentType))
|
||||||
|
|
||||||
|
//We expect only the entry "Guion" to be present
|
||||||
|
.andExpect(jsonPath("$.page.totalElements", is(1)))
|
||||||
|
//As entry browsing works as a filter, we expect to be on page 0
|
||||||
|
.andExpect(jsonPath("$.page.number", is(0)))
|
||||||
|
|
||||||
|
//Verify that the index filters to the "Guion"
|
||||||
|
.andExpect(jsonPath("$._embedded.entries",
|
||||||
|
contains(BrowseEntryResourceMatcher.matchBrowseEntry("Guion", 1)
|
||||||
|
)))
|
||||||
|
//Verify that the startsWith parameter is included in the links
|
||||||
|
.andExpect(jsonPath("$._links.self.href", containsString("?startsWith=Guión")));
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testBrowseByItemsStartsWith() throws Exception {
|
public void testBrowseByItemsStartsWith() throws Exception {
|
||||||
context.turnOffAuthorisationSystem();
|
context.turnOffAuthorisationSystem();
|
||||||
|
Reference in New Issue
Block a user