Merge pull request #10519 from toniprieto/browse-entries-desc-bug

Fix metadata browsing in descending order using a json.facet to retrieve index values
This commit is contained in:
kshepherd
2025-04-17 15:20:45 +02:00
committed by GitHub
3 changed files with 236 additions and 68 deletions

View File

@@ -23,7 +23,6 @@ import org.dspace.authorize.service.AuthorizeService;
import org.dspace.content.DSpaceObject;
import org.dspace.content.Item;
import org.dspace.core.Context;
import org.dspace.discovery.DiscoverFacetField;
import org.dspace.discovery.DiscoverQuery;
import org.dspace.discovery.DiscoverQuery.SORT_ORDER;
import org.dspace.discovery.DiscoverResult;
@@ -34,7 +33,6 @@ import org.dspace.discovery.SearchService;
import org.dspace.discovery.SearchServiceException;
import org.dspace.discovery.SearchUtils;
import org.dspace.discovery.configuration.DiscoveryConfiguration;
import org.dspace.discovery.configuration.DiscoveryConfigurationParameters;
import org.dspace.discovery.indexobject.IndexableItem;
import org.dspace.services.factory.DSpaceServicesFactory;
@@ -181,32 +179,28 @@ public class SolrBrowseDAO implements BrowseDAO {
addLocationScopeFilter(query);
addDefaultFilterQueries(query);
if (distinct) {
DiscoverFacetField dff;
// To get the number of distinct values we use the next "json.facet" query param
// {"entries_count": {"type":"terms","field": "<fieldName>_filter", "limit":0, "numBuckets":true}}"
// We use a json.facet query for metadata browsing because it allows us to limit the results
// while obtaining the total number of facet values with numBuckets:true and sort in reverse order
// Example of json.facet query:
// {"<fieldName>": {"type":"terms","field": "<fieldName>_filter", "limit":0, "offset":0,
// "sort":"index desc", "numBuckets":true, "prefix":"<startsWith>"}}
ObjectNode jsonFacet = JsonNodeFactory.instance.objectNode();
ObjectNode entriesCount = JsonNodeFactory.instance.objectNode();
entriesCount.put("type", "terms");
entriesCount.put("field", facetField + "_filter");
entriesCount.put("limit", 0);
entriesCount.put("numBuckets", true);
jsonFacet.set("entries_count", entriesCount);
if (StringUtils.isNotBlank(startsWith)) {
dff = new DiscoverFacetField(facetField,
DiscoveryConfigurationParameters.TYPE_TEXT, limit,
DiscoveryConfigurationParameters.SORT.VALUE, startsWith, offset);
// Add the prefix to the json facet query
entriesCount.put("prefix", startsWith);
ObjectNode entriesFacet = JsonNodeFactory.instance.objectNode();
entriesFacet.put("type", "terms");
entriesFacet.put("field", facetField + "_filter");
entriesFacet.put("limit", limit);
entriesFacet.put("offset", offset);
entriesFacet.put("numBuckets", true);
if (ascending) {
entriesFacet.put("sort", "index");
} else {
dff = new DiscoverFacetField(facetField,
DiscoveryConfigurationParameters.TYPE_TEXT, limit,
DiscoveryConfigurationParameters.SORT.VALUE, offset);
entriesFacet.put("sort", "index desc");
}
query.addFacetField(dff);
query.setFacetMinCount(1);
if (StringUtils.isNotBlank(startsWith)) {
// Add the prefix to the json facet query
entriesFacet.put("prefix", startsWith);
}
jsonFacet.set(facetField, entriesFacet);
query.setMaxResults(0);
query.addProperty("json.facet", jsonFacet.toString());
} else {
@@ -282,27 +276,16 @@ public class SolrBrowseDAO implements BrowseDAO {
DiscoverResult resp = getSolrResponse();
List<FacetResult> facet = resp.getFacetResult(facetField);
int count = doCountQuery();
int start = 0;
int max = facet.size();
List<String[]> result = new ArrayList<>();
if (ascending) {
for (int i = start; i < (start + max) && i < count; i++) {
for (int i = 0; i < max && i < count; i++) {
FacetResult c = facet.get(i);
String freq = showFrequencies ? String.valueOf(c.getCount())
: "";
result.add(new String[] {c.getDisplayedValue(),
c.getAuthorityKey(), freq});
}
} else {
for (int i = count - start - 1; i >= count - (start + max)
&& i >= 0; i--) {
FacetResult c = facet.get(i);
String freq = showFrequencies ? String.valueOf(c.getCount())
: "";
result.add(new String[] {c.getDisplayedValue(),
c.getAuthorityKey(), freq});
}
}
return result;
}

View File

@@ -38,6 +38,7 @@ import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.response.FacetField;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.client.solrj.response.json.BucketBasedJsonFacet;
import org.apache.solr.client.solrj.response.json.BucketJsonFacet;
import org.apache.solr.client.solrj.response.json.NestableJsonFacet;
import org.apache.solr.client.solrj.util.ClientUtils;
import org.apache.solr.common.SolrDocument;
@@ -988,8 +989,8 @@ public class SolrServiceImpl implements SearchService, IndexingService {
}
//Resolve our facet field values
resolveFacetFields(context, query, result, skipLoadingResponse, solrQueryResponse);
//Add total entries count for metadata browsing
resolveEntriesCount(result, solrQueryResponse);
//Resolve our json facet field values used for metadata browsing
resolveJsonFacetFields(context, result, solrQueryResponse);
}
// If any stale entries are found in the current page of results,
// we remove those stale entries and rerun the same query again.
@@ -1016,33 +1017,38 @@ public class SolrServiceImpl implements SearchService, IndexingService {
}
/**
* Stores the total count of entries for metadata index browsing. The count is calculated by the
* <code>json.facet</code> parameter with the following value:
* Process the 'json.facet' response, which is currently only used for metadata browsing
*
* <pre><code>
* {
* "entries_count": {
* "type": "terms",
* "field": "facetNameField_filter",
* "limit": 0,
* "prefix": "prefix_value",
* "numBuckets": true
* }
* }
* </code></pre>
*
* This value is returned in the <code>facets</code> field of the Solr response.
*
* @param result DiscoverResult object where the total entries count will be stored
* @param solrQueryResponse QueryResponse object containing the solr response
* @param context context object
* @param result the result object to add the facet results to
* @param solrQueryResponse the solr query response
* @throws SQLException if database error
*/
private void resolveEntriesCount(DiscoverResult result, QueryResponse solrQueryResponse) {
private void resolveJsonFacetFields(Context context, DiscoverResult result, QueryResponse solrQueryResponse)
throws SQLException {
NestableJsonFacet response = solrQueryResponse.getJsonFacetingResponse();
if (response != null) {
BucketBasedJsonFacet facet = response.getBucketBasedFacets("entries_count");
if (response != null && response.getBucketBasedFacetNames() != null) {
for (String facetName : response.getBucketBasedFacetNames()) {
BucketBasedJsonFacet facet = response.getBucketBasedFacets(facetName);
if (facet != null) {
result.setTotalEntries(facet.getNumBucketsCount());
for (BucketJsonFacet bucket : facet.getBuckets()) {
String facetValue = bucket.getVal() != null ? bucket.getVal().toString() : "";
String field = facetName + "_filter";
String displayedValue = transformDisplayedValue(context, field, facetValue);
String authorityValue = transformAuthorityValue(context, field, facetValue);
String sortValue = transformSortValue(context, field, facetValue);
String filterValue = displayedValue;
if (StringUtils.isNotBlank(authorityValue)) {
filterValue = authorityValue;
}
result.addFacetResult(facetName,
new DiscoverResult.FacetResult(filterValue, displayedValue,
authorityValue, sortValue, bucket.getCount(),
DiscoveryConfigurationParameters.TYPE_TEXT));
}
}
}
}
}

View File

@@ -261,6 +261,185 @@ public class BrowsesResourceControllerIT extends AbstractControllerIntegrationTe
)));
}
@Test
public void findBrowseBySubjectEntriesPagination() throws Exception {
context.turnOffAuthorisationSystem();
//** GIVEN **
//1. A community-collection structure with one parent community with sub-community and two collections.
parentCommunity = CommunityBuilder.createCommunity(context)
.withName("Parent Community")
.build();
Community child1 = CommunityBuilder.createSubCommunity(context, parentCommunity)
.withName("Sub Community")
.build();
Collection col1 = CollectionBuilder.createCollection(context, child1).withName("Collection 1").build();
Collection col2 = CollectionBuilder.createCollection(context, child1).withName("Collection 2").build();
//2. Three public items that are readable by Anonymous with different subjects
Item publicItem1 = ItemBuilder.createItem(context, col1)
.withTitle("Public item 1")
.withIssueDate("2017-10-17")
.withAuthor("Smith, Donald").withAuthor("Doe, John")
.withSubject("ExtraEntry")
.build();
Item publicItem2 = ItemBuilder.createItem(context, col2)
.withTitle("Public item 2")
.withIssueDate("2016-02-13")
.withAuthor("Smith, Maria").withAuthor("Doe, Jane")
.withSubject("TestingForMore").withSubject("ExtraEntry")
.build();
Item publicItem3 = ItemBuilder.createItem(context, col2)
.withTitle("Public item 2")
.withIssueDate("2016-02-13")
.withAuthor("Smith, Maria").withAuthor("Doe, Jane")
.withSubject("AnotherTest").withSubject("TestingForMore")
.withSubject("ExtraEntry")
.build();
Item withdrawnItem1 = ItemBuilder.createItem(context, col2)
.withTitle("Withdrawn item 1")
.withIssueDate("2016-02-13")
.withAuthor("Smith, Maria").withAuthor("Doe, Jane")
.withSubject("AnotherTest").withSubject("TestingForMore")
.withSubject("ExtraEntry").withSubject("WithdrawnEntry")
.withdrawn()
.build();
Item privateItem1 = ItemBuilder.createItem(context, col2)
.withTitle("Private item 1")
.withIssueDate("2016-02-13")
.withAuthor("Smith, Maria").withAuthor("Doe, Jane")
.withSubject("AnotherTest").withSubject("TestingForMore")
.withSubject("ExtraEntry").withSubject("PrivateEntry")
.makeUnDiscoverable()
.build();
context.restoreAuthSystemState();
//** WHEN **
//An anonymous user browses this endpoint to find which subjects are currently in the repository
getClient().perform(get("/api/discover/browses/subject/entries")
.param("projection", "full")
.param("size", "1"))
//** THEN **
//The status has to be 200
.andExpect(status().isOk())
//We expect the content type to be "application/hal+json;charset=UTF-8"
.andExpect(content().contentType(contentType))
.andExpect(jsonPath("$.page.size", is(1)))
.andExpect(jsonPath("$.page.number", is(0)))
//Check that there are indeed 3 different subjects
.andExpect(jsonPath("$.page.totalElements", is(3)))
//Check that the subject matches as expected
.andExpect(jsonPath("$._embedded.entries",
contains(BrowseEntryResourceMatcher.matchBrowseEntry("AnotherTest", 1)
)));
getClient().perform(get("/api/discover/browses/subject/entries")
.param("projection", "full")
.param("size", "1")
.param("page","1"))
//** THEN **
//The status has to be 200
.andExpect(status().isOk())
//We expect the content type to be "application/hal+json;charset=UTF-8"
.andExpect(content().contentType(contentType))
.andExpect(jsonPath("$.page.size", is(1)))
.andExpect(jsonPath("$.page.number", is(1)))
//Check that there are indeed 3 different subjects
.andExpect(jsonPath("$.page.totalElements", is(3)))
//Check that the subject matches as expected
.andExpect(jsonPath("$._embedded.entries",
contains(BrowseEntryResourceMatcher.matchBrowseEntry("ExtraEntry", 3)
)));
getClient().perform(get("/api/discover/browses/subject/entries")
.param("projection", "full")
.param("size", "1")
.param("page","2"))
//** THEN **
//The status has to be 200
.andExpect(status().isOk())
//We expect the content type to be "application/hal+json;charset=UTF-8"
.andExpect(content().contentType(contentType))
.andExpect(jsonPath("$.page.size", is(1)))
.andExpect(jsonPath("$.page.number", is(2)))
//Check that there are indeed 3 different subjects
.andExpect(jsonPath("$.page.totalElements", is(3)))
//Check that the subject matches as expected
.andExpect(jsonPath("$._embedded.entries",
contains(BrowseEntryResourceMatcher.matchBrowseEntry("TestingForMore", 2)
)));
getClient().perform(get("/api/discover/browses/subject/entries")
.param("sort", "value,desc")
.param("size", "1"))
//** THEN **
//The status has to be 200
.andExpect(status().isOk())
//We expect the content type to be "application/hal+json;charset=UTF-8"
.andExpect(content().contentType(contentType))
.andExpect(jsonPath("$.page.size", is(1)))
.andExpect(jsonPath("$.page.number", is(0)))
//Check that there are indeed 3 different subjects
.andExpect(jsonPath("$.page.totalElements", is(3)))
//Check that the subject matches as expected
.andExpect(jsonPath("$._embedded.entries",
contains(BrowseEntryResourceMatcher.matchBrowseEntry("TestingForMore", 2)
)));
getClient().perform(get("/api/discover/browses/subject/entries")
.param("sort", "value,desc")
.param("size", "1")
.param("page","1"))
//** THEN **
//The status has to be 200
.andExpect(status().isOk())
//We expect the content type to be "application/hal+json;charset=UTF-8"
.andExpect(content().contentType(contentType))
.andExpect(jsonPath("$.page.size", is(1)))
.andExpect(jsonPath("$.page.number", is(1)))
//Check that there are indeed 3 different subjects
.andExpect(jsonPath("$.page.totalElements", is(3)))
//Check that the subject matches as expected
.andExpect(jsonPath("$._embedded.entries",
contains(BrowseEntryResourceMatcher.matchBrowseEntry("ExtraEntry", 3)
)));
getClient().perform(get("/api/discover/browses/subject/entries")
.param("sort", "value,desc")
.param("size", "1")
.param("page","2"))
//** THEN **
//The status has to be 200
.andExpect(status().isOk())
//We expect the content type to be "application/hal+json;charset=UTF-8"
.andExpect(content().contentType(contentType))
.andExpect(jsonPath("$.page.size", is(1)))
.andExpect(jsonPath("$.page.number", is(2)))
//Check that there are indeed 3 different subjects
.andExpect(jsonPath("$.page.totalElements", is(3)))
//Check that the subject matches as expected
.andExpect(jsonPath("$._embedded.entries",
contains(BrowseEntryResourceMatcher.matchBrowseEntry("AnotherTest", 1)
)));
}
@Test
public void findBrowseBySubjectEntriesWithAuthority() throws Exception {
configurationService.setProperty("choices.plugin.dc.subject",