Merge pull request #10519 from toniprieto/browse-entries-desc-bug

Fix metadata browsing in descending order using a json.facet to retrieve index values
This commit is contained in:
kshepherd
2025-04-17 15:20:45 +02:00
committed by GitHub
3 changed files with 236 additions and 68 deletions

View File

@@ -23,7 +23,6 @@ import org.dspace.authorize.service.AuthorizeService;
import org.dspace.content.DSpaceObject; import org.dspace.content.DSpaceObject;
import org.dspace.content.Item; import org.dspace.content.Item;
import org.dspace.core.Context; import org.dspace.core.Context;
import org.dspace.discovery.DiscoverFacetField;
import org.dspace.discovery.DiscoverQuery; import org.dspace.discovery.DiscoverQuery;
import org.dspace.discovery.DiscoverQuery.SORT_ORDER; import org.dspace.discovery.DiscoverQuery.SORT_ORDER;
import org.dspace.discovery.DiscoverResult; import org.dspace.discovery.DiscoverResult;
@@ -34,7 +33,6 @@ import org.dspace.discovery.SearchService;
import org.dspace.discovery.SearchServiceException; import org.dspace.discovery.SearchServiceException;
import org.dspace.discovery.SearchUtils; import org.dspace.discovery.SearchUtils;
import org.dspace.discovery.configuration.DiscoveryConfiguration; import org.dspace.discovery.configuration.DiscoveryConfiguration;
import org.dspace.discovery.configuration.DiscoveryConfigurationParameters;
import org.dspace.discovery.indexobject.IndexableItem; import org.dspace.discovery.indexobject.IndexableItem;
import org.dspace.services.factory.DSpaceServicesFactory; import org.dspace.services.factory.DSpaceServicesFactory;
@@ -181,32 +179,28 @@ public class SolrBrowseDAO implements BrowseDAO {
addLocationScopeFilter(query); addLocationScopeFilter(query);
addDefaultFilterQueries(query); addDefaultFilterQueries(query);
if (distinct) { if (distinct) {
DiscoverFacetField dff; // We use a json.facet query for metadata browsing because it allows us to limit the results
// while obtaining the total number of facet values with numBuckets:true and sort in reverse order
// To get the number of distinct values we use the next "json.facet" query param // Example of json.facet query:
// {"entries_count": {"type":"terms","field": "<fieldName>_filter", "limit":0, "numBuckets":true}}" // {"<fieldName>": {"type":"terms","field": "<fieldName>_filter", "limit":0, "offset":0,
// "sort":"index desc", "numBuckets":true, "prefix":"<startsWith>"}}
ObjectNode jsonFacet = JsonNodeFactory.instance.objectNode(); ObjectNode jsonFacet = JsonNodeFactory.instance.objectNode();
ObjectNode entriesCount = JsonNodeFactory.instance.objectNode(); ObjectNode entriesFacet = JsonNodeFactory.instance.objectNode();
entriesCount.put("type", "terms"); entriesFacet.put("type", "terms");
entriesCount.put("field", facetField + "_filter"); entriesFacet.put("field", facetField + "_filter");
entriesCount.put("limit", 0); entriesFacet.put("limit", limit);
entriesCount.put("numBuckets", true); entriesFacet.put("offset", offset);
jsonFacet.set("entries_count", entriesCount); entriesFacet.put("numBuckets", true);
if (ascending) {
if (StringUtils.isNotBlank(startsWith)) { entriesFacet.put("sort", "index");
dff = new DiscoverFacetField(facetField,
DiscoveryConfigurationParameters.TYPE_TEXT, limit,
DiscoveryConfigurationParameters.SORT.VALUE, startsWith, offset);
// Add the prefix to the json facet query
entriesCount.put("prefix", startsWith);
} else { } else {
dff = new DiscoverFacetField(facetField, entriesFacet.put("sort", "index desc");
DiscoveryConfigurationParameters.TYPE_TEXT, limit,
DiscoveryConfigurationParameters.SORT.VALUE, offset);
} }
query.addFacetField(dff); if (StringUtils.isNotBlank(startsWith)) {
query.setFacetMinCount(1); // Add the prefix to the json facet query
entriesFacet.put("prefix", startsWith);
}
jsonFacet.set(facetField, entriesFacet);
query.setMaxResults(0); query.setMaxResults(0);
query.addProperty("json.facet", jsonFacet.toString()); query.addProperty("json.facet", jsonFacet.toString());
} else { } else {
@@ -282,26 +276,15 @@ public class SolrBrowseDAO implements BrowseDAO {
DiscoverResult resp = getSolrResponse(); DiscoverResult resp = getSolrResponse();
List<FacetResult> facet = resp.getFacetResult(facetField); List<FacetResult> facet = resp.getFacetResult(facetField);
int count = doCountQuery(); int count = doCountQuery();
int start = 0;
int max = facet.size(); int max = facet.size();
List<String[]> result = new ArrayList<>(); List<String[]> result = new ArrayList<>();
if (ascending) {
for (int i = start; i < (start + max) && i < count; i++) { for (int i = 0; i < max && i < count; i++) {
FacetResult c = facet.get(i); FacetResult c = facet.get(i);
String freq = showFrequencies ? String.valueOf(c.getCount()) String freq = showFrequencies ? String.valueOf(c.getCount())
: ""; : "";
result.add(new String[] {c.getDisplayedValue(), result.add(new String[] {c.getDisplayedValue(),
c.getAuthorityKey(), freq}); c.getAuthorityKey(), freq});
}
} else {
for (int i = count - start - 1; i >= count - (start + max)
&& i >= 0; i--) {
FacetResult c = facet.get(i);
String freq = showFrequencies ? String.valueOf(c.getCount())
: "";
result.add(new String[] {c.getDisplayedValue(),
c.getAuthorityKey(), freq});
}
} }
return result; return result;

View File

@@ -38,6 +38,7 @@ import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.response.FacetField; import org.apache.solr.client.solrj.response.FacetField;
import org.apache.solr.client.solrj.response.QueryResponse; import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.client.solrj.response.json.BucketBasedJsonFacet; import org.apache.solr.client.solrj.response.json.BucketBasedJsonFacet;
import org.apache.solr.client.solrj.response.json.BucketJsonFacet;
import org.apache.solr.client.solrj.response.json.NestableJsonFacet; import org.apache.solr.client.solrj.response.json.NestableJsonFacet;
import org.apache.solr.client.solrj.util.ClientUtils; import org.apache.solr.client.solrj.util.ClientUtils;
import org.apache.solr.common.SolrDocument; import org.apache.solr.common.SolrDocument;
@@ -988,8 +989,8 @@ public class SolrServiceImpl implements SearchService, IndexingService {
} }
//Resolve our facet field values //Resolve our facet field values
resolveFacetFields(context, query, result, skipLoadingResponse, solrQueryResponse); resolveFacetFields(context, query, result, skipLoadingResponse, solrQueryResponse);
//Add total entries count for metadata browsing //Resolve our json facet field values used for metadata browsing
resolveEntriesCount(result, solrQueryResponse); resolveJsonFacetFields(context, result, solrQueryResponse);
} }
// If any stale entries are found in the current page of results, // If any stale entries are found in the current page of results,
// we remove those stale entries and rerun the same query again. // we remove those stale entries and rerun the same query again.
@@ -1016,33 +1017,38 @@ public class SolrServiceImpl implements SearchService, IndexingService {
} }
/** /**
* Stores the total count of entries for metadata index browsing. The count is calculated by the * Process the 'json.facet' response, which is currently only used for metadata browsing
* <code>json.facet</code> parameter with the following value:
* *
* <pre><code> * @param context context object
* { * @param result the result object to add the facet results to
* "entries_count": { * @param solrQueryResponse the solr query response
* "type": "terms", * @throws SQLException if database error
* "field": "facetNameField_filter",
* "limit": 0,
* "prefix": "prefix_value",
* "numBuckets": true
* }
* }
* </code></pre>
*
* This value is returned in the <code>facets</code> field of the Solr response.
*
* @param result DiscoverResult object where the total entries count will be stored
* @param solrQueryResponse QueryResponse object containing the solr response
*/ */
private void resolveEntriesCount(DiscoverResult result, QueryResponse solrQueryResponse) { private void resolveJsonFacetFields(Context context, DiscoverResult result, QueryResponse solrQueryResponse)
throws SQLException {
NestableJsonFacet response = solrQueryResponse.getJsonFacetingResponse(); NestableJsonFacet response = solrQueryResponse.getJsonFacetingResponse();
if (response != null) { if (response != null && response.getBucketBasedFacetNames() != null) {
BucketBasedJsonFacet facet = response.getBucketBasedFacets("entries_count"); for (String facetName : response.getBucketBasedFacetNames()) {
if (facet != null) { BucketBasedJsonFacet facet = response.getBucketBasedFacets(facetName);
result.setTotalEntries(facet.getNumBucketsCount()); if (facet != null) {
result.setTotalEntries(facet.getNumBucketsCount());
for (BucketJsonFacet bucket : facet.getBuckets()) {
String facetValue = bucket.getVal() != null ? bucket.getVal().toString() : "";
String field = facetName + "_filter";
String displayedValue = transformDisplayedValue(context, field, facetValue);
String authorityValue = transformAuthorityValue(context, field, facetValue);
String sortValue = transformSortValue(context, field, facetValue);
String filterValue = displayedValue;
if (StringUtils.isNotBlank(authorityValue)) {
filterValue = authorityValue;
}
result.addFacetResult(facetName,
new DiscoverResult.FacetResult(filterValue, displayedValue,
authorityValue, sortValue, bucket.getCount(),
DiscoveryConfigurationParameters.TYPE_TEXT));
}
}
} }
} }
} }

View File

@@ -261,6 +261,185 @@ public class BrowsesResourceControllerIT extends AbstractControllerIntegrationTe
))); )));
} }
@Test
public void findBrowseBySubjectEntriesPagination() throws Exception {
context.turnOffAuthorisationSystem();
//** GIVEN **
//1. A community-collection structure with one parent community with sub-community and two collections.
parentCommunity = CommunityBuilder.createCommunity(context)
.withName("Parent Community")
.build();
Community child1 = CommunityBuilder.createSubCommunity(context, parentCommunity)
.withName("Sub Community")
.build();
Collection col1 = CollectionBuilder.createCollection(context, child1).withName("Collection 1").build();
Collection col2 = CollectionBuilder.createCollection(context, child1).withName("Collection 2").build();
//2. Three public items that are readable by Anonymous with different subjects
Item publicItem1 = ItemBuilder.createItem(context, col1)
.withTitle("Public item 1")
.withIssueDate("2017-10-17")
.withAuthor("Smith, Donald").withAuthor("Doe, John")
.withSubject("ExtraEntry")
.build();
Item publicItem2 = ItemBuilder.createItem(context, col2)
.withTitle("Public item 2")
.withIssueDate("2016-02-13")
.withAuthor("Smith, Maria").withAuthor("Doe, Jane")
.withSubject("TestingForMore").withSubject("ExtraEntry")
.build();
Item publicItem3 = ItemBuilder.createItem(context, col2)
.withTitle("Public item 2")
.withIssueDate("2016-02-13")
.withAuthor("Smith, Maria").withAuthor("Doe, Jane")
.withSubject("AnotherTest").withSubject("TestingForMore")
.withSubject("ExtraEntry")
.build();
Item withdrawnItem1 = ItemBuilder.createItem(context, col2)
.withTitle("Withdrawn item 1")
.withIssueDate("2016-02-13")
.withAuthor("Smith, Maria").withAuthor("Doe, Jane")
.withSubject("AnotherTest").withSubject("TestingForMore")
.withSubject("ExtraEntry").withSubject("WithdrawnEntry")
.withdrawn()
.build();
Item privateItem1 = ItemBuilder.createItem(context, col2)
.withTitle("Private item 1")
.withIssueDate("2016-02-13")
.withAuthor("Smith, Maria").withAuthor("Doe, Jane")
.withSubject("AnotherTest").withSubject("TestingForMore")
.withSubject("ExtraEntry").withSubject("PrivateEntry")
.makeUnDiscoverable()
.build();
context.restoreAuthSystemState();
//** WHEN **
//An anonymous user browses this endpoint to find which subjects are currently in the repository
getClient().perform(get("/api/discover/browses/subject/entries")
.param("projection", "full")
.param("size", "1"))
//** THEN **
//The status has to be 200
.andExpect(status().isOk())
//We expect the content type to be "application/hal+json;charset=UTF-8"
.andExpect(content().contentType(contentType))
.andExpect(jsonPath("$.page.size", is(1)))
.andExpect(jsonPath("$.page.number", is(0)))
//Check that there are indeed 3 different subjects
.andExpect(jsonPath("$.page.totalElements", is(3)))
//Check that the subject matches as expected
.andExpect(jsonPath("$._embedded.entries",
contains(BrowseEntryResourceMatcher.matchBrowseEntry("AnotherTest", 1)
)));
getClient().perform(get("/api/discover/browses/subject/entries")
.param("projection", "full")
.param("size", "1")
.param("page","1"))
//** THEN **
//The status has to be 200
.andExpect(status().isOk())
//We expect the content type to be "application/hal+json;charset=UTF-8"
.andExpect(content().contentType(contentType))
.andExpect(jsonPath("$.page.size", is(1)))
.andExpect(jsonPath("$.page.number", is(1)))
//Check that there are indeed 3 different subjects
.andExpect(jsonPath("$.page.totalElements", is(3)))
//Check that the subject matches as expected
.andExpect(jsonPath("$._embedded.entries",
contains(BrowseEntryResourceMatcher.matchBrowseEntry("ExtraEntry", 3)
)));
getClient().perform(get("/api/discover/browses/subject/entries")
.param("projection", "full")
.param("size", "1")
.param("page","2"))
//** THEN **
//The status has to be 200
.andExpect(status().isOk())
//We expect the content type to be "application/hal+json;charset=UTF-8"
.andExpect(content().contentType(contentType))
.andExpect(jsonPath("$.page.size", is(1)))
.andExpect(jsonPath("$.page.number", is(2)))
//Check that there are indeed 3 different subjects
.andExpect(jsonPath("$.page.totalElements", is(3)))
//Check that the subject matches as expected
.andExpect(jsonPath("$._embedded.entries",
contains(BrowseEntryResourceMatcher.matchBrowseEntry("TestingForMore", 2)
)));
getClient().perform(get("/api/discover/browses/subject/entries")
.param("sort", "value,desc")
.param("size", "1"))
//** THEN **
//The status has to be 200
.andExpect(status().isOk())
//We expect the content type to be "application/hal+json;charset=UTF-8"
.andExpect(content().contentType(contentType))
.andExpect(jsonPath("$.page.size", is(1)))
.andExpect(jsonPath("$.page.number", is(0)))
//Check that there are indeed 3 different subjects
.andExpect(jsonPath("$.page.totalElements", is(3)))
//Check that the subject matches as expected
.andExpect(jsonPath("$._embedded.entries",
contains(BrowseEntryResourceMatcher.matchBrowseEntry("TestingForMore", 2)
)));
getClient().perform(get("/api/discover/browses/subject/entries")
.param("sort", "value,desc")
.param("size", "1")
.param("page","1"))
//** THEN **
//The status has to be 200
.andExpect(status().isOk())
//We expect the content type to be "application/hal+json;charset=UTF-8"
.andExpect(content().contentType(contentType))
.andExpect(jsonPath("$.page.size", is(1)))
.andExpect(jsonPath("$.page.number", is(1)))
//Check that there are indeed 3 different subjects
.andExpect(jsonPath("$.page.totalElements", is(3)))
//Check that the subject matches as expected
.andExpect(jsonPath("$._embedded.entries",
contains(BrowseEntryResourceMatcher.matchBrowseEntry("ExtraEntry", 3)
)));
getClient().perform(get("/api/discover/browses/subject/entries")
.param("sort", "value,desc")
.param("size", "1")
.param("page","2"))
//** THEN **
//The status has to be 200
.andExpect(status().isOk())
//We expect the content type to be "application/hal+json;charset=UTF-8"
.andExpect(content().contentType(contentType))
.andExpect(jsonPath("$.page.size", is(1)))
.andExpect(jsonPath("$.page.number", is(2)))
//Check that there are indeed 3 different subjects
.andExpect(jsonPath("$.page.totalElements", is(3)))
//Check that the subject matches as expected
.andExpect(jsonPath("$._embedded.entries",
contains(BrowseEntryResourceMatcher.matchBrowseEntry("AnotherTest", 1)
)));
}
@Test @Test
public void findBrowseBySubjectEntriesWithAuthority() throws Exception { public void findBrowseBySubjectEntriesWithAuthority() throws Exception {
configurationService.setProperty("choices.plugin.dc.subject", configurationService.setProperty("choices.plugin.dc.subject",