DS-3489: DiscoverQueryBuilder implementation part 1

This commit is contained in:
Tom Desair
2017-09-20 00:11:41 +02:00
parent 6b66f1282b
commit 7871b618c4
15 changed files with 519 additions and 10 deletions

View File

@@ -354,6 +354,10 @@
<groupId>commons-collections</groupId> <groupId>commons-collections</groupId>
<artifactId>commons-collections</artifactId> <artifactId>commons-collections</artifactId>
</dependency> </dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-collections4</artifactId>
</dependency>
<dependency> <dependency>
<groupId>org.apache.commons</groupId> <groupId>org.apache.commons</groupId>
<artifactId>commons-dbcp2</artifactId> <artifactId>commons-dbcp2</artifactId>

View File

@@ -7,6 +7,9 @@
*/ */
package org.dspace.discovery; package org.dspace.discovery;
import org.apache.commons.collections4.CollectionUtils;
import org.dspace.discovery.configuration.DiscoverySearchFilterFacet;
import java.util.*; import java.util.*;
/** /**
@@ -283,4 +286,58 @@ public class DiscoverQuery {
public void setSpellCheck(boolean spellCheck) { public void setSpellCheck(boolean spellCheck) {
this.spellCheck = spellCheck; this.spellCheck = spellCheck;
} }
/**
 * Adds the faceting for a date facet, based on the year range that was resolved for it.
 * Nothing is added when the given range is not valid.
 * <p>
 * When the gap between the buckets is a single year, a regular facet field limited to 10 values is registered.
 * Otherwise one facet query per year bucket is added.
 *
 * @param facet the configuration of the date facet
 * @param facetYearRange the year range resolved for that facet
 */
public void addYearRangeFacet(DiscoverySearchFilterFacet facet, FacetYearRange facetYearRange) {
    if (!facetYearRange.isValid()) {
        return;
    }

    final int newest = facetYearRange.getNewestYear();
    final int oldest = facetYearRange.getOldestYear();
    final String yearField = facetYearRange.getDateFacet();
    final int yearGap = facetYearRange.getYearGap();

    if (yearGap == 1) {
        // We have a date range: facet on the field itself.
        // The faceting will automatically be limited to the 10 years in our span due to our filter query.
        DiscoverFacetField yearFacetField = new DiscoverFacetField(
                facet.getIndexFieldName(), facet.getType(), 10, facet.getSortOrderSidebar());
        this.addFacetField(yearFacetField);
        return;
    }

    // Start counting from a clean top year.
    // Example: newest year 2001 with a gap of 10 must give 2010 - 2000 ; 2000 - 1990, hence top year 2010.
    final int cleanTopYear = getTopYear(newest, yearGap);
    for (String rangeQuery : buildFacetQueriesWithGap(newest, oldest, yearField, yearGap, cleanTopYear)) {
        this.addFacetQuery(rangeQuery);
    }
}
/**
 * Builds the range facet queries ({@code field:[bottom TO top]}) that split the year range into buckets of
 * {@code gap} years, walking down from {@code topYear} towards {@code oldestYear}.
 * At most 11 queries are created (11 == when a "show more" url needs to be shown).
 *
 * @param newestYear upper bound used for the first (top) bucket
 * @param oldestYear lower bound; no bucket goes below this year
 * @param dateFacet name of the field the range queries are built for
 * @param gap number of years between two bucket boundaries
 * @param topYear the year to start counting down from
 * @return the facet queries, from the most recent bucket to the oldest one
 */
private List<String> buildFacetQueriesWithGap(int newestYear, int oldestYear, String dateFacet, int gap, int topYear) {
    final int maxFacetQueries = 11;
    List<String> rangeQueries = new LinkedList<>();

    int bucketBoundary = topYear;
    while (bucketBoundary > oldestYear && rangeQueries.size() < maxFacetQueries) {
        // Never go below the oldest year found
        int lowerYear = Math.max(bucketBoundary - gap, oldestYear);
        // The top bucket ends at the newest year; every other bucket ends one year below its boundary
        // so that it does not overlap with the bucket above it
        int upperYear = (bucketBoundary == topYear) ? newestYear : bucketBoundary - 1;

        rangeQueries.add(dateFacet + ":[" + lowerYear + " TO " + upperYear + "]");
        bucketBoundary -= gap;
    }
    return rangeQueries;
}
/**
 * Rounds the newest year up to the next multiple of the gap, so that the buckets start from a clean year.
 * Example: newest year 2001 with a gap of 10 gives 2010.
 *
 * @param newestYear the most recent year of the range
 * @param gap the number of years per bucket
 * @return the newest year rounded up to a multiple of the gap
 */
private int getTopYear(int newestYear, int gap) {
    float gapsInNewestYear = (float) newestYear / gap;
    double wholeGaps = Math.ceil(gapsInNewestYear);
    return (int) (wholeGaps * gap);
}
} }

View File

@@ -0,0 +1,142 @@
package org.dspace.discovery;
import org.dspace.content.DSpaceObject;
import org.dspace.core.Context;
import org.dspace.discovery.configuration.DiscoverySearchFilterFacet;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Utility class that represents the year range (oldest and newest year) for a date facet.
 * <p>
 * The range is resolved by {@link #calculateRange}: it is taken from a filter query that is already applied on
 * the facet's {@code <indexFieldName>.year} field or, when there is no such filter, looked up in the search index.
 * As long as no range has been resolved both years are {@code -1} and {@link #isValid()} returns {@code false}.
 */
public class FacetYearRange {

    private static final Pattern PATTERN = Pattern.compile("\\[(.*? TO .*?)\\]");

    /** Value of a year that has not been resolved. */
    private static final int UNKNOWN_YEAR = -1;

    private final DiscoverySearchFilterFacet facet;
    private String dateFacet;
    private int oldestYear = UNKNOWN_YEAR;
    private int newestYear = UNKNOWN_YEAR;

    /**
     * @param facet the date facet configuration this range belongs to
     */
    public FacetYearRange(DiscoverySearchFilterFacet facet) {
        this.facet = facet;
    }

    /**
     * @return the name of the year field of the facet; {@code null} until {@link #calculateRange} has been called
     */
    public String getDateFacet() {
        return dateFacet;
    }

    /**
     * @return the oldest year of the range, or {@code -1} when it is not known
     */
    public int getOldestYear() {
        return oldestYear;
    }

    /**
     * @return the newest year of the range, or {@code -1} when it is not known
     */
    public int getNewestYear() {
        return newestYear;
    }

    /**
     * @return {@code true} when both the oldest and the newest year are known
     */
    public boolean isValid() {
        return oldestYear != UNKNOWN_YEAR && newestYear != UNKNOWN_YEAR;
    }

    /**
     * Resolves the oldest and newest year of this range.
     * Previously selected filters on the year field take precedence; the search index is only queried when
     * the filter queries did not yield any year.
     *
     * @param context the DSpace context, passed on to the search service
     * @param filterQueries the filter queries that are currently applied; {@code null} is treated as no filters
     * @param scope the scope to search in, passed on to the search service
     * @param searchService the service used to look up the range in the search index
     * @throws SearchServiceException when querying the search index fails
     * @throws NumberFormatException when a year found in a filter query or in the index is not an integer
     */
    public void calculateRange(Context context, List<String> filterQueries, DSpaceObject scope, SearchService searchService) throws SearchServiceException {
        dateFacet = facet.getIndexFieldName() + ".year";

        //Attempt to determine our oldest & newest year by checking for previously selected filters
        lookupPreviousRangeInFilterQueries(filterQueries);

        //Check if we have found a range, if not then retrieve our oldest & newest year using the search index
        if (oldestYear == UNKNOWN_YEAR && newestYear == UNKNOWN_YEAR) {
            calculateNewRangeBasedOnSearchIndex(context, filterQueries, scope, searchService);
        }
    }

    /**
     * Looks for filter queries on the year field and derives the range from them.
     * A range filter ({@code [x TO y]}) replaces the current range when it is the first one found or when it
     * has a lower upper bound or a higher lower bound than the current one.
     * A single year filter sets both years to that value and ends the lookup.
     */
    private void lookupPreviousRangeInFilterQueries(List<String> filterQueries) {
        if (filterQueries == null) {
            return;
        }

        for (String filterQuery : filterQueries) {
            if (!filterQuery.startsWith(dateFacet + ":")) {
                continue;
            }

            Matcher matcher = PATTERN.matcher(filterQuery);
            if (matcher.find()) {
                //We have a range, resolve it to an oldest & newest year (split only once)
                String[] bounds = matcher.group(0).split(" TO ");
                int tempOldYear = Integer.parseInt(bounds[0].replace("[", "").trim());
                int tempNewYear = Integer.parseInt(bounds[1].replace("]", "").trim());

                //Check if we have a further filter (or a first one found)
                if (tempNewYear < newestYear || oldestYear < tempOldYear || newestYear == UNKNOWN_YEAR) {
                    oldestYear = tempOldYear;
                    newestYear = tempNewYear;
                }
            } else {
                String singleYearQuery = filterQuery;
                if (singleYearQuery.contains(" OR ")) {
                    //Should always be the case
                    singleYearQuery = singleYearQuery.split(" OR ")[0];
                }
                //We should have a single date
                oldestYear = Integer.parseInt(singleYearQuery.split(":")[1].trim());
                newestYear = oldestYear;
                //No need to look further
                break;
            }
        }
    }

    /**
     * Queries the search index for the oldest and the newest value of the year field, within the given
     * filter queries and scope. A year stays unchanged when the corresponding search yields no value.
     */
    private void calculateNewRangeBasedOnSearchIndex(Context context, List<String> filterQueries, DSpaceObject scope, SearchService searchService) throws SearchServiceException {
        DiscoverQuery yearRangeQuery = new DiscoverQuery();
        yearRangeQuery.setMaxResults(1);
        //Set our query to anything that has this value
        yearRangeQuery.addFieldPresentQueries(dateFacet);
        if (filterQueries != null) {
            yearRangeQuery.addFilterQueries(filterQueries.toArray(new String[filterQueries.size()]));
        }
        yearRangeQuery.addSearchField(dateFacet);

        //Sort ascending so the oldest year is the single result
        yearRangeQuery.setSortField(dateFacet + "_sort", DiscoverQuery.SORT_ORDER.asc);
        oldestYear = findYearOfFirstResult(context, scope, searchService, yearRangeQuery, oldestYear);

        //Sort descending so the newest year is the single result
        yearRangeQuery.setSortField(dateFacet + "_sort", DiscoverQuery.SORT_ORDER.desc);
        newestYear = findYearOfFirstResult(context, scope, searchService, yearRangeQuery, newestYear);
    }

    /**
     * Runs the query and returns the first value of the year field of the first result.
     *
     * @return the year that was found, or {@code fallback} when the search has no result or the result has no value
     */
    private int findYearOfFirstResult(Context context, DSpaceObject scope, SearchService searchService, DiscoverQuery yearRangeQuery, int fallback) throws SearchServiceException {
        DiscoverResult result = searchService.search(context, scope, yearRangeQuery);
        if (result.getDspaceObjects().isEmpty()) {
            return fallback;
        }

        List<DiscoverResult.SearchDocument> searchDocuments = result.getSearchDocument(result.getDspaceObjects().get(0));
        if (searchDocuments.isEmpty()) {
            return fallback;
        }

        List<String> years = searchDocuments.get(0).getSearchFieldValues(dateFacet);
        if (years.isEmpty()) {
            return fallback;
        }
        return Integer.parseInt(years.get(0));
    }

    /**
     * Calculates the number of years per facet bucket.
     * The gap is 1 when the oldest and newest year are equal; otherwise it is the year difference divided by
     * the facet limit, rounded up to a multiple of 10.
     *
     * @return the gap in years
     */
    public int getYearGap() {
        int gap = 1;
        int yearDifference = newestYear - oldestYear;
        if (yearDifference != 0) {
            // NOTE(review): assumes facet.getFacetLimit() is positive; a limit of 0 or below gives a meaningless gap - TODO confirm
            gap = round((double) yearDifference / facet.getFacetLimit(), 10);
        }
        return gap;
    }

    /**
     * Rounds the number up to a whole number and, when that is not a multiple of {@code multiple} yet,
     * moves it on to the next multiple.
     */
    private int round(double number, int multiple) {
        int result = (int) Math.ceil(number);
        //If not already multiple of given number
        if (result % multiple != 0) {
            int division = (result / multiple) + 1;
            result = division * multiple;
        }
        return result;
    }
}

View File

@@ -11,6 +11,7 @@ import org.dspace.content.DSpaceObject;
import org.dspace.content.Item; import org.dspace.content.Item;
import org.dspace.core.Context; import org.dspace.core.Context;
import org.dspace.discovery.configuration.DiscoveryMoreLikeThisConfiguration; import org.dspace.discovery.configuration.DiscoveryMoreLikeThisConfiguration;
import org.dspace.discovery.configuration.DiscoverySearchFilterFacet;
import java.io.InputStream; import java.io.InputStream;
import java.sql.SQLException; import java.sql.SQLException;
@@ -148,4 +149,6 @@ public interface SearchService {
* @return query with any special characters escaped * @return query with any special characters escaped
*/ */
String escapeQueryChars(String query); String escapeQueryChars(String query);
/**
 * Returns the year range to use for the given date facet.
 *
 * @param context the DSpace context
 * @param scope the DSpace object the search is limited to
 * @param facet the date facet to determine the year range for
 * @param filterQueries the filter queries that are applied on the search
 * @return the year range for the facet
 * @throws SearchServiceException when the year range could not be determined because the search failed
 */
FacetYearRange getFacetYearRange(Context context, DSpaceObject scope, DiscoverySearchFilterFacet facet, List<String> filterQueries) throws SearchServiceException;
} }

View File

@@ -2393,4 +2393,11 @@ public class SolrServiceImpl implements SearchService, IndexingService {
// rely on special characters to separate the field from the query value) // rely on special characters to separate the field from the query value)
return ClientUtils.escapeQueryChars(query); return ClientUtils.escapeQueryChars(query);
} }
/**
 * Creates a year range for the given facet and lets it calculate its oldest and newest year,
 * using this service for any lookup in the search index.
 */
@Override
public FacetYearRange getFacetYearRange(Context context, DSpaceObject scope, DiscoverySearchFilterFacet facet, List<String> filterQueries) throws SearchServiceException {
    final FacetYearRange yearRange = new FacetYearRange(facet);
    yearRange.calculateRange(context, filterQueries, scope, this);
    return yearRange;
}
} }

View File

@@ -15,6 +15,9 @@
package org.dspace.discovery.configuration; package org.dspace.discovery.configuration;
import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.lang.StringUtils;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
@@ -58,4 +61,13 @@ public class DiscoverySortConfiguration {
public void setDefaultSortOrder(SORT_ORDER defaultSortOrder) { public void setDefaultSortOrder(SORT_ORDER defaultSortOrder) {
this.defaultSortOrder = defaultSortOrder; this.defaultSortOrder = defaultSortOrder;
} }
/**
 * Checks whether one of the configured sort fields has the given metadata field.
 *
 * @param sortField the metadata field to look for
 * @return {@code true} when a configured sort field matches, {@code false} otherwise
 *         (also when no sort fields are configured)
 */
public boolean isValidSortField(String sortField) {
    boolean configured = false;
    for (DiscoverySortFieldConfiguration candidate : CollectionUtils.emptyIfNull(sortFields)) {
        configured = StringUtils.equals(candidate.getMetadataField(), sortField);
        if (configured) {
            // First match is enough
            break;
        }
    }
    return configured;
}
} }

View File

@@ -257,7 +257,6 @@
<dependency> <dependency>
<groupId>org.apache.commons</groupId> <groupId>org.apache.commons</groupId>
<artifactId>commons-collections4</artifactId> <artifactId>commons-collections4</artifactId>
<version>4.1</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>joda-time</groupId> <groupId>joda-time</groupId>

View File

@@ -2,9 +2,11 @@ package org.dspace.app.rest;
import org.apache.commons.lang.StringUtils; import org.apache.commons.lang.StringUtils;
import org.apache.log4j.Logger; import org.apache.log4j.Logger;
import org.dspace.app.rest.exception.InvalidRequestException;
import org.dspace.app.rest.model.DiscoveryRest; import org.dspace.app.rest.model.DiscoveryRest;
import org.dspace.app.rest.parameter.SearchFilter; import org.dspace.app.rest.parameter.SearchFilter;
import org.dspace.app.rest.repository.AbstractDSpaceRestRepository; import org.dspace.app.rest.repository.AbstractDSpaceRestRepository;
import org.dspace.app.rest.utils.DiscoverQueryBuilder;
import org.dspace.app.rest.utils.ScopeResolver; import org.dspace.app.rest.utils.ScopeResolver;
import org.dspace.content.DSpaceObject; import org.dspace.content.DSpaceObject;
import org.dspace.core.Context; import org.dspace.core.Context;
@@ -38,11 +40,14 @@ public class DiscoveryRestController extends AbstractDSpaceRestRepository implem
@Autowired @Autowired
private DiscoveryConfigurationService searchConfigurationService; private DiscoveryConfigurationService searchConfigurationService;
@Autowired
private SearchService searchService;
@Autowired @Autowired
private ScopeResolver scopeResolver; private ScopeResolver scopeResolver;
@Autowired @Autowired
private SearchService searchService; private DiscoverQueryBuilder queryBuilder;
@Override @Override
public void afterPropertiesSet() throws Exception { public void afterPropertiesSet() throws Exception {
@@ -66,6 +71,7 @@ public class DiscoveryRestController extends AbstractDSpaceRestRepository implem
//TODO Call DiscoveryConfigurationConverter on configuration to convert this API model to the REST model //TODO Call DiscoveryConfigurationConverter on configuration to convert this API model to the REST model
//TODO Return REST model //TODO Return REST model
//TODO set "hasMore" property on facets
} }
@RequestMapping(method = RequestMethod.GET, value = "/search/objects") @RequestMapping(method = RequestMethod.GET, value = "/search/objects")
@@ -88,16 +94,19 @@ public class DiscoveryRestController extends AbstractDSpaceRestRepository implem
DSpaceObject scopeObject = scopeResolver.resolveScope(context, dsoScope); DSpaceObject scopeObject = scopeResolver.resolveScope(context, dsoScope);
DiscoveryConfiguration configuration = searchConfigurationService.getDiscoveryConfigurationByNameOrDso(configurationName, scopeObject); DiscoveryConfiguration configuration = searchConfigurationService.getDiscoveryConfigurationByNameOrDso(configurationName, scopeObject);
try {
DiscoverQuery discoverQuery = queryBuilder.buildQuery(context, scopeObject, configuration, query, searchFilters, dsoType, page);
DiscoverResult searchResult = searchService.search(context, scopeObject, discoverQuery);
//TODO CHECK org.dspace.app.xmlui.aspect.discovery.SidebarFacetsTransformer#getQueryArgs } catch (InvalidRequestException e) {
//TODO DiscoverQuery discoverQuery = discoverQueryBuilder.buildQuery(configuration, scopeObject, query, searchFilters, dsoType, page); log.warn("Received an invalid request", e);
//TODO TOM handle invalid request
} catch (SearchServiceException e) {
log.error("Error while searching with Discovery", e);
//TODO TOM handle search exception
}
//try { //TODO convert search result to DSO list
//TODO DiscoverResult search = searchService.search(context, scopeObject, discoverQuery);
//} catch (SearchServiceException ex) {
// log.error("Error while searching with Discovery", ex);
//}
} }
@RequestMapping(method = RequestMethod.GET, value = "/facets") @RequestMapping(method = RequestMethod.GET, value = "/facets")

View File

@@ -0,0 +1,10 @@
package org.dspace.app.rest.exception;
/**
 * Invalid request exception for a request that names a DSpace Object type that is not valid.
 *
 * Created by tom on 19/09/2017.
 */
public class InvalidDSpaceObjectTypeException extends InvalidRequestException {
/**
 * @param message description of the invalid DSpace Object type
 */
public InvalidDSpaceObjectTypeException(String message) {
super(message);
}
}

View File

@@ -0,0 +1,14 @@
package org.dspace.app.rest.exception;
/**
 * Checked exception that signals an invalid request.
 * It is the parent of the more specific exceptions in this package.
 *
 * Created by tom on 19/09/2017.
 */
public class InvalidRequestException extends Exception {
/**
 * @param message description of what is invalid about the request
 */
public InvalidRequestException(String message) {
super(message);
}
/**
 * @param message description of what is invalid about the request
 * @param cause the underlying exception that revealed the problem
 */
public InvalidRequestException(String message, Throwable cause) {
super(message, cause);
}
}

View File

@@ -0,0 +1,12 @@
package org.dspace.app.rest.exception;
import java.sql.SQLException;
/**
 * Invalid request exception for search filters that could not be processed.
 * It always wraps the underlying cause.
 *
 * Created by tom on 19/09/2017.
 */
public class InvalidSearchFilterException extends InvalidRequestException {
/**
 * @param message description of the search filter problem
 * @param cause the underlying exception that occurred while processing the filters
 */
public InvalidSearchFilterException(String message, Throwable cause) {
super(message, cause);
}
}

View File

@@ -0,0 +1,10 @@
package org.dspace.app.rest.exception;
/**
 * Invalid request exception for a sort field or sort order that is not valid.
 *
 * Created by tom on 19/09/2017.
 */
public class InvalidSortingException extends InvalidRequestException {
/**
 * @param message description of the invalid sort field or sort order
 */
public InvalidSortingException(String message) {
super(message);
}
}

View File

@@ -15,4 +15,16 @@ public class SearchFilter {
this.operator = operator; this.operator = operator;
this.value = value; this.value = value;
} }
/**
 * @return the name of this search filter
 */
public String getName() {
    return this.name;
}
/**
 * @return the operator of this search filter
 */
public String getOperator() {
    return this.operator;
}
/**
 * @return the value of this search filter
 */
public String getValue() {
    return this.value;
}
} }

View File

@@ -0,0 +1,213 @@
package org.dspace.app.rest.utils;
import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.log4j.Logger;
import org.dspace.app.rest.exception.InvalidDSpaceObjectTypeException;
import org.dspace.app.rest.exception.InvalidRequestException;
import org.dspace.app.rest.exception.InvalidSearchFilterException;
import org.dspace.app.rest.exception.InvalidSortingException;
import org.dspace.app.rest.parameter.SearchFilter;
import org.dspace.content.DSpaceObject;
import org.dspace.core.Constants;
import org.dspace.core.Context;
import org.dspace.core.LogManager;
import org.dspace.discovery.*;
import org.dspace.discovery.configuration.*;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.data.domain.Pageable;
import org.springframework.data.domain.Sort;
import org.springframework.stereotype.Component;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
/**
 * Builds the {@link DiscoverQuery} for a search request. The query combines the default filter queries of the
 * Discovery configuration with the requested search filters, search query, DSpace Object type, sidebar facets,
 * pagination and sorting.
 *
 * Created by tom on 19/09/2017.
 * TODO TOM UNIT TEST
 */
@Component
public class DiscoverQueryBuilder {

    private static final Logger log = Logger.getLogger(DiscoverQueryBuilder.class);

    @Autowired
    private SearchService searchService;

    /**
     * Builds the Discovery query for the given request parameters.
     *
     * @param context the DSpace context
     * @param scope the DSpace object the search is limited to, used to resolve date facet ranges
     * @param discoveryConfiguration the Discovery configuration that provides default filters, facets and sorting
     * @param query the search query; ignored when blank
     * @param searchFilters the requested search filters; may be {@code null}
     * @param dsoType the name of the DSpace Object type to limit the results to; ignored when blank
     * @param page the requested page and sorting; may be {@code null}
     * @return the Discovery query
     * @throws InvalidRequestException when a search filter, the DSpace Object type or the sorting is not valid
     */
    public DiscoverQuery buildQuery(Context context, DSpaceObject scope,
                                    DiscoveryConfiguration discoveryConfiguration,
                                    String query, List<SearchFilter> searchFilters,
                                    String dsoType, Pageable page)
            throws InvalidRequestException {

        DiscoverQuery queryArgs = buildBaseQueryForConfiguration(discoveryConfiguration);

        //Add search filters
        queryArgs.addFilterQueries(convertFilters(context, searchFilters));

        //Set search query
        if (StringUtils.isNotBlank(query)) {
            queryArgs.setQuery(searchService.escapeQueryChars(query));
        }

        //Limit results to DSO type
        if (StringUtils.isNotBlank(dsoType)) {
            queryArgs.setDSpaceObjectFilter(getDsoTypeId(dsoType));
        }

        //When all search criteria are set, configure facet results
        addFaceting(context, scope, queryArgs, discoveryConfiguration);

        //Configure pagination and sorting
        configurePagination(page, queryArgs);
        configureSorting(page, queryArgs, discoveryConfiguration.getSearchSortConfiguration());

        return queryArgs;
    }

    /**
     * Creates a new query that already contains the default filter queries of the configuration, if any.
     */
    private DiscoverQuery buildBaseQueryForConfiguration(DiscoveryConfiguration discoveryConfiguration) {
        DiscoverQuery queryArgs = new DiscoverQuery();

        List<String> defaultFilterQueries = discoveryConfiguration.getDefaultFilterQueries();
        // A configuration without default filter queries must not break the query building
        if (CollectionUtils.isNotEmpty(defaultFilterQueries)) {
            queryArgs.addFilterQueries(defaultFilterQueries.toArray(new String[defaultFilterQueries.size()]));
        }

        return queryArgs;
    }

    /**
     * Sets the sort field and sort order on the query.
     * A sort field that was requested through the page must be a valid sort field of the sort configuration.
     * When no sorting was requested, the defaults of the sort configuration are used.
     *
     * @throws InvalidSortingException when the requested sort field is not valid,
     *                                 or when the sort order is neither "asc" nor "desc"
     */
    private void configureSorting(Pageable page, DiscoverQuery queryArgs, DiscoverySortConfiguration searchSortConfiguration) throws InvalidSortingException {
        String sortBy = null;
        String sortOrder = null;

        //Read the Pageable object if there is one
        if (page != null) {
            Sort sort = page.getSort();
            if (sort != null && sort.iterator().hasNext()) {
                Sort.Order order = sort.iterator().next();
                sortBy = order.getProperty();
                sortOrder = order.getDirection().name();
            }
        }

        if (sortBy == null) {
            // The default is "score" or a sort index field name, neither of which is a configured metadata field.
            // Validating it like a requested field would reject every request that does not specify a sort.
            sortBy = getDefaultSortField(searchSortConfiguration);
        } else if (searchSortConfiguration == null || !searchSortConfiguration.isValidSortField(sortBy)) {
            throw new InvalidSortingException(sortBy + " is not a valid sort field");
        }

        if (sortOrder == null) {
            sortOrder = getDefaultSortDirection(searchSortConfiguration);
        }

        //Update Discovery query
        if ("asc".equalsIgnoreCase(sortOrder)) {
            queryArgs.setSortField(sortBy, DiscoverQuery.SORT_ORDER.asc);
        } else if ("desc".equalsIgnoreCase(sortOrder)) {
            queryArgs.setSortField(sortBy, DiscoverQuery.SORT_ORDER.desc);
        } else {
            throw new InvalidSortingException(sortOrder + " is not a valid sort order");
        }
    }

    /**
     * Returns the default sort order of the sort configuration.
     * Falls back to descending when there is no configuration or no default order,
     * so that a request without sorting never fails on a missing default.
     */
    private String getDefaultSortDirection(DiscoverySortConfiguration searchSortConfiguration) {
        if (searchSortConfiguration != null && searchSortConfiguration.getDefaultSortOrder() != null) {
            return searchSortConfiguration.getDefaultSortOrder().toString();
        }
        return DiscoverQuery.SORT_ORDER.desc.name();
    }

    /**
     * Returns the sort index field of the default sort field of the sort configuration,
     * or "score" when no default sort field can be found.
     */
    private String getDefaultSortField(DiscoverySortConfiguration searchSortConfiguration) {
        // Attempt to find the default one, if none found we use SCORE
        String sortBy = "score";
        if (searchSortConfiguration != null) {
            for (DiscoverySortFieldConfiguration sortFieldConfiguration
                    : CollectionUtils.emptyIfNull(searchSortConfiguration.getSortFields())) {
                if (sortFieldConfiguration.equals(searchSortConfiguration.getDefaultSort())) {
                    // Use the injected search service, like the rest of this class
                    sortBy = searchService.toSortFieldIndex(
                            sortFieldConfiguration.getMetadataField(), sortFieldConfiguration.getType());
                }
            }
        }
        return sortBy;
    }

    /**
     * Copies the page size and offset of the page to the query. Does nothing when there is no page.
     */
    private void configurePagination(Pageable page, DiscoverQuery queryArgs) {
        if (page != null) {
            queryArgs.setMaxResults(page.getPageSize());
            queryArgs.setStart(page.getOffset());
        }
    }

    /**
     * Resolves the name of a DSpace Object type (case insensitive) to its index in {@code Constants.typeText}.
     *
     * @throws InvalidDSpaceObjectTypeException when the name is not found in {@code Constants.typeText}
     */
    private int getDsoTypeId(String dsoType) throws InvalidDSpaceObjectTypeException {
        // Locale.ROOT keeps the upper-casing independent of the default locale of the server
        // (e.g. "item" must become "ITEM", also with a Turkish default locale)
        int index = ArrayUtils.indexOf(Constants.typeText, dsoType.toUpperCase(java.util.Locale.ROOT));
        if (index < 0) {
            throw new InvalidDSpaceObjectTypeException(dsoType + " is not a valid DSpace Object type");
        }
        return index;
    }

    /**
     * Converts the requested search filters to filter queries.
     * Filters for which the search service returns no filter query are left out.
     *
     * @param searchFilters the requested filters; {@code null} results in an empty array
     * @throws InvalidSearchFilterException when the search service fails with an SQL error
     */
    private String[] convertFilters(Context context, List<SearchFilter> searchFilters) throws InvalidSearchFilterException {
        // Do not size the list from searchFilters: it may be null
        List<String> filterQueries = new ArrayList<>();

        try {
            //TODO TOM take into account OR filters
            for (SearchFilter searchFilter : CollectionUtils.emptyIfNull(searchFilters)) {
                DiscoverFilterQuery filterQuery = searchService.toFilterQuery(context,
                        searchFilter.getName(), searchFilter.getOperator(), searchFilter.getValue());

                if (filterQuery != null) {
                    filterQueries.add(filterQuery.getFilterQuery());
                }
            }
        } catch (SQLException e) {
            throw new InvalidSearchFilterException("There was a problem parsing the search filters.", e);
        }

        return filterQueries.toArray(new String[filterQueries.size()]);
    }

    /**
     * Enables faceting on the sidebar facets of the configuration.
     * Date facets are added as a year range; a date facet whose range cannot be set up is logged and skipped.
     * All other facets are added as facet fields with their limit increased by one.
     *
     * @return the given query
     */
    private DiscoverQuery addFaceting(Context context, DSpaceObject scope, DiscoverQuery queryArgs, DiscoveryConfiguration discoveryConfiguration) {

        List<DiscoverySearchFilterFacet> facets = discoveryConfiguration.getSidebarFacets();

        log.debug("facets for configuration " + discoveryConfiguration.getId() + ": " + (facets != null ? facets.size() : null));

        if (facets != null) {
            queryArgs.setFacetMinCount(1);

            // enable faceting of search results
            for (DiscoverySearchFilterFacet facet : facets) {
                if (facet.getType().equals(DiscoveryConfigurationParameters.TYPE_DATE)) {
                    try {
                        FacetYearRange facetYearRange = searchService.getFacetYearRange(context, scope, facet, queryArgs.getFilterQueries());
                        queryArgs.addYearRangeFacet(facet, facetYearRange);
                    } catch (Exception e) {
                        // Best effort: a failing date facet must not prevent the search itself
                        log.error(LogManager.getHeader(context, "Error in Discovery while setting up date facet range", "date facet: " + facet), e);
                    }
                } else {
                    //Add one to our facet limit to make sure that if we have more then the shown facets that we show our "show more" url
                    int facetLimit = facet.getFacetLimit() + 1;
                    queryArgs.addFacetField(new DiscoverFacetField(facet.getIndexFieldName(), facet.getType(), facetLimit, facet.getSortOrderSidebar()));
                }
            }
        }

        return queryArgs;
    }
}

View File

@@ -1101,6 +1101,11 @@
<version>3.2.2</version> <version>3.2.2</version>
<!-- <version>3.1</version> xmlui - wing --> <!-- <version>3.1</version> xmlui - wing -->
</dependency> </dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-collections4</artifactId>
<version>4.1</version>
</dependency>
<dependency> <dependency>
<groupId>commons-configuration</groupId> <groupId>commons-configuration</groupId>
<artifactId>commons-configuration</artifactId> <artifactId>commons-configuration</artifactId>