/**
 * The contents of this file are subject to the license and copyright
 * detailed in the LICENSE and NOTICE files at the root of the source
 * tree and available online at
 *
 * http://www.dspace.org/license/
 */
package org.dspace.discovery;

import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.sql.SQLException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.TimeZone;
import java.util.TreeMap;
import java.util.Vector;

import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.collections.MapUtils;
import org.apache.commons.collections.Transformer;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.ArrayUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.lang.time.DateFormatUtils;
import org.apache.commons.validator.routines.UrlValidator;
import org.apache.http.HttpHost;
import org.apache.http.HttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.params.ClientPNames;
import org.apache.http.client.utils.URIBuilder;
import org.apache.log4j.Logger;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.HttpSolrServer;
import org.apache.solr.client.solrj.response.FacetField;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.client.solrj.util.ClientUtils;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.params.*;
import org.apache.solr.common.util.NamedList;
import org.dspace.content.Bitstream;
import org.dspace.content.Bundle;
import org.dspace.content.Collection;
import org.dspace.content.Community;
import org.dspace.content.DCValue;
import org.dspace.content.DSpaceObject;
import org.dspace.content.Item;
import org.dspace.content.ItemIterator;
import org.dspace.content.authority.ChoiceAuthorityManager;
import org.dspace.content.authority.Choices;
import org.dspace.content.authority.MetadataAuthorityManager;
import org.dspace.core.ConfigurationManager;
import org.dspace.core.Constants;
import org.dspace.core.Context;
import org.dspace.core.Email;
import org.dspace.core.I18nUtil;
import org.dspace.core.LogManager;
import org.dspace.discovery.configuration.DiscoveryConfiguration;
import org.dspace.discovery.configuration.DiscoveryConfigurationParameters;
import org.dspace.discovery.configuration.DiscoveryHitHighlightFieldConfiguration;
import org.dspace.discovery.configuration.DiscoveryHitHighlightingConfiguration;
import org.dspace.discovery.configuration.DiscoveryMoreLikeThisConfiguration;
import org.dspace.discovery.configuration.DiscoveryRecentSubmissionsConfiguration;
import org.dspace.discovery.configuration.DiscoverySearchFilter;
import org.dspace.discovery.configuration.DiscoverySearchFilterFacet;
import org.dspace.discovery.configuration.DiscoverySortConfiguration;
import org.dspace.discovery.configuration.DiscoverySortFieldConfiguration;
import org.dspace.discovery.configuration.HierarchicalSidebarFacetConfiguration;
import org.dspace.handle.HandleManager;
import org.dspace.utils.DSpace;
import org.springframework.stereotype.Service;

/**
 * SolrIndexer contains the methods that index Items and their metadata,
 * collections, communities, etc. It is meant to be invoked either from the
 * command line (see dspace/bin/index-all) or via the indexContent() methods
 * within DSpace.
 * <p/>
 * The Administrator can choose to run SolrIndexer in a cron job that runs
 * regularly; a failed attempt to index from the UI will be caught up in that
 * cron run.
 *
 * SolrServiceImpl is registered as a Service in the ServiceManager via
 * a Spring configuration file located under
 * classpath://spring/spring-dspace-applicationContext.xml
 *
 * Its configuration is autowired by the ApplicationContext.
 *
 * @author Kevin Van de Velde (kevin at atmire dot com)
 * @author Mark Diggory (markd at atmire dot com)
 * @author Ben Bosman (ben at atmire dot com)
 */
@Service
public class SolrServiceImpl implements SearchService, IndexingService {

    private static final Logger log = Logger.getLogger(SolrServiceImpl.class);

    protected static final String LAST_INDEXED_FIELD = "SolrIndexer.lastIndexed";

    public static final String FILTER_SEPARATOR = "\n|||\n";

    public static final String AUTHORITY_SEPARATOR = "###";

    public static final String STORE_SEPARATOR = "\n|||\n";

    public static final String VARIANTS_STORE_SEPARATOR = "###";

    /**
     * Non-static HttpSolrServer instance for processing indexing events.
     */
    private HttpSolrServer solr = null;

    protected HttpSolrServer getSolr()
    {
        if (solr == null)
        {
            String solrService = new DSpace().getConfigurationService()
                    .getProperty("discovery.search.server");

            UrlValidator urlValidator = new UrlValidator(UrlValidator.ALLOW_LOCAL_URLS);
            if (urlValidator.isValid(solrService))
            {
                try {
                    log.debug("Solr URL: " + solrService);
                    solr = new HttpSolrServer(solrService);

                    solr.setBaseURL(solrService);

                    SolrQuery solrQuery = new SolrQuery()
                            .setQuery("search.resourcetype:2 AND search.resourceid:1");

                    solr.query(solrQuery);
                } catch (SolrServerException e) {
                    log.error("Error while initializing solr server", e);
                }
            } else {
                log.error("Error while initializing solr, invalid url: " + solrService);
            }
        }

        return solr;
    }

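    /*
     * Illustrative sketch (not part of the original class): the "discovery.search.server"
     * property read above commonly points at the Discovery Solr core, e.g.
     * "http://localhost:8080/solr/search" (example value, not mandated by this code).
     * Callers normally obtain this service through the DSpace ServiceManager rather
     * than instantiating it; the lookup below reuses getServicesByType(), which this
     * class itself uses for its index plugins, and assumes at least one bean is defined.
     *
     *     IndexingService indexer = new DSpace().getServiceManager()
     *             .getServicesByType(IndexingService.class).get(0);
     */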
    /**
     * If the handle for the "dso" already exists in the index, and the "dso"
     * has a lastModified timestamp that is newer than the document in the index,
     * then it is updated; otherwise a new document is added.
     *
     * @param context Users Context
     * @param dso     DSpace Object (Item, Collection or Community)
     * @throws SQLException
     * @throws IOException
     */
    public void indexContent(Context context, DSpaceObject dso)
            throws SQLException {
        indexContent(context, dso, false);
    }

    /**
     * If the handle for the "dso" already exists in the index, and the "dso"
     * has a lastModified timestamp that is newer than the document in the index,
     * then it is updated; otherwise a new document is added.
     *
     * @param context Users Context
     * @param dso     DSpace Object (Item, Collection or Community)
     * @param force   Force update even if not stale.
     * @throws SQLException
     * @throws IOException
     */
    public void indexContent(Context context, DSpaceObject dso,
                             boolean force) throws SQLException {

        String handle = dso.getHandle();

        if (handle == null)
        {
            handle = HandleManager.findHandle(context, dso);
        }

        try {
            switch (dso.getType())
            {
                case Constants.ITEM:
                    Item item = (Item) dso;
                    if (item.isArchived() || item.isWithdrawn())
                    {
                        /**
                         * If the item is in the repository now, add it to the index
                         */
                        if (requiresIndexing(handle, ((Item) dso).getLastModified())
                                || force)
                        {
                            unIndexContent(context, handle);
                            buildDocument(context, (Item) dso);
                        }
                    } else {
                        /**
                         * Make sure the item is not in the index if it is not in
                         * the archive or is withdrawn.
                         */
                        unIndexContent(context, item);
                        log.info("Removed Item: " + handle + " from Index");
                    }
                    break;

                case Constants.COLLECTION:
                    buildDocument(context, (Collection) dso);
                    log.info("Wrote Collection: " + handle + " to Index");
                    break;

                case Constants.COMMUNITY:
                    buildDocument(context, (Community) dso);
                    log.info("Wrote Community: " + handle + " to Index");
                    break;

                default:
                    log.error("Only Items, Collections and Communities can be Indexed");
            }

        } catch (Exception e)
        {
            log.error(e.getMessage(), e);
        }
    }

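    /*
     * Illustrative sketch (not part of the original class): forcing a re-index of a
     * single item after its metadata has changed. HandleManager.resolveToObject() is
     * used elsewhere in this class; the handle "123456789/42" is a hypothetical value.
     *
     *     Context context = new Context();
     *     Item item = (Item) HandleManager.resolveToObject(context, "123456789/42");
     *     indexingService.indexContent(context, item, true);
     */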
/**
|
|
* unIndex removes an Item, Collection, or Community
|
|
*
|
|
* @param context
|
|
* @param dso DSpace Object, can be Community, Item, or Collection
|
|
* @throws SQLException
|
|
* @throws IOException
|
|
*/
|
|
public void unIndexContent(Context context, DSpaceObject dso)
|
|
throws SQLException, IOException {
|
|
unIndexContent(context, dso, false);
|
|
}
|
|
|
|
/**
|
|
* unIndex removes an Item, Collection, or Community
|
|
*
|
|
* @param context
|
|
* @param dso DSpace Object, can be Community, Item, or Collection
|
|
* @param commit if <code>true</code> force an immediate commit on SOLR
|
|
* @throws SQLException
|
|
* @throws IOException
|
|
*/
|
|
public void unIndexContent(Context context, DSpaceObject dso, boolean commit)
|
|
throws SQLException, IOException {
|
|
try {
|
|
if (dso == null)
|
|
{
|
|
return;
|
|
}
|
|
String uniqueID = dso.getType()+"-"+dso.getID();
|
|
getSolr().deleteById(uniqueID);
|
|
if(commit)
|
|
{
|
|
getSolr().commit();
|
|
}
|
|
} catch (Exception exception) {
|
|
log.error(exception.getMessage(), exception);
|
|
emailException(exception);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Unindex a Document in the Lucene index.
|
|
* @param context the dspace context
|
|
* @param handle the handle of the object to be deleted
|
|
* @throws IOException
|
|
* @throws SQLException
|
|
*/
|
|
public void unIndexContent(Context context, String handle) throws IOException, SQLException {
|
|
unIndexContent(context, handle, false);
|
|
}
|
|
|
|
/**
|
|
* Unindex a Document in the Lucene Index.
|
|
* @param context the dspace context
|
|
* @param handle the handle of the object to be deleted
|
|
* @throws SQLException
|
|
* @throws IOException
|
|
*/
|
|
public void unIndexContent(Context context, String handle, boolean commit)
|
|
throws SQLException, IOException {
|
|
|
|
try {
|
|
if(getSolr() != null){
|
|
getSolr().deleteByQuery("handle:\"" + handle + "\"");
|
|
if(commit)
|
|
{
|
|
getSolr().commit();
|
|
}
|
|
}
|
|
} catch (SolrServerException e)
|
|
{
|
|
log.error(e.getMessage(), e);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* reIndexContent removes something from the index, then re-indexes it
|
|
*
|
|
* @param context context object
|
|
* @param dso object to re-index
|
|
*/
|
|
public void reIndexContent(Context context, DSpaceObject dso)
|
|
throws SQLException, IOException {
|
|
try {
|
|
indexContent(context, dso);
|
|
} catch (Exception exception)
|
|
{
|
|
log.error(exception.getMessage(), exception);
|
|
emailException(exception);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* create full index - wiping old index
|
|
*
|
|
* @param c context to use
|
|
*/
|
|
public void createIndex(Context c) throws SQLException, IOException {
|
|
|
|
/* Reindex all content preemptively. */
|
|
updateIndex(c, true);
|
|
|
|
}
|
|
|
|
|
|
/**
|
|
* Iterates over all Items, Collections and Communities and updates them in
* the index. Uses decaching to control the memory footprint. Uses indexContent
* and isStale to check the state of an item in the index.
|
|
*
|
|
* @param context the dspace context
|
|
*/
|
|
public void updateIndex(Context context)
|
|
{
|
|
updateIndex(context, false);
|
|
}
|
|
|
|
/**
|
|
* Iterates over all Items, Collections and Communities and updates them in
* the index. Uses decaching to control the memory footprint. Uses indexContent
* and isStale to check the state of an item in the index.
|
|
* <p/>
|
|
* At first it may appear counterintuitive to have an IndexWriter/Reader
|
|
* opened and closed on each DSO. But this allows the UI processes to step
|
|
* in and attain a lock and write to the index even if other processes/jvms
|
|
* are running a reindex.
|
|
*
|
|
* @param context the dspace context
|
|
* @param force whether or not to force the reindexing
|
|
*/
|
|
public void updateIndex(Context context, boolean force)
|
|
{
|
|
try {
|
|
ItemIterator items = null;
|
|
try {
|
|
for (items = Item.findAllUnfiltered(context); items.hasNext();)
|
|
{
|
|
Item item = items.next();
|
|
indexContent(context, item, force);
|
|
item.decache();
|
|
}
|
|
} finally {
|
|
if (items != null)
|
|
{
|
|
items.close();
|
|
}
|
|
}
|
|
|
|
Collection[] collections = Collection.findAll(context);
|
|
for (Collection collection : collections)
|
|
{
|
|
indexContent(context, collection, force);
|
|
context.removeCached(collection, collection.getID());
|
|
|
|
}
|
|
|
|
Community[] communities = Community.findAll(context);
|
|
for (Community community : communities)
|
|
{
|
|
indexContent(context, community, force);
|
|
context.removeCached(community, community.getID());
|
|
}
|
|
|
|
if(getSolr() != null)
|
|
{
|
|
getSolr().commit();
|
|
}
|
|
|
|
} catch (Exception e)
|
|
{
|
|
log.error(e.getMessage(), e);
|
|
}
|
|
}
|
|
|
|
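/*
 * Illustrative sketch (not part of the original class): a full forced re-index
 * followed by an optimize, as an administrator's scheduled task might drive it.
 * It only uses methods defined in this class plus org.dspace.core.Context; the
 * "indexingService" variable is assumed to hold this service.
 *
 *     Context context = new Context();
 *     try {
 *         indexingService.updateIndex(context, true);
 *         indexingService.optimize();
 *     } finally {
 *         context.abort();
 *     }
 */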
/**
|
|
* Iterates over all documents in the Lucene index and verifies they are in
|
|
* database, if not, they are removed.
|
|
*
|
|
* @param force whether or not to force a clean index
|
|
* @throws IOException IO exception
|
|
* @throws SQLException sql exception
|
|
* @throws SearchServiceException occurs when something went wrong with querying the solr server
|
|
*/
|
|
public void cleanIndex(boolean force) throws IOException,
|
|
SQLException, SearchServiceException {
|
|
|
|
Context context = new Context();
|
|
context.turnOffAuthorisationSystem();
|
|
|
|
try
|
|
{
|
|
if(getSolr() == null)
|
|
{
|
|
return;
|
|
}
|
|
if (force)
|
|
{
|
|
getSolr().deleteByQuery("search.resourcetype:[2 TO 4]");
|
|
} else {
|
|
SolrQuery query = new SolrQuery();
|
|
query.setQuery("search.resourcetype:[2 TO 4]");
|
|
QueryResponse rsp = getSolr().query(query);
|
|
SolrDocumentList docs = rsp.getResults();
|
|
|
|
Iterator iter = docs.iterator();
|
|
while (iter.hasNext())
|
|
{
|
|
|
|
SolrDocument doc = (SolrDocument) iter.next();
|
|
|
|
String handle = (String) doc.getFieldValue("handle");
|
|
|
|
DSpaceObject o = HandleManager.resolveToObject(context, handle);
|
|
|
|
if (o == null)
|
|
{
|
|
log.info("Deleting: " + handle);
|
|
/*
 * Use IndexWriter to delete; it's easier to manage
 * write.lock
 */
|
|
unIndexContent(context, handle);
|
|
} else {
|
|
context.removeCached(o, o.getID());
|
|
log.debug("Keeping: " + handle);
|
|
}
|
|
}
|
|
}
|
|
} catch(Exception e)
|
|
{
|
|
|
|
throw new SearchServiceException(e.getMessage(), e);
|
|
} finally
|
|
{
|
|
context.abort();
|
|
}
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
/**
|
|
* Maintenance to keep a SOLR index efficient.
|
|
* Note: This might take a long time.
|
|
*/
|
|
public void optimize()
|
|
{
|
|
try {
|
|
if(getSolr() == null)
|
|
{
|
|
return;
|
|
}
|
|
long start = System.currentTimeMillis();
|
|
System.out.println("SOLR Search Optimize -- Process Started:"+start);
|
|
getSolr().optimize();
|
|
long finish = System.currentTimeMillis();
|
|
System.out.println("SOLR Search Optimize -- Process Finished:"+finish);
|
|
System.out.println("SOLR Search Optimize -- Total time taken:"+(finish-start) + " (ms).");
|
|
} catch (SolrServerException sse)
|
|
{
|
|
System.err.println(sse.getMessage());
|
|
} catch (IOException ioe)
|
|
{
|
|
System.err.println(ioe.getMessage());
|
|
}
|
|
}
|
|
|
|
// //////////////////////////////////
|
|
// Private
|
|
// //////////////////////////////////
|
|
|
|
protected void emailException(Exception exception)
|
|
{
|
|
// Also email an alert; the system admin may need to check for a stale lock
|
|
try {
|
|
String recipient = ConfigurationManager
|
|
.getProperty("alert.recipient");
|
|
|
|
if (StringUtils.isNotBlank(recipient))
|
|
{
|
|
Email email = Email
|
|
.getEmail(I18nUtil.getEmailFilename(
|
|
Locale.getDefault(), "internal_error"));
|
|
email.addRecipient(recipient);
|
|
email.addArgument(ConfigurationManager
|
|
.getProperty("dspace.url"));
|
|
email.addArgument(new Date());
|
|
|
|
String stackTrace;
|
|
|
|
if (exception != null)
|
|
{
|
|
StringWriter sw = new StringWriter();
|
|
PrintWriter pw = new PrintWriter(sw);
|
|
exception.printStackTrace(pw);
|
|
pw.flush();
|
|
stackTrace = sw.toString();
|
|
} else {
|
|
stackTrace = "No exception";
|
|
}
|
|
|
|
email.addArgument(stackTrace);
|
|
email.send();
|
|
}
|
|
} catch (Exception e)
|
|
{
|
|
// Not much we can do here!
|
|
log.warn("Unable to send email alert", e);
|
|
}
|
|
|
|
}
|
|
|
|
|
|
/**
|
|
* Checks the lastModified timestamp in the database against the last-indexed
* time in the index to determine whether the index entry is stale.
|
|
*
|
|
* @param handle the handle of the dso
|
|
* @param lastModified the last modified date of the DSpace object
|
|
* @return a boolean indicating if the dso should be re-indexed
|
|
* @throws SQLException sql exception
|
|
* @throws IOException io exception
|
|
* @throws SearchServiceException if something went wrong with querying the solr server
|
|
*/
|
|
protected boolean requiresIndexing(String handle, Date lastModified)
|
|
throws SQLException, IOException, SearchServiceException {
|
|
|
|
boolean reindexItem = false;
|
|
boolean inIndex = false;
|
|
|
|
SolrQuery query = new SolrQuery();
|
|
query.setQuery("handle:" + handle);
|
|
QueryResponse rsp;
|
|
|
|
try {
|
|
if(getSolr() == null)
|
|
{
|
|
return false;
|
|
}
|
|
rsp = getSolr().query(query);
|
|
} catch (SolrServerException e)
|
|
{
|
|
throw new SearchServiceException(e.getMessage(),e);
|
|
}
|
|
|
|
for (SolrDocument doc : rsp.getResults())
|
|
{
|
|
|
|
inIndex = true;
|
|
|
|
Object value = doc.getFieldValue(LAST_INDEXED_FIELD);
|
|
|
|
if(value instanceof Date)
|
|
{
|
|
Date lastIndexed = (Date) value;
|
|
|
|
if (lastIndexed.before(lastModified))
|
|
{
|
|
|
|
reindexItem = true;
|
|
}
|
|
}
|
|
}
|
|
|
|
return reindexItem || !inIndex;
|
|
}
|
|
|
|
|
|
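/*
 * Worked example of the staleness check above: if the index stores
 * SolrIndexer.lastIndexed = 2013-01-01 for handle 123456789/42 (a hypothetical
 * handle) and the database reports lastModified = 2013-06-01, the method returns
 * true and the item is re-indexed; if the handle is missing from the index
 * entirely, it also returns true.
 */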
/**
|
|
* @param myitem the item for which our locations are to be retrieved
|
|
* @return a list containing the identifiers of the communities & collections
|
|
* @throws SQLException sql exception
|
|
*/
|
|
protected List<String> getItemLocations(Item myitem)
|
|
throws SQLException {
|
|
List<String> locations = new Vector<String>();
|
|
|
|
// build list of community ids
|
|
Community[] communities = myitem.getCommunities();
|
|
|
|
// build list of collection ids
|
|
Collection[] collections = myitem.getCollections();
|
|
|
|
// now put those into strings
|
|
int i = 0;
|
|
|
|
for (i = 0; i < communities.length; i++)
|
|
{
|
|
locations.add("m" + communities[i].getID());
|
|
}
|
|
|
|
for (i = 0; i < collections.length; i++)
|
|
{
|
|
locations.add("l" + collections[i].getID());
|
|
}
|
|
|
|
return locations;
|
|
}
|
|
|
|
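/*
 * Example of the location encoding produced above (IDs are hypothetical): an item
 * in community 2 and collection 7 yields ["m2", "l7"]. The same prefixes are used
 * at search time, e.g. the filter query "location:m2" restricts results to that
 * community (see search(Context, DSpaceObject, DiscoverQuery, boolean) below).
 */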
protected List<String> getCollectionLocations(Collection target) throws SQLException {
|
|
List<String> locations = new Vector<String>();
|
|
// build list of community ids
|
|
Community[] communities = target.getCommunities();
|
|
|
|
// now put those into strings
|
|
for (Community community : communities)
|
|
{
|
|
locations.add("m" + community.getID());
|
|
}
|
|
|
|
return locations;
|
|
}
|
|
|
|
/**
|
|
* Write the document to the index under the appropriate handle.
|
|
* @param doc the solr document to be written to the server
|
|
* @throws IOException IO exception
|
|
*/
|
|
protected void writeDocument(SolrInputDocument doc) throws IOException {
|
|
|
|
try {
|
|
if(getSolr() != null)
|
|
{
|
|
getSolr().add(doc);
|
|
}
|
|
} catch (SolrServerException e)
|
|
{
|
|
log.error(e.getMessage(), e);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Build a solr document for a DSpace Community.
|
|
*
|
|
* @param community Community to be indexed
|
|
* @throws SQLException
|
|
* @throws IOException
|
|
*/
|
|
protected void buildDocument(Context context, Community community)
|
|
throws SQLException, IOException {
|
|
// Create Document
|
|
SolrInputDocument doc = buildDocument(Constants.COMMUNITY, community.getID(),
|
|
community.getHandle(), null);
|
|
|
|
DiscoveryConfiguration discoveryConfiguration = SearchUtils.getDiscoveryConfiguration(community);
|
|
DiscoveryHitHighlightingConfiguration highlightingConfiguration = discoveryConfiguration.getHitHighlightingConfiguration();
|
|
List<String> highlightedMetadataFields = new ArrayList<String>();
|
|
if(highlightingConfiguration != null)
|
|
{
|
|
for (DiscoveryHitHighlightFieldConfiguration configuration : highlightingConfiguration.getMetadataFields())
|
|
{
|
|
highlightedMetadataFields.add(configuration.getField());
|
|
}
|
|
}
|
|
|
|
// and populate it
|
|
String description = community.getMetadata("introductory_text");
|
|
String description_abstract = community.getMetadata("short_description");
|
|
String description_table = community.getMetadata("side_bar_text");
|
|
String rights = community.getMetadata("copyright_text");
|
|
String title = community.getMetadata("name");
|
|
|
|
List<String> toIgnoreMetadataFields = SearchUtils.getIgnoredMetadataFields(community.getType());
|
|
addContainerMetadataField(doc, highlightedMetadataFields, toIgnoreMetadataFields, "dc.description", description);
|
|
addContainerMetadataField(doc, highlightedMetadataFields, toIgnoreMetadataFields, "dc.description.abstract", description_abstract);
|
|
addContainerMetadataField(doc, highlightedMetadataFields, toIgnoreMetadataFields, "dc.description.tableofcontents", description_table);
|
|
addContainerMetadataField(doc, highlightedMetadataFields, toIgnoreMetadataFields, "dc.rights", rights);
|
|
addContainerMetadataField(doc, highlightedMetadataFields, toIgnoreMetadataFields, "dc.title", title);
|
|
|
|
//Do any additional indexing, depends on the plugins
|
|
List<SolrServiceIndexPlugin> solrServiceIndexPlugins = new DSpace().getServiceManager().getServicesByType(SolrServiceIndexPlugin.class);
|
|
for (SolrServiceIndexPlugin solrServiceIndexPlugin : solrServiceIndexPlugins)
|
|
{
|
|
solrServiceIndexPlugin.additionalIndex(context, community, doc);
|
|
}
|
|
|
|
writeDocument(doc);
|
|
}
|
|
|
|
/**
|
|
* Build a solr document for a DSpace Collection.
|
|
*
|
|
* @param collection Collection to be indexed
|
|
* @throws SQLException sql exception
|
|
* @throws IOException IO exception
|
|
*/
|
|
protected void buildDocument(Context context, Collection collection)
|
|
throws SQLException, IOException {
|
|
List<String> locations = getCollectionLocations(collection);
|
|
|
|
// Create Lucene Document
|
|
SolrInputDocument doc = buildDocument(Constants.COLLECTION, collection.getID(),
|
|
collection.getHandle(), locations);
|
|
|
|
DiscoveryConfiguration discoveryConfiguration = SearchUtils.getDiscoveryConfiguration(collection);
|
|
DiscoveryHitHighlightingConfiguration highlightingConfiguration = discoveryConfiguration.getHitHighlightingConfiguration();
|
|
List<String> highlightedMetadataFields = new ArrayList<String>();
|
|
if(highlightingConfiguration != null)
|
|
{
|
|
for (DiscoveryHitHighlightFieldConfiguration configuration : highlightingConfiguration.getMetadataFields())
|
|
{
|
|
highlightedMetadataFields.add(configuration.getField());
|
|
}
|
|
}
|
|
|
|
|
|
// and populate it
|
|
String description = collection.getMetadata("introductory_text");
|
|
String description_abstract = collection.getMetadata("short_description");
|
|
String description_table = collection.getMetadata("side_bar_text");
|
|
String provenance = collection.getMetadata("provenance_description");
|
|
String rights = collection.getMetadata("copyright_text");
|
|
String rights_license = collection.getMetadata("license");
|
|
String title = collection.getMetadata("name");
|
|
|
|
List<String> toIgnoreMetadataFields = SearchUtils.getIgnoredMetadataFields(collection.getType());
|
|
addContainerMetadataField(doc, highlightedMetadataFields, toIgnoreMetadataFields, "dc.description", description);
|
|
addContainerMetadataField(doc, highlightedMetadataFields, toIgnoreMetadataFields, "dc.description.abstract", description_abstract);
|
|
addContainerMetadataField(doc, highlightedMetadataFields, toIgnoreMetadataFields, "dc.description.tableofcontents", description_table);
|
|
addContainerMetadataField(doc, highlightedMetadataFields, toIgnoreMetadataFields, "dc.provenance", provenance);
|
|
addContainerMetadataField(doc, highlightedMetadataFields, toIgnoreMetadataFields, "dc.rights", rights);
|
|
addContainerMetadataField(doc, highlightedMetadataFields, toIgnoreMetadataFields, "dc.rights.license", rights_license);
|
|
addContainerMetadataField(doc, highlightedMetadataFields, toIgnoreMetadataFields, "dc.title", title);
|
|
|
|
|
|
//Do any additional indexing, depends on the plugins
|
|
List<SolrServiceIndexPlugin> solrServiceIndexPlugins = new DSpace().getServiceManager().getServicesByType(SolrServiceIndexPlugin.class);
|
|
for (SolrServiceIndexPlugin solrServiceIndexPlugin : solrServiceIndexPlugins)
|
|
{
|
|
solrServiceIndexPlugin.additionalIndex(context, collection, doc);
|
|
}
|
|
|
|
writeDocument(doc);
|
|
}
|
|
|
|
/**
 * Add the metadata value of the community/collection to the solr document.
 * If the field is configured for hit highlighting, a highlighting copy of the
 * value is added as well.
 * @param doc the solr document
 * @param highlightedMetadataFields the list of metadata fields that CAN be highlighted
 * @param toIgnoreMetadataFields the list of metadata fields that should NOT be indexed
 * @param metadataField the metadata field added
 * @param value the value (can be NULL !)
 */
|
|
protected void addContainerMetadataField(SolrInputDocument doc, List<String> highlightedMetadataFields, List<String> toIgnoreMetadataFields, String metadataField, String value)
|
|
{
|
|
if(toIgnoreMetadataFields == null || !toIgnoreMetadataFields.contains(metadataField))
|
|
{
|
|
if(StringUtils.isNotBlank(value))
|
|
{
|
|
doc.addField(metadataField, value);
|
|
if(highlightedMetadataFields.contains(metadataField))
|
|
{
|
|
doc.addField(metadataField + "_hl", value);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
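/*
 * Illustrative call (mirrors buildDocument(Context, Community) above): adding a
 * community title so it is searchable and, when configured, highlightable. A
 * "dc.title_hl" copy is only added if "dc.title" appears in the highlighted list.
 *
 *     addContainerMetadataField(doc, highlightedMetadataFields,
 *             toIgnoreMetadataFields, "dc.title", community.getMetadata("name"));
 */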
/**
|
|
* Build a Lucene document for a DSpace Item and write the index
|
|
*
|
|
* @param context Users Context
|
|
* @param item The DSpace Item to be indexed
|
|
* @throws SQLException
|
|
* @throws IOException
|
|
*/
|
|
protected void buildDocument(Context context, Item item)
|
|
throws SQLException, IOException {
|
|
String handle = item.getHandle();
|
|
|
|
if (handle == null)
|
|
{
|
|
handle = HandleManager.findHandle(context, item);
|
|
}
|
|
|
|
// get the location string (for searching by collection & community)
|
|
List<String> locations = getItemLocations(item);
|
|
|
|
SolrInputDocument doc = buildDocument(Constants.ITEM, item.getID(), handle,
|
|
locations);
|
|
|
|
log.debug("Building Item: " + handle);
|
|
|
|
doc.addField("withdrawn", item.isWithdrawn());
|
|
doc.addField("discoverable", item.isDiscoverable());
|
|
|
|
//Keep a list of our sort values which we added, sort values can only be added once
|
|
List<String> sortFieldsAdded = new ArrayList<String>();
|
|
Set<String> hitHighlightingFields = new HashSet<String>();
|
|
try {
|
|
List<DiscoveryConfiguration> discoveryConfigurations = SearchUtils.getAllDiscoveryConfigurations(item);
|
|
|
|
//A map used to save each sidebarFacet config by the metadata fields
|
|
Map<String, List<DiscoverySearchFilter>> searchFilters = new HashMap<String, List<DiscoverySearchFilter>>();
|
|
Map<String, DiscoverySortFieldConfiguration> sortFields = new HashMap<String, DiscoverySortFieldConfiguration>();
|
|
Map<String, DiscoveryRecentSubmissionsConfiguration> recentSubmissionsConfigurationMap = new HashMap<String, DiscoveryRecentSubmissionsConfiguration>();
|
|
Set<String> moreLikeThisFields = new HashSet<String>();
|
|
for (DiscoveryConfiguration discoveryConfiguration : discoveryConfigurations)
|
|
{
|
|
for (int i = 0; i < discoveryConfiguration.getSearchFilters().size(); i++)
|
|
{
|
|
DiscoverySearchFilter discoverySearchFilter = discoveryConfiguration.getSearchFilters().get(i);
|
|
for (int j = 0; j < discoverySearchFilter.getMetadataFields().size(); j++)
|
|
{
|
|
String metadataField = discoverySearchFilter.getMetadataFields().get(j);
|
|
List<DiscoverySearchFilter> resultingList;
|
|
if(searchFilters.get(metadataField) != null)
|
|
{
|
|
resultingList = searchFilters.get(metadataField);
|
|
}else{
|
|
//New metadata field, create a new list for it
|
|
resultingList = new ArrayList<DiscoverySearchFilter>();
|
|
}
|
|
resultingList.add(discoverySearchFilter);
|
|
|
|
searchFilters.put(metadataField, resultingList);
|
|
}
|
|
}
|
|
|
|
DiscoverySortConfiguration sortConfiguration = discoveryConfiguration.getSearchSortConfiguration();
|
|
if(sortConfiguration != null)
|
|
{
|
|
for (DiscoverySortFieldConfiguration discoverySortConfiguration : sortConfiguration.getSortFields())
|
|
{
|
|
sortFields.put(discoverySortConfiguration.getMetadataField(), discoverySortConfiguration);
|
|
}
|
|
}
|
|
|
|
DiscoveryRecentSubmissionsConfiguration recentSubmissionConfiguration = discoveryConfiguration.getRecentSubmissionConfiguration();
|
|
if(recentSubmissionConfiguration != null)
|
|
{
|
|
recentSubmissionsConfigurationMap.put(recentSubmissionConfiguration.getMetadataSortField(), recentSubmissionConfiguration);
|
|
}
|
|
|
|
DiscoveryHitHighlightingConfiguration hitHighlightingConfiguration = discoveryConfiguration.getHitHighlightingConfiguration();
|
|
if(hitHighlightingConfiguration != null)
|
|
{
|
|
List<DiscoveryHitHighlightFieldConfiguration> fieldConfigurations = hitHighlightingConfiguration.getMetadataFields();
|
|
for (DiscoveryHitHighlightFieldConfiguration fieldConfiguration : fieldConfigurations)
|
|
{
|
|
hitHighlightingFields.add(fieldConfiguration.getField());
|
|
}
|
|
}
|
|
DiscoveryMoreLikeThisConfiguration moreLikeThisConfiguration = discoveryConfiguration.getMoreLikeThisConfiguration();
|
|
if(moreLikeThisConfiguration != null)
|
|
{
|
|
for(String metadataField : moreLikeThisConfiguration.getSimilarityMetadataFields())
|
|
{
|
|
moreLikeThisFields.add(metadataField);
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
List<String> toProjectionFields = new ArrayList<String>();
|
|
String projectionFieldsString = new DSpace().getConfigurationService().getProperty("discovery.index.projection");
|
|
if(projectionFieldsString != null){
|
|
if(projectionFieldsString.indexOf(",") != -1){
|
|
for (int i = 0; i < projectionFieldsString.split(",").length; i++) {
|
|
toProjectionFields.add(projectionFieldsString.split(",")[i].trim());
|
|
}
|
|
} else {
|
|
toProjectionFields.add(projectionFieldsString);
|
|
}
|
|
}
|
|
|
|
DCValue[] mydc = item.getMetadata(Item.ANY, Item.ANY, Item.ANY, Item.ANY);
|
|
for (DCValue meta : mydc)
|
|
{
|
|
String field = meta.schema + "." + meta.element;
|
|
String unqualifiedField = field;
|
|
|
|
String value = meta.value;
|
|
|
|
if (value == null)
|
|
{
|
|
continue;
|
|
}
|
|
|
|
if (meta.qualifier != null && !meta.qualifier.trim().equals(""))
|
|
{
|
|
field += "." + meta.qualifier;
|
|
}
|
|
|
|
List<String> toIgnoreMetadataFields = SearchUtils.getIgnoredMetadataFields(item.getType());
|
|
//Skip any metadata field configured to be ignored (e.g. provenance); it is not useful in the index
|
|
if (toIgnoreMetadataFields != null && (toIgnoreMetadataFields.contains(field) || toIgnoreMetadataFields.contains(unqualifiedField + "." + Item.ANY)))
|
|
{
|
|
continue;
|
|
}
|
|
|
|
String authority = null;
|
|
String preferedLabel = null;
|
|
List<String> variants = null;
|
|
boolean isAuthorityControlled = MetadataAuthorityManager
|
|
.getManager().isAuthorityControlled(meta.schema,
|
|
meta.element,
|
|
meta.qualifier);
|
|
|
|
int minConfidence = isAuthorityControlled?MetadataAuthorityManager
|
|
.getManager().getMinConfidence(
|
|
meta.schema,
|
|
meta.element,
|
|
meta.qualifier):Choices.CF_ACCEPTED;
|
|
|
|
if (isAuthorityControlled && meta.authority != null
|
|
&& meta.confidence >= minConfidence)
|
|
{
|
|
boolean ignoreAuthority = new DSpace()
|
|
.getConfigurationService()
|
|
.getPropertyAsType(
|
|
"discovery.index.authority.ignore." + field,
|
|
new DSpace()
|
|
.getConfigurationService()
|
|
.getPropertyAsType(
|
|
"discovery.index.authority.ignore",
|
|
new Boolean(false)), true);
|
|
if (!ignoreAuthority)
|
|
{
|
|
authority = meta.authority;
|
|
|
|
boolean ignorePrefered = new DSpace()
|
|
.getConfigurationService()
|
|
.getPropertyAsType(
|
|
"discovery.index.authority.ignore-prefered."
|
|
+ field,
|
|
new DSpace()
|
|
.getConfigurationService()
|
|
.getPropertyAsType(
|
|
"discovery.index.authority.ignore-prefered",
|
|
new Boolean(false)),
|
|
true);
|
|
if (!ignorePrefered)
|
|
{
|
|
|
|
preferedLabel = ChoiceAuthorityManager.getManager()
|
|
.getLabel(meta.schema, meta.element,
|
|
meta.qualifier, meta.authority,
|
|
meta.language);
|
|
}
|
|
|
|
boolean ignoreVariants = new DSpace()
|
|
.getConfigurationService()
|
|
.getPropertyAsType(
|
|
"discovery.index.authority.ignore-variants."
|
|
+ field,
|
|
new DSpace()
|
|
.getConfigurationService()
|
|
.getPropertyAsType(
|
|
"discovery.index.authority.ignore-variants",
|
|
new Boolean(false)),
|
|
true);
|
|
if (!ignoreVariants)
|
|
{
|
|
variants = ChoiceAuthorityManager.getManager()
|
|
.getVariants(meta.schema, meta.element,
|
|
meta.qualifier, meta.authority,
|
|
meta.language);
|
|
}
|
|
|
|
}
|
|
}
|
|
|
|
if ((searchFilters.get(field) != null || searchFilters.get(unqualifiedField + "." + Item.ANY) != null))
|
|
{
|
|
List<DiscoverySearchFilter> searchFilterConfigs = searchFilters.get(field);
|
|
if(searchFilterConfigs == null)
|
|
{
|
|
searchFilterConfigs = searchFilters.get(unqualifiedField + "." + Item.ANY);
|
|
}
|
|
|
|
for (DiscoverySearchFilter searchFilter : searchFilterConfigs)
|
|
{
|
|
Date date = null;
|
|
String separator = new DSpace().getConfigurationService().getProperty("discovery.solr.facets.split.char");
|
|
if(separator == null)
|
|
{
|
|
separator = FILTER_SEPARATOR;
|
|
}
|
|
if(searchFilter.getType().equals(DiscoveryConfigurationParameters.TYPE_DATE))
|
|
{
|
|
//For our search filters that are dates we format them properly
|
|
date = toDate(value);
|
|
if(date != null)
|
|
{
|
|
//TODO: make this date format configurable !
|
|
value = DateFormatUtils.formatUTC(date, "yyyy-MM-dd");
|
|
}
|
|
}
|
|
doc.addField(searchFilter.getIndexFieldName(), value);
|
|
doc.addField(searchFilter.getIndexFieldName() + "_keyword", value);
|
|
|
|
if (authority != null && preferedLabel == null)
|
|
{
|
|
doc.addField(searchFilter.getIndexFieldName()
|
|
+ "_keyword", value + AUTHORITY_SEPARATOR
|
|
+ authority);
|
|
doc.addField(searchFilter.getIndexFieldName()
|
|
+ "_authority", authority);
|
|
doc.addField(searchFilter.getIndexFieldName()
|
|
+ "_acid", value.toLowerCase()
|
|
+ separator + value
|
|
+ AUTHORITY_SEPARATOR + authority);
|
|
}
|
|
|
|
if (preferedLabel != null)
|
|
{
|
|
doc.addField(searchFilter.getIndexFieldName(),
|
|
preferedLabel);
|
|
doc.addField(searchFilter.getIndexFieldName()
|
|
+ "_keyword", preferedLabel);
|
|
doc.addField(searchFilter.getIndexFieldName()
|
|
+ "_keyword", preferedLabel
|
|
+ AUTHORITY_SEPARATOR + authority);
|
|
doc.addField(searchFilter.getIndexFieldName()
|
|
+ "_authority", authority);
|
|
doc.addField(searchFilter.getIndexFieldName()
|
|
+ "_acid", preferedLabel.toLowerCase()
|
|
+ separator + preferedLabel
|
|
+ AUTHORITY_SEPARATOR + authority);
|
|
}
|
|
if (variants != null)
|
|
{
|
|
for (String var : variants)
|
|
{
|
|
doc.addField(searchFilter.getIndexFieldName() + "_keyword", var);
|
|
doc.addField(searchFilter.getIndexFieldName()
|
|
+ "_acid", var.toLowerCase()
|
|
+ separator + var
|
|
+ AUTHORITY_SEPARATOR + authority);
|
|
}
|
|
}
|
|
|
|
//Add dynamic fields for autocomplete in search
|
|
doc.addField(searchFilter.getIndexFieldName() + "_ac",
|
|
value.toLowerCase() + separator + value);
|
|
if (preferedLabel != null)
|
|
{
|
|
doc.addField(searchFilter.getIndexFieldName()
|
|
+ "_ac", preferedLabel.toLowerCase()
|
|
+ separator + preferedLabel);
|
|
}
|
|
if (variants != null)
|
|
{
|
|
for (String var : variants)
|
|
{
|
|
doc.addField(searchFilter.getIndexFieldName()
|
|
+ "_ac", var.toLowerCase() + separator
|
|
+ var);
|
|
}
|
|
}
|
|
|
|
if(searchFilter.getFilterType().equals(DiscoverySearchFilterFacet.FILTER_TYPE_FACET))
|
|
{
|
|
if(searchFilter.getType().equals(DiscoveryConfigurationParameters.TYPE_TEXT))
|
|
{
|
|
//Add a special filter
|
|
//We use a separator to split up the lowercase and regular case; this is needed to get our filters in regular case
//because Solr has issues with facet prefixes and case
|
|
if (authority != null)
|
|
{
|
|
String facetValue = preferedLabel != null?preferedLabel:value;
|
|
doc.addField(searchFilter.getIndexFieldName() + "_filter", facetValue.toLowerCase() + separator + facetValue + AUTHORITY_SEPARATOR + authority);
|
|
}
|
|
else
|
|
{
|
|
doc.addField(searchFilter.getIndexFieldName() + "_filter", value.toLowerCase() + separator + value);
|
|
}
|
|
}else
|
|
if(searchFilter.getType().equals(DiscoveryConfigurationParameters.TYPE_DATE))
|
|
{
|
|
if(date != null)
|
|
{
|
|
String indexField = searchFilter.getIndexFieldName() + ".year";
|
|
String yearUTC = DateFormatUtils.formatUTC(date, "yyyy");
|
|
doc.addField(searchFilter.getIndexFieldName() + "_keyword", yearUTC);
|
|
// add the year to the autocomplete index
|
|
doc.addField(searchFilter.getIndexFieldName() + "_ac", yearUTC);
|
|
doc.addField(indexField, yearUTC);
|
|
|
|
if (yearUTC.startsWith("0"))
|
|
{
|
|
doc.addField(
|
|
searchFilter.getIndexFieldName()
|
|
+ "_keyword",
|
|
yearUTC.replaceFirst("0*", ""));
|
|
// add the date without leading zeros for autocomplete and filtering
|
|
doc.addField(
|
|
searchFilter.getIndexFieldName()
|
|
+ "_ac",
|
|
yearUTC.replaceFirst("0*", ""));
|
|
doc.addField(
|
|
searchFilter.getIndexFieldName()
|
|
+ "_ac",
|
|
value.replaceFirst("0*", ""));
|
|
doc.addField(
|
|
searchFilter.getIndexFieldName()
|
|
+ "_keyword",
|
|
value.replaceFirst("0*", ""));
|
|
}
|
|
|
|
//Also save a sort value of this year; this is required for determining the upper & lower bound year of our facet
|
|
if(doc.getField(indexField + "_sort") == null)
|
|
{
|
|
//We can only add one year so take the first one
|
|
doc.addField(indexField + "_sort", yearUTC);
|
|
}
|
|
}
|
|
}else
|
|
if(searchFilter.getType().equals(DiscoveryConfigurationParameters.TYPE_HIERARCHICAL))
|
|
{
|
|
HierarchicalSidebarFacetConfiguration hierarchicalSidebarFacetConfiguration = (HierarchicalSidebarFacetConfiguration) searchFilter;
|
|
String[] subValues = value.split(hierarchicalSidebarFacetConfiguration.getSplitter());
|
|
if(hierarchicalSidebarFacetConfiguration.isSkipFirstNodeLevel() && 1 < subValues.length)
|
|
{
|
|
//Remove the first element of our array
|
|
subValues = (String[]) ArrayUtils.subarray(subValues, 1, subValues.length);
|
|
}
|
|
for (int i = 0; i < subValues.length; i++)
|
|
{
|
|
StringBuilder valueBuilder = new StringBuilder();
|
|
for(int j = 0; j <= i; j++)
|
|
{
|
|
valueBuilder.append(subValues[j]);
|
|
if(j < i)
|
|
{
|
|
valueBuilder.append(hierarchicalSidebarFacetConfiguration.getSplitter());
|
|
}
|
|
}
|
|
|
|
String indexValue = valueBuilder.toString().trim();
|
|
doc.addField(searchFilter.getIndexFieldName() + "_tax_" + i + "_filter", indexValue.toLowerCase() + separator + indexValue);
|
|
//We add the field as many times as it occurs
|
|
for(int j = i; j < subValues.length; j++)
|
|
{
|
|
doc.addField(searchFilter.getIndexFieldName() + "_filter", indexValue.toLowerCase() + separator + indexValue);
|
|
doc.addField(searchFilter.getIndexFieldName() + "_keyword", indexValue);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if ((sortFields.get(field) != null || recentSubmissionsConfigurationMap.get(field) != null) && !sortFieldsAdded.contains(field))
|
|
{
|
|
//Only add sort value once
|
|
String type;
|
|
if(sortFields.get(field) != null)
|
|
{
|
|
type = sortFields.get(field).getType();
|
|
}else{
|
|
type = recentSubmissionsConfigurationMap.get(field).getType();
|
|
}
|
|
|
|
if(type.equals(DiscoveryConfigurationParameters.TYPE_DATE))
|
|
{
|
|
Date date = toDate(value);
|
|
if(date != null)
|
|
{
|
|
doc.addField(field + "_dt", date);
|
|
}else{
|
|
log.warn("Error while indexing sort date field, item: " + item.getHandle() + " metadata field: " + field + " date value: " + date);
|
|
}
|
|
}else{
|
|
doc.addField(field + "_sort", value);
|
|
}
|
|
sortFieldsAdded.add(field);
|
|
}
|
|
|
|
if(hitHighlightingFields.contains(field) || hitHighlightingFields.contains("*") || hitHighlightingFields.contains(unqualifiedField + "." + Item.ANY))
|
|
{
|
|
doc.addField(field + "_hl", value);
|
|
}
|
|
|
|
if(moreLikeThisFields.contains(field) || moreLikeThisFields.contains(unqualifiedField + "." + Item.ANY))
|
|
{
|
|
doc.addField(field + "_mlt", value);
|
|
}
|
|
|
|
doc.addField(field, value);
|
|
if (toProjectionFields.contains(field) || toProjectionFields.contains(unqualifiedField + "." + Item.ANY))
|
|
{
|
|
StringBuffer variantsToStore = new StringBuffer();
|
|
if (variants != null)
|
|
{
|
|
for (String var : variants)
|
|
{
|
|
variantsToStore.append(VARIANTS_STORE_SEPARATOR);
|
|
variantsToStore.append(var);
|
|
}
|
|
}
|
|
doc.addField(
|
|
field + "_stored",
|
|
value + STORE_SEPARATOR + preferedLabel
|
|
+ STORE_SEPARATOR
|
|
+ (variantsToStore.length() > VARIANTS_STORE_SEPARATOR
|
|
.length() ? variantsToStore
|
|
.substring(VARIANTS_STORE_SEPARATOR
|
|
.length()) : "null")
|
|
+ STORE_SEPARATOR + authority
|
|
+ STORE_SEPARATOR + meta.language);
|
|
}
|
|
|
|
if (meta.language != null && !meta.language.trim().equals(""))
|
|
{
|
|
String langField = field + "." + meta.language;
|
|
doc.addField(langField, value);
|
|
}
|
|
}
|
|
|
|
} catch (Exception e) {
|
|
log.error(e.getMessage(), e);
|
|
}
|
|
|
|
|
|
log.debug(" Added Metadata");
|
|
|
|
try {
|
|
|
|
DCValue[] values = item.getMetadata("dc.relation.ispartof");
|
|
|
|
if(values != null && values.length > 0 && values[0] != null && values[0].value != null)
|
|
{
|
|
// group on parent
|
|
String handlePrefix = ConfigurationManager.getProperty("handle.canonical.prefix");
|
|
if (handlePrefix == null || handlePrefix.length() == 0)
|
|
{
|
|
handlePrefix = "http://hdl.handle.net/";
|
|
}
|
|
|
|
doc.addField("publication_grp",values[0].value.replaceFirst(handlePrefix,"") );
|
|
|
|
}
|
|
else
|
|
{
|
|
// group on self
|
|
doc.addField("publication_grp", item.getHandle());
|
|
}
|
|
|
|
} catch (Exception e)
|
|
{
|
|
log.error(e.getMessage(),e);
|
|
}
|
|
|
|
|
|
log.debug(" Added Grouping");
|
|
|
|
|
|
Vector<InputStreamReader> readers = new Vector<InputStreamReader>();
|
|
|
|
try {
|
|
// now get full text of any bitstreams in the TEXT bundle
|
|
// trundle through the bundles
|
|
Bundle[] myBundles = item.getBundles();
|
|
|
|
for (Bundle myBundle : myBundles)
|
|
{
|
|
if ((myBundle.getName() != null)
|
|
&& myBundle.getName().equals("TEXT"))
|
|
{
|
|
// a-ha! grab the text out of the bitstreams
|
|
Bitstream[] myBitstreams = myBundle.getBitstreams();
|
|
|
|
for (Bitstream myBitstream : myBitstreams)
|
|
{
|
|
try {
|
|
InputStreamReader is = new InputStreamReader(
|
|
myBitstream.retrieve()); // get input
|
|
readers.add(is);
|
|
|
|
// Add each InputStream to the Indexed Document
|
|
String value = IOUtils.toString(is);
|
|
doc.addField("fulltext", value);
|
|
|
|
if(hitHighlightingFields.contains("*") || hitHighlightingFields.contains("fulltext"))
|
|
{
|
|
doc.addField("fulltext_hl", value);
|
|
}
|
|
|
|
log.debug(" Added BitStream: "
|
|
+ myBitstream.getStoreNumber() + " "
|
|
+ myBitstream.getSequenceID() + " "
|
|
+ myBitstream.getName());
|
|
|
|
} catch (Exception e)
|
|
{
|
|
// Problems reading a single bitstream are logged and skipped so that
// one bad bitstream does not prevent the rest of the item from being indexed.
|
|
log.trace(e.getMessage(), e);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
} catch (RuntimeException e)
|
|
{
|
|
log.error(e.getMessage(), e);
|
|
}
|
|
finally {
|
|
Iterator<InputStreamReader> itr = readers.iterator();
|
|
while (itr.hasNext()) {
|
|
InputStreamReader reader = itr.next();
|
|
if (reader != null) {
|
|
reader.close();
|
|
}
|
|
}
|
|
log.debug("closed " + readers.size() + " readers");
|
|
}
|
|
|
|
//Do any additional indexing, depends on the plugins
|
|
List<SolrServiceIndexPlugin> solrServiceIndexPlugins = new DSpace().getServiceManager().getServicesByType(SolrServiceIndexPlugin.class);
|
|
for (SolrServiceIndexPlugin solrServiceIndexPlugin : solrServiceIndexPlugins)
|
|
{
|
|
solrServiceIndexPlugin.additionalIndex(context, item, doc);
|
|
}
|
|
|
|
// write the index and close the inputstreamreaders
|
|
try {
|
|
writeDocument(doc);
|
|
log.info("Wrote Item: " + handle + " to Index");
|
|
} catch (RuntimeException e)
|
|
{
|
|
log.error("Error while writing item to discovery index: " + handle + " message:"+ e.getMessage(), e);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Create Lucene document with all the shared fields initialized.
|
|
*
|
|
* @param type Type of DSpace Object
|
|
* @param id
|
|
* @param handle
|
|
* @param locations @return
|
|
*/
|
|
protected SolrInputDocument buildDocument(int type, int id, String handle,
|
|
List<String> locations)
|
|
{
|
|
SolrInputDocument doc = new SolrInputDocument();
|
|
|
|
// want to be able to check when last updated
|
|
// (not tokenized, but it is indexed)
|
|
doc.addField(LAST_INDEXED_FIELD, new Date());
|
|
|
|
// New fields to weaken the dependence on handles, and allow for faster
|
|
// list display
|
|
doc.addField("search.uniqueid", type+"-"+id);
|
|
doc.addField("search.resourcetype", Integer.toString(type));
|
|
|
|
doc.addField("search.resourceid", Integer.toString(id));
|
|
|
|
// want to be able to search for handle, so use keyword
|
|
// (not tokenized, but it is indexed)
|
|
if (handle != null)
|
|
{
|
|
// want to be able to search for handle, so use keyword
|
|
// (not tokenized, but it is indexed)
|
|
doc.addField("handle", handle);
|
|
}
|
|
|
|
if (locations != null)
|
|
{
|
|
for (String location : locations)
|
|
{
|
|
doc.addField("location", location);
|
|
if (location.startsWith("m"))
|
|
{
|
|
doc.addField("location.comm", location.substring(1));
|
|
}
|
|
else
|
|
{
|
|
doc.addField("location.coll", location.substring(1));
|
|
}
|
|
}
|
|
}
|
|
|
|
return doc;
|
|
}
|
|
|
|
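/*
 * Example of the shared fields produced above for an Item (type 2) with a
 * hypothetical id of 42 in community 2 / collection 7:
 *
 *     search.uniqueid         = "2-42"
 *     search.resourcetype     = "2"
 *     search.resourceid       = "42"
 *     handle                  = "123456789/42"
 *     location                = ["m2", "l7"], location.comm = "2", location.coll = "7"
 *     SolrIndexer.lastIndexed = <time of indexing>
 */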
/**
|
|
* Helper function to retrieve a date using a best guess of the potential
|
|
* date encodings on a field
|
|
*
|
|
* @param t the string to be transformed to a date
|
|
* @return a date if the formatting was successful, null if not able to transform to a date
|
|
*/
|
|
public static Date toDate(String t)
|
|
{
|
|
SimpleDateFormat[] dfArr;
|
|
|
|
// Choose the likely date formats based on string length
|
|
switch (t.length())
|
|
{
|
|
// Cases 1 to 3 fall through, each prepending a single "0"; case 4 then
// defines the SimpleDateFormat used for all of them.
|
|
case 1:
|
|
t = "0" + t;
|
|
case 2:
|
|
t = "0" + t;
|
|
case 3:
|
|
t = "0" + t;
|
|
case 4:
|
|
dfArr = new SimpleDateFormat[]{new SimpleDateFormat("yyyy")};
|
|
break;
|
|
case 6:
|
|
dfArr = new SimpleDateFormat[]{new SimpleDateFormat("yyyyMM")};
|
|
break;
|
|
case 7:
|
|
dfArr = new SimpleDateFormat[]{new SimpleDateFormat("yyyy-MM")};
|
|
break;
|
|
case 8:
|
|
dfArr = new SimpleDateFormat[]{new SimpleDateFormat("yyyyMMdd"),
|
|
new SimpleDateFormat("yyyy MMM")};
|
|
break;
|
|
case 10:
|
|
dfArr = new SimpleDateFormat[]{new SimpleDateFormat("yyyy-MM-dd")};
|
|
break;
|
|
case 11:
|
|
dfArr = new SimpleDateFormat[]{new SimpleDateFormat("yyyy MMM dd")};
|
|
break;
|
|
case 20:
|
|
dfArr = new SimpleDateFormat[]{new SimpleDateFormat(
|
|
"yyyy-MM-dd'T'HH:mm:ss'Z'")};
|
|
break;
|
|
default:
|
|
dfArr = new SimpleDateFormat[]{new SimpleDateFormat(
|
|
"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'")};
|
|
break;
|
|
}
|
|
|
|
for (SimpleDateFormat df : dfArr)
|
|
{
|
|
try {
|
|
// Parse the date
|
|
df.setCalendar(Calendar
|
|
.getInstance(TimeZone.getTimeZone("UTC")));
|
|
df.setLenient(false);
|
|
return df.parse(t);
|
|
} catch (ParseException pe)
|
|
{
|
|
log.error("Unable to parse date format", pe);
|
|
}
|
|
}
|
|
|
|
return null;
|
|
}
|
|
|
|
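/*
 * Examples of the best-guess parsing above (all interpreted in UTC):
 *   toDate("2013")                 -> 2013-01-01T00:00:00Z
 *   toDate("2013-05")              -> 2013-05-01T00:00:00Z
 *   toDate("2013-05-17")           -> 2013-05-17T00:00:00Z
 *   toDate("2013-05-17T08:30:00Z") -> 2013-05-17T08:30:00Z
 * Strings that match none of the formats return null.
 */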
public static String locationToName(Context context, String field, String value) throws SQLException {
|
|
if("location.comm".equals(field) || "location.coll".equals(field))
|
|
{
|
|
int type = field.equals("location.comm") ? Constants.COMMUNITY : Constants.COLLECTION;
|
|
DSpaceObject commColl = DSpaceObject.find(context, type, Integer.parseInt(value));
|
|
if(commColl != null)
|
|
{
|
|
return commColl.getName();
|
|
}
|
|
|
|
}
|
|
return value;
|
|
}
|
|
|
|
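/*
 * Example (hypothetical ID): locationToName(context, "location.comm", "2") returns
 * the name of community 2; unknown fields or unresolvable objects simply return
 * the raw value.
 */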
//******** SearchService implementation
|
|
@Override
|
|
public DiscoverResult search(Context context, DiscoverQuery query) throws SearchServiceException
|
|
{
|
|
return search(context, query, false);
|
|
}
|
|
|
|
@Override
|
|
public DiscoverResult search(Context context, DSpaceObject dso,
|
|
DiscoverQuery query)
|
|
throws SearchServiceException
|
|
{
|
|
return search(context, dso, query, false);
|
|
}
|
|
|
|
public DiscoverResult search(Context context, DSpaceObject dso, DiscoverQuery discoveryQuery, boolean includeWithdrawn) throws SearchServiceException {
|
|
if(dso != null)
|
|
{
|
|
if (dso instanceof Community)
|
|
{
|
|
discoveryQuery.addFilterQueries("location:m" + dso.getID());
|
|
} else if (dso instanceof Collection)
|
|
{
|
|
discoveryQuery.addFilterQueries("location:l" + dso.getID());
|
|
} else if (dso instanceof Item)
|
|
{
|
|
discoveryQuery.addFilterQueries("handle:" + dso.getHandle());
|
|
}
|
|
}
|
|
return search(context, discoveryQuery, includeWithdrawn);
|
|
|
|
}
|
|
|
|
|
|
public DiscoverResult search(Context context, DiscoverQuery discoveryQuery, boolean includeWithdrawn) throws SearchServiceException {
|
|
try {
|
|
if(getSolr() == null){
|
|
return new DiscoverResult();
|
|
}
|
|
SolrQuery solrQuery = resolveToSolrQuery(context, discoveryQuery, includeWithdrawn);
|
|
|
|
|
|
QueryResponse queryResponse = getSolr().query(solrQuery);
|
|
return retrieveResult(context, discoveryQuery, queryResponse);
|
|
|
|
} catch (Exception e)
|
|
{
|
|
throw new org.dspace.discovery.SearchServiceException(e.getMessage(),e);
|
|
}
|
|
}
|
|
|
|
protected SolrQuery resolveToSolrQuery(Context context, DiscoverQuery discoveryQuery, boolean includeWithdrawn)
|
|
{
|
|
SolrQuery solrQuery = new SolrQuery();
|
|
|
|
String query = "*:*";
|
|
if(discoveryQuery.getQuery() != null)
|
|
{
|
|
query = discoveryQuery.getQuery();
|
|
}
|
|
|
|
solrQuery.setQuery(query);
|
|
if(discoveryQuery.isSpellCheck())
|
|
{
|
|
solrQuery.setParam(SpellingParams.SPELLCHECK_Q, query);
|
|
solrQuery.setParam(SpellingParams.SPELLCHECK_COLLATE, Boolean.TRUE);
|
|
solrQuery.setParam("spellcheck", Boolean.TRUE);
|
|
}
|
|
|
|
if (!includeWithdrawn)
|
|
{
|
|
solrQuery.addFilterQuery("NOT(withdrawn:true)");
|
|
}
|
|
|
|
for (int i = 0; i < discoveryQuery.getFilterQueries().size(); i++)
|
|
{
|
|
String filterQuery = discoveryQuery.getFilterQueries().get(i);
|
|
solrQuery.addFilterQuery(filterQuery);
|
|
}
|
|
if(discoveryQuery.getDSpaceObjectFilter() != -1)
|
|
{
|
|
solrQuery.addFilterQuery("search.resourcetype:" + discoveryQuery.getDSpaceObjectFilter());
|
|
}
|
|
|
|
for (int i = 0; i < discoveryQuery.getFieldPresentQueries().size(); i++)
|
|
{
|
|
String filterQuery = discoveryQuery.getFieldPresentQueries().get(i);
|
|
solrQuery.addFilterQuery(filterQuery + ":[* TO *]");
|
|
}
|
|
|
|
if(discoveryQuery.getStart() != -1)
|
|
{
|
|
solrQuery.setStart(discoveryQuery.getStart());
|
|
}
|
|
|
|
if(discoveryQuery.getMaxResults() != -1)
|
|
{
|
|
solrQuery.setRows(discoveryQuery.getMaxResults());
|
|
}
|
|
|
|
if(discoveryQuery.getSortField() != null)
|
|
{
|
|
SolrQuery.ORDER order = SolrQuery.ORDER.asc;
|
|
if(discoveryQuery.getSortOrder().equals(DiscoverQuery.SORT_ORDER.desc))
|
|
order = SolrQuery.ORDER.desc;
|
|
|
|
solrQuery.addSortField(discoveryQuery.getSortField(), order);
|
|
}
|
|
|
|
for(String property : discoveryQuery.getProperties().keySet())
|
|
{
|
|
List<String> values = discoveryQuery.getProperties().get(property);
|
|
solrQuery.add(property, values.toArray(new String[values.size()]));
|
|
}
|
|
|
|
List<DiscoverFacetField> facetFields = discoveryQuery.getFacetFields();
|
|
if(0 < facetFields.size())
|
|
{
|
|
//Only add facet information if there are any facets
|
|
for (DiscoverFacetField facetFieldConfig : facetFields)
|
|
{
|
|
String field = transformFacetField(facetFieldConfig, facetFieldConfig.getField(), false);
|
|
solrQuery.addFacetField(field);
|
|
|
|
// Setting the facet limit in this fashion ensures that each facet can have its own max
|
|
solrQuery.add("f." + field + "." + FacetParams.FACET_LIMIT, String.valueOf(facetFieldConfig.getLimit()));
|
|
String facetSort;
|
|
if(DiscoveryConfigurationParameters.SORT.COUNT.equals(facetFieldConfig.getSortOrder()))
|
|
{
|
|
facetSort = FacetParams.FACET_SORT_COUNT;
|
|
}else{
|
|
facetSort = FacetParams.FACET_SORT_INDEX;
|
|
}
|
|
solrQuery.add("f." + field + "." + FacetParams.FACET_SORT, facetSort);
|
|
if (facetFieldConfig.getOffset() != -1)
|
|
{
|
|
solrQuery.setParam("f." + field + "."
|
|
+ FacetParams.FACET_OFFSET,
|
|
String.valueOf(facetFieldConfig.getOffset()));
|
|
}
|
|
if(facetFieldConfig.getPrefix() != null)
|
|
{
|
|
solrQuery.setFacetPrefix(field, facetFieldConfig.getPrefix());
|
|
}
|
|
}
|
|
|
|
List<String> facetQueries = discoveryQuery.getFacetQueries();
|
|
for (String facetQuery : facetQueries)
|
|
{
|
|
solrQuery.addFacetQuery(facetQuery);
|
|
}
|
|
|
|
if(discoveryQuery.getFacetMinCount() != -1)
|
|
{
|
|
solrQuery.setFacetMinCount(discoveryQuery.getFacetMinCount());
|
|
}
|
|
|
|
solrQuery.setParam(FacetParams.FACET_OFFSET, String.valueOf(discoveryQuery.getFacetOffset()));
|
|
}
|
|
|
|
if(0 < discoveryQuery.getHitHighlightingFields().size())
|
|
{
|
|
solrQuery.setHighlight(true);
|
|
solrQuery.add(HighlightParams.USE_PHRASE_HIGHLIGHTER, Boolean.TRUE.toString());
|
|
for (DiscoverHitHighlightingField highlightingField : discoveryQuery.getHitHighlightingFields())
|
|
{
|
|
solrQuery.addHighlightField(highlightingField.getField() + "_hl");
|
|
solrQuery.add("f." + highlightingField.getField() + "_hl." + HighlightParams.FRAGSIZE, String.valueOf(highlightingField.getMaxChars()));
|
|
solrQuery.add("f." + highlightingField.getField() + "_hl." + HighlightParams.SNIPPETS, String.valueOf(highlightingField.getMaxSnippets()));
|
|
}
|
|
|
|
}
|
|
|
|
//Add any configured search plugins !
|
|
List<SolrServiceSearchPlugin> solrServiceSearchPlugins = new DSpace().getServiceManager().getServicesByType(SolrServiceSearchPlugin.class);
|
|
for (SolrServiceSearchPlugin searchPlugin : solrServiceSearchPlugins)
|
|
{
|
|
searchPlugin.additionalSearchParameters(context, discoveryQuery, solrQuery);
|
|
}
|
|
return solrQuery;
|
|
}
|
|
|
|
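/*
 * Illustrative sketch (not part of the original class): building a DiscoverQuery
 * and running it through search(). addFilterQueries() and the getters used by
 * resolveToSolrQuery() above exist on DiscoverQuery; setQuery() and setMaxResults()
 * are assumed setter names matching those getters, and "searchService" is assumed
 * to hold this service.
 *
 *     DiscoverQuery query = new DiscoverQuery();
 *     query.setQuery("dark matter");
 *     query.addFilterQueries("search.resourcetype:2");
 *     query.setMaxResults(10);
 *     DiscoverResult result = searchService.search(context, query);
 */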
@Override
|
|
public InputStream searchJSON(Context context, DiscoverQuery query, DSpaceObject dso, String jsonIdentifier) throws SearchServiceException {
|
|
if(dso != null)
|
|
{
|
|
if (dso instanceof Community)
|
|
{
|
|
query.addFilterQueries("location:m" + dso.getID());
|
|
} else if (dso instanceof Collection)
|
|
{
|
|
query.addFilterQueries("location:l" + dso.getID());
|
|
} else if (dso instanceof Item)
|
|
{
|
|
query.addFilterQueries("handle:" + dso.getHandle());
|
|
}
|
|
}
|
|
return searchJSON(context, query, jsonIdentifier);
|
|
}
|
|
|
|
|
|
public InputStream searchJSON(Context context, DiscoverQuery discoveryQuery, String jsonIdentifier) throws SearchServiceException {
|
|
if(getSolr() == null)
|
|
{
|
|
return null;
|
|
}
|
|
|
|
SolrQuery solrQuery = resolveToSolrQuery(context, discoveryQuery, false);
|
|
//We use JSON as our output type
|
|
solrQuery.setParam("json.nl", "map");
|
|
solrQuery.setParam("json.wrf", jsonIdentifier);
|
|
solrQuery.setParam(CommonParams.WT, "json");
|
|
|
|
StringBuilder urlBuilder = new StringBuilder();
|
|
urlBuilder.append(getSolr().getBaseURL()).append("/select?");
|
|
urlBuilder.append(solrQuery.toString());
|
|
|
|
try {
|
|
GetMethod get = new GetMethod(urlBuilder.toString());
|
|
new HttpClient().executeMethod(get);
|
|
return get.getResponseBodyAsStream();
|
|
|
|
} catch (Exception e)
|
|
{
|
|
log.error("Error while getting json solr result for discovery search recommendation", e);
|
|
}
|
|
return null;
|
|
}
|
|
|
|
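/*
 * Note on the JSON output above: "json.wrf" makes Solr wrap the response in the
 * given callback name (JSONP style), so searchJSON(context, query, "callback")
 * streams back something of the form callback({"response":{...}}).
 */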
    protected DiscoverResult retrieveResult(Context context, DiscoverQuery query, QueryResponse solrQueryResponse) throws SQLException {
        DiscoverResult result = new DiscoverResult();

        if(solrQueryResponse != null)
        {
            result.setSearchTime(solrQueryResponse.getQTime());
            result.setStart(query.getStart());
            result.setMaxResults(query.getMaxResults());
            result.setTotalSearchResults(solrQueryResponse.getResults().getNumFound());

            List<String> searchFields = query.getSearchFields();
            for (SolrDocument doc : solrQueryResponse.getResults())
            {
                DSpaceObject dso = findDSpaceObject(context, doc);

                if(dso != null)
                {
                    result.addDSpaceObject(dso);
                } else {
                    log.error(LogManager.getHeader(context, "Error while retrieving DSpace object from discovery index", "Handle: " + doc.getFirstValue("handle")));
                    continue;
                }

                DiscoverResult.SearchDocument resultDoc = new DiscoverResult.SearchDocument();
                //Add information about our search fields
                for (String field : searchFields)
                {
                    List<String> valuesAsString = new ArrayList<String>();
                    for (Object o : doc.getFieldValues(field))
                    {
                        valuesAsString.add(String.valueOf(o));
                    }
                    resultDoc.addSearchField(field, valuesAsString.toArray(new String[valuesAsString.size()]));
                }
                result.addSearchDocument(dso, resultDoc);

                if(solrQueryResponse.getHighlighting() != null)
                {
                    Map<String, List<String>> highlightedFields = solrQueryResponse.getHighlighting().get(dso.getType() + "-" + dso.getID());
                    if(MapUtils.isNotEmpty(highlightedFields))
                    {
                        //We need to remove the "_hl" suffix from our keys
                        Map<String, List<String>> resultMap = new HashMap<String, List<String>>();
                        for(String key : highlightedFields.keySet())
                        {
                            resultMap.put(key.substring(0, key.lastIndexOf("_hl")), highlightedFields.get(key));
                        }

                        result.addHighlightedResult(dso, new DiscoverResult.DSpaceObjectHighlightResult(dso, resultMap));
                    }
                }
            }

            //Resolve our facet field values
            List<FacetField> facetFields = solrQueryResponse.getFacetFields();
            if(facetFields != null)
            {
                for (int i = 0; i < facetFields.size(); i++)
                {
                    FacetField facetField = facetFields.get(i);
                    DiscoverFacetField facetFieldConfig = query.getFacetFields().get(i);
                    List<FacetField.Count> facetValues = facetField.getValues();
                    if (facetValues != null)
                    {
                        if(facetFieldConfig.getType().equals(DiscoveryConfigurationParameters.TYPE_DATE) && facetFieldConfig.getSortOrder().equals(DiscoveryConfigurationParameters.SORT.VALUE))
                        {
                            //If we have a date facet sorted by value, reverse the results so they are in the proper order
                            Collections.reverse(facetValues);
                        }

                        for (FacetField.Count facetValue : facetValues)
                        {
                            String displayedValue = transformDisplayedValue(context, facetField.getName(), facetValue.getName());
                            String field = transformFacetField(facetFieldConfig, facetField.getName(), true);
                            String authorityValue = transformAuthorityValue(context, facetField.getName(), facetValue.getName());
                            String sortValue = transformSortValue(context, facetField.getName(), facetValue.getName());
                            String filterValue = displayedValue;
                            if (StringUtils.isNotBlank(authorityValue))
                            {
                                filterValue = authorityValue;
                            }
                            result.addFacetResult(
                                    field,
                                    new DiscoverResult.FacetResult(filterValue,
                                            displayedValue, authorityValue,
                                            sortValue, facetValue.getCount()));
                        }
                    }
                }
            }

            if(solrQueryResponse.getFacetQuery() != null)
            {
                // Retrieve the facet queries in the order they were requested!
                // For dates they are also requested in the proper (reverse) order.
                // At the moment facet queries are only used for dates.
                LinkedHashMap<String, Integer> sortedFacetQueries = new LinkedHashMap<String, Integer>(solrQueryResponse.getFacetQuery());
                for(String facetQuery : sortedFacetQueries.keySet())
                {
                    //TODO: do not assume this; facet queries may be used for other purposes, use a regex to make sure
                    //We have a facet query; the value looks something like: dateissued.year:[1990 TO 2000] AND -2000
                    //Turn the string from {facet.field.name}:[startyear TO endyear] into startyear - endyear
                    String facetField = facetQuery.substring(0, facetQuery.indexOf(":"));
                    String name = facetQuery.substring(facetQuery.indexOf('[') + 1);
                    name = name.substring(0, name.lastIndexOf(']')).replaceAll("TO", "-");
                    String filter = facetQuery.substring(facetQuery.indexOf('['));
                    filter = filter.substring(0, filter.lastIndexOf(']') + 1);

                    Integer count = sortedFacetQueries.get(facetQuery);

                    //No need to show empty years
                    if(0 < count)
                    {
                        result.addFacetResult(facetField, new DiscoverResult.FacetResult(filter, name, null, name, count));
                    }
                }
            }

            if(solrQueryResponse.getSpellCheckResponse() != null)
            {
                String recommendedQuery = solrQueryResponse.getSpellCheckResponse().getCollatedResult();
                if(StringUtils.isNotBlank(recommendedQuery))
                {
                    result.setSpellCheckQuery(recommendedQuery);
                }
            }
        }

        return result;
    }

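    /**
     * Resolves a Solr document to its corresponding DSpaceObject, preferring the indexed
     * resource type/id pair and falling back to the handle when those fields are missing.
     * Returns null when neither is present.
     */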
    protected static DSpaceObject findDSpaceObject(Context context, SolrDocument doc) throws SQLException {

        Integer type = (Integer) doc.getFirstValue("search.resourcetype");
        Integer id = (Integer) doc.getFirstValue("search.resourceid");
        String handle = (String) doc.getFirstValue("handle");

        if (type != null && id != null)
        {
            return DSpaceObject.find(context, type, id);
        } else if (handle != null)
        {
            return HandleManager.resolveToObject(context, handle);
        }

        return null;
    }

    /** Simple means to return the search result as an InputStream */
    public java.io.InputStream searchAsInputStream(DiscoverQuery query) throws SearchServiceException, java.io.IOException {
        if(getSolr() == null)
        {
            return null;
        }
        HttpHost hostURL = (HttpHost)(getSolr().getHttpClient().getParams().getParameter(ClientPNames.DEFAULT_HOST));

        HttpGet method = new HttpGet(hostURL.toHostString() + "");
        try
        {
            URI uri = new URIBuilder(method.getURI()).addParameter("q", query.toString()).build();
            //Apply the built URI to the request; otherwise the query parameter would never be sent
            method.setURI(uri);
        }
        catch (URISyntaxException e)
        {
            throw new SearchServiceException(e);
        }

        HttpResponse response = getSolr().getHttpClient().execute(method);

        return response.getEntity().getContent();
    }

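    /**
     * Convenience wrapper around {@link #search(Context, String, String, boolean, int, int, String...)}
     * that runs the query without an explicit sort field.
     */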
    public List<DSpaceObject> search(Context context, String query, int offset, int max, String... filterquery)
    {
        return search(context, query, null, true, offset, max, filterquery);
    }

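    /**
     * Runs a raw Solr query and resolves the hits to DSpaceObjects. Any exception is logged and an
     * empty list is returned, so callers never see a search failure.
     */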
    public List<DSpaceObject> search(Context context, String query, String orderfield, boolean ascending, int offset, int max, String... filterquery)
    {

        try {
            if(getSolr() == null)
            {
                return Collections.emptyList();
            }

            SolrQuery solrQuery = new SolrQuery();
            solrQuery.setQuery(query);
            solrQuery.setFields("search.resourceid", "search.resourcetype");
            solrQuery.setStart(offset);
            solrQuery.setRows(max);
            if (orderfield != null)
            {
                solrQuery.setSortField(orderfield, ascending ? SolrQuery.ORDER.asc : SolrQuery.ORDER.desc);
            }
            if (filterquery != null)
            {
                solrQuery.addFilterQuery(filterquery);
            }
            QueryResponse rsp = getSolr().query(solrQuery);
            SolrDocumentList docs = rsp.getResults();

            Iterator iter = docs.iterator();
            List<DSpaceObject> result = new ArrayList<DSpaceObject>();
            while (iter.hasNext())
            {
                SolrDocument doc = (SolrDocument) iter.next();

                DSpaceObject o = DSpaceObject.find(context, (Integer) doc.getFirstValue("search.resourcetype"), (Integer) doc.getFirstValue("search.resourceid"));

                if (o != null)
                {
                    result.add(o);
                }
            }
            return result;
        } catch (Exception e)
        {
            // Ignore any exception that we get;
            // we do NOT want any crash to be shown to the user
            log.error(LogManager.getHeader(context, "Error while querying solr", "Query: " + query), e);
            return new ArrayList<DSpaceObject>(0);
        }
    }

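    /**
     * Builds a Discovery filter query for the given field, operator and value. The "equals" and
     * "authority" operators target the _keyword and _authority variants of the field, the
     * "not..." operators negate the clause, and range queries ([x TO y]) are passed through unescaped.
     */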
    public DiscoverFilterQuery toFilterQuery(Context context, String field, String operator, String value) throws SQLException{
        DiscoverFilterQuery result = new DiscoverFilterQuery();

        StringBuilder filterQuery = new StringBuilder();
        if(StringUtils.isNotBlank(field))
        {
            filterQuery.append(field);
            if("equals".equals(operator))
            {
                //Query the keyword indexed field !
                filterQuery.append("_keyword");
            }
            else if ("authority".equals(operator))
            {
                //Query the authority indexed field !
                filterQuery.append("_authority");
            }
            else if ("notequals".equals(operator)
                    || "notcontains".equals(operator)
                    || "notauthority".equals(operator))
            {
                filterQuery.insert(0, "-");
            }
            filterQuery.append(":");
            if("equals".equals(operator) || "notequals".equals(operator))
            {
                //DO NOT ESCAPE RANGE QUERIES !
                if(!value.matches("\\[.*TO.*\\]"))
                {
                    value = ClientUtils.escapeQueryChars(value);
                    filterQuery.append(value);
                }
                else
                {
                    if (value.matches("\\[\\d{1,4} TO \\d{1,4}\\]"))
                    {
                        int minRange = Integer.parseInt(value.substring(1, value.length()-1).split(" TO ")[0]);
                        int maxRange = Integer.parseInt(value.substring(1, value.length()-1).split(" TO ")[1]);
                        value = "[" + String.format("%04d", minRange) + " TO " + String.format("%04d", maxRange) + "]";
                    }
                    filterQuery.append(value);
                }
            }
            else{
                //DO NOT ESCAPE RANGE QUERIES !
                if(!value.matches("\\[.*TO.*\\]"))
                {
                    value = ClientUtils.escapeQueryChars(value);
                    filterQuery.append("(").append(value).append(")");
                }
                else
                {
                    filterQuery.append(value);
                }
            }
        }

        result.setDisplayedValue(transformDisplayedValue(context, field, value));
        result.setFilterQuery(filterQuery.toString());
        return result;
    }

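    /**
     * Uses Solr's MoreLikeThis handler to find items similar to the given item, based on the
     * similarity metadata fields (indexed with the "_mlt" suffix) configured in the supplied
     * {@link DiscoveryMoreLikeThisConfiguration}.
     */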
    @Override
    public List<Item> getRelatedItems(Context context, Item item, DiscoveryMoreLikeThisConfiguration mltConfig)
    {
        List<Item> results = new ArrayList<Item>();
        try{
            SolrQuery solrQuery = new SolrQuery();
            //Set the query to the handle since this is unique
            solrQuery.setQuery("handle: " + item.getHandle());
            //Add the more like this parameters !
            solrQuery.setParam(MoreLikeThisParams.MLT, true);
            //Add a comma separated list of the similar fields
            @SuppressWarnings("unchecked")
            java.util.Collection<String> similarityMetadataFields = CollectionUtils.collect(mltConfig.getSimilarityMetadataFields(), new Transformer()
            {
                @Override
                public Object transform(Object input)
                {
                    //Add the "_mlt" suffix
                    return input + "_mlt";
                }
            });

            solrQuery.setParam(MoreLikeThisParams.SIMILARITY_FIELDS, StringUtils.join(similarityMetadataFields, ','));
            solrQuery.setParam(MoreLikeThisParams.MIN_TERM_FREQ, String.valueOf(mltConfig.getMinTermFrequency()));
            solrQuery.setParam(MoreLikeThisParams.DOC_COUNT, String.valueOf(mltConfig.getMax()));
            solrQuery.setParam(MoreLikeThisParams.MIN_WORD_LEN, String.valueOf(mltConfig.getMinWordLength()));

            if(getSolr() == null)
            {
                return Collections.emptyList();
            }
            QueryResponse rsp = getSolr().query(solrQuery);
            NamedList mltResults = (NamedList) rsp.getResponse().get("moreLikeThis");
            if(mltResults != null && mltResults.get(item.getType() + "-" + item.getID()) != null)
            {
                SolrDocumentList relatedDocs = (SolrDocumentList) mltResults.get(item.getType() + "-" + item.getID());
                for (Object relatedDoc : relatedDocs)
                {
                    SolrDocument relatedDocument = (SolrDocument) relatedDoc;
                    DSpaceObject relatedItem = findDSpaceObject(context, relatedDocument);
                    //Guard against documents that can no longer be resolved to a DSpace object
                    if (relatedItem != null && relatedItem.getType() == Constants.ITEM)
                    {
                        results.add((Item) relatedItem);
                    }
                }
            }

        } catch (Exception e)
        {
            log.error(LogManager.getHeader(context, "Error while retrieving related items", "Handle: " + item.getHandle()), e);
        }
        return results;
    }

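    /**
     * Maps a metadata field to the Solr field used for sorting: "_dt" for date fields,
     * "_sort" for everything else.
     */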
    @Override
    public String toSortFieldIndex(String metadataField, String type)
    {
        if(type.equals(DiscoveryConfigurationParameters.TYPE_DATE))
        {
            return metadataField + "_dt";
        }else{
            return metadataField + "_sort";
        }
    }

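    /**
     * Adds or removes the type-specific Solr suffix of a facet field: "_filter" for text,
     * ".year" for dates, "_ac" for autocomplete, "_tax_..." for hierarchical and "_acid"
     * for authority facets. Standard fields are returned unchanged.
     */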
    protected String transformFacetField(DiscoverFacetField facetFieldConfig, String field, boolean removePostfix)
    {
        if(facetFieldConfig.getType().equals(DiscoveryConfigurationParameters.TYPE_TEXT))
        {
            if(removePostfix)
            {
                return field.substring(0, field.lastIndexOf("_filter"));
            }else{
                return field + "_filter";
            }
        }else if(facetFieldConfig.getType().equals(DiscoveryConfigurationParameters.TYPE_DATE))
        {
            if(removePostfix)
            {
                return field.substring(0, field.lastIndexOf(".year"));
            }else{
                return field + ".year";
            }
        }else if(facetFieldConfig.getType().equals(DiscoveryConfigurationParameters.TYPE_AC))
        {
            if(removePostfix)
            {
                return field.substring(0, field.lastIndexOf("_ac"));
            }else{
                return field + "_ac";
            }
        }else if(facetFieldConfig.getType().equals(DiscoveryConfigurationParameters.TYPE_HIERARCHICAL))
        {
            if(removePostfix)
            {
                return StringUtils.substringBeforeLast(field, "_tax_");
            }else{
                //Only display top level filters !
                return field + "_tax_0_filter";
            }
        }else if(facetFieldConfig.getType().equals(DiscoveryConfigurationParameters.TYPE_AUTHORITY))
        {
            if(removePostfix)
            {
                return field.substring(0, field.lastIndexOf("_acid"));
            }else{
                return field + "_acid";
            }
        }else if(facetFieldConfig.getType().equals(DiscoveryConfigurationParameters.TYPE_STANDARD))
        {
            return field;
        }else{
            return field;
        }
    }

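    /**
     * Derives the human readable value of a facet/filter entry: community and collection
     * identifiers are resolved to their names, split filter values keep only the display part,
     * and surrounding brackets added for Solr are stripped.
     */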
    protected String transformDisplayedValue(Context context, String field, String value) throws SQLException {
        if(field.equals("location.comm") || field.equals("location.coll"))
        {
            value = locationToName(context, field, value);
        }
        else if (field.endsWith("_filter") || field.endsWith("_ac")
                || field.endsWith("_acid"))
        {
            //We have a filter, make sure we split !
            String separator = new DSpace().getConfigurationService().getProperty("discovery.solr.facets.split.char");
            if(separator == null)
            {
                separator = FILTER_SEPARATOR;
            }
            //Escape any regex chars
            separator = java.util.regex.Pattern.quote(separator);
            String[] fqParts = value.split(separator);
            StringBuffer valueBuffer = new StringBuffer();
            int start = fqParts.length / 2;
            for(int i = start; i < fqParts.length; i++)
            {
                String[] split = fqParts[i].split(AUTHORITY_SEPARATOR, 2);
                valueBuffer.append(split[0]);
            }
            value = valueBuffer.toString();
        }else if(value.matches("\\((.*?)\\)"))
        {
            //The brackets were added for better solr results, remove the first & last one
            value = value.substring(1, value.length() -1);
        }
        return value;
    }

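    /**
     * Extracts the authority key from a split facet/filter value, or returns null when the field
     * has no authority part (e.g. community/collection facets or plain values).
     */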
    protected String transformAuthorityValue(Context context, String field, String value) throws SQLException {
        if(field.equals("location.comm") || field.equals("location.coll"))
        {
            return value;
        }
        if (field.endsWith("_filter") || field.endsWith("_ac")
                || field.endsWith("_acid"))
        {
            //We have a filter, make sure we split !
            String separator = new DSpace().getConfigurationService().getProperty("discovery.solr.facets.split.char");
            if(separator == null)
            {
                separator = FILTER_SEPARATOR;
            }
            //Escape any regex chars
            separator = java.util.regex.Pattern.quote(separator);
            String[] fqParts = value.split(separator);
            StringBuffer authorityBuffer = new StringBuffer();
            int start = fqParts.length / 2;
            for(int i = start; i < fqParts.length; i++)
            {
                String[] split = fqParts[i].split(AUTHORITY_SEPARATOR, 2);
                if (split.length == 2)
                {
                    authorityBuffer.append(split[1]);
                }
            }
            if (authorityBuffer.length() > 0)
            {
                return authorityBuffer.toString();
            }
        }
        return null;
    }

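    /**
     * Derives the value used to sort a facet entry: community and collection identifiers are
     * resolved to their names, split filter values keep only the sort part, and surrounding
     * brackets added for Solr are stripped.
     */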
    protected String transformSortValue(Context context, String field, String value) throws SQLException {
        if(field.equals("location.comm") || field.equals("location.coll"))
        {
            value = locationToName(context, field, value);
        }
        else if (field.endsWith("_filter") || field.endsWith("_ac")
                || field.endsWith("_acid"))
        {
            //We have a filter, make sure we split !
            String separator = new DSpace().getConfigurationService().getProperty("discovery.solr.facets.split.char");
            if(separator == null)
            {
                separator = FILTER_SEPARATOR;
            }
            //Escape any regex chars
            separator = java.util.regex.Pattern.quote(separator);
            String[] fqParts = value.split(separator);
            StringBuffer valueBuffer = new StringBuffer();
            int end = fqParts.length / 2;
            for(int i = 0; i < end; i++)
            {
                valueBuffer.append(fqParts[i]);
            }
            value = valueBuffer.toString();
        }else if(value.matches("\\((.*?)\\)"))
        {
            //The brackets were added for better solr results, remove the first & last one
            value = value.substring(1, value.length() -1);
        }
        return value;
    }

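    /**
     * Indexes the given DSpace object and, when requested, immediately commits the change
     * to the Solr index.
     */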
    @Override
    public void indexContent(Context context, DSpaceObject dso, boolean force,
                             boolean commit) throws SearchServiceException, SQLException {
        indexContent(context, dso, force);
        if (commit)
        {
            commit();
        }
    }

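    /**
     * Commits all pending changes to the Solr index; a no-op when Solr is not available.
     */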
    @Override
    public void commit() throws SearchServiceException {
        try {
            if(getSolr() != null)
            {
                getSolr().commit();
            }
        } catch (Exception e) {
            throw new SearchServiceException(e.getMessage(), e);
        }
    }
}