mirror of
https://github.com/DSpace/DSpace.git
synced 2025-10-12 12:33:18 +00:00
DS-3436: Tell SOLR to split values of multi-valued fields when sharding cores
This commit is contained in:
@@ -25,13 +25,16 @@ import org.apache.solr.client.solrj.impl.HttpSolrServer;
|
|||||||
import org.apache.solr.client.solrj.request.AbstractUpdateRequest;
|
import org.apache.solr.client.solrj.request.AbstractUpdateRequest;
|
||||||
import org.apache.solr.client.solrj.request.ContentStreamUpdateRequest;
|
import org.apache.solr.client.solrj.request.ContentStreamUpdateRequest;
|
||||||
import org.apache.solr.client.solrj.request.CoreAdminRequest;
|
import org.apache.solr.client.solrj.request.CoreAdminRequest;
|
||||||
|
import org.apache.solr.client.solrj.request.LukeRequest;
|
||||||
import org.apache.solr.client.solrj.response.FacetField;
|
import org.apache.solr.client.solrj.response.FacetField;
|
||||||
|
import org.apache.solr.client.solrj.response.LukeResponse;
|
||||||
import org.apache.solr.client.solrj.response.QueryResponse;
|
import org.apache.solr.client.solrj.response.QueryResponse;
|
||||||
import org.apache.solr.client.solrj.response.RangeFacet;
|
import org.apache.solr.client.solrj.response.RangeFacet;
|
||||||
import org.apache.solr.client.solrj.util.ClientUtils;
|
import org.apache.solr.client.solrj.util.ClientUtils;
|
||||||
import org.apache.solr.common.SolrDocument;
|
import org.apache.solr.common.SolrDocument;
|
||||||
import org.apache.solr.common.SolrDocumentList;
|
import org.apache.solr.common.SolrDocumentList;
|
||||||
import org.apache.solr.common.SolrInputDocument;
|
import org.apache.solr.common.SolrInputDocument;
|
||||||
|
import org.apache.solr.common.luke.FieldFlag;
|
||||||
import org.apache.solr.common.params.*;
|
import org.apache.solr.common.params.*;
|
||||||
import org.dspace.content.*;
|
import org.dspace.content.*;
|
||||||
import org.dspace.content.Collection;
|
import org.dspace.content.Collection;
|
||||||
@@ -72,7 +75,9 @@ import java.util.*;
|
|||||||
public class SolrLoggerServiceImpl implements SolrLoggerService, InitializingBean
|
public class SolrLoggerServiceImpl implements SolrLoggerService, InitializingBean
|
||||||
{
|
{
|
||||||
private static final Logger log = Logger.getLogger(SolrLoggerServiceImpl.class);
|
private static final Logger log = Logger.getLogger(SolrLoggerServiceImpl.class);
|
||||||
|
|
||||||
|
private static final String MULTIPLE_VALUES_SPLITTER = "|";
|
||||||
|
|
||||||
protected HttpSolrServer solr;
|
protected HttpSolrServer solr;
|
||||||
|
|
||||||
public static final String DATE_FORMAT_8601 = "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'";
|
public static final String DATE_FORMAT_8601 = "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'";
|
||||||
@@ -1236,6 +1241,10 @@ public class SolrLoggerServiceImpl implements SolrLoggerService, InitializingBea
|
|||||||
yearQueryParams.put(CommonParams.FQ, filterQuery.toString());
|
yearQueryParams.put(CommonParams.FQ, filterQuery.toString());
|
||||||
yearQueryParams.put(CommonParams.WT, "csv");
|
yearQueryParams.put(CommonParams.WT, "csv");
|
||||||
|
|
||||||
|
//Tell SOLR how to escape and separate the values of multi-valued fields
|
||||||
|
yearQueryParams.put("csv.escape", "\\");
|
||||||
|
yearQueryParams.put("csv.mv.separator", MULTIPLE_VALUES_SPLITTER);
|
||||||
|
|
||||||
//Start by creating a new core
|
//Start by creating a new core
|
||||||
String coreName = "statistics-" + dcStart.getYear();
|
String coreName = "statistics-" + dcStart.getYear();
|
||||||
HttpSolrServer statisticsYearServer = createCore(solr, coreName);
|
HttpSolrServer statisticsYearServer = createCore(solr, coreName);
|
||||||
@@ -1260,16 +1269,26 @@ public class SolrLoggerServiceImpl implements SolrLoggerService, InitializingBea
|
|||||||
yearQueryParams.put(CommonParams.START, String.valueOf((i + 10000)));
|
yearQueryParams.put(CommonParams.START, String.valueOf((i + 10000)));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Set<String> multivaluedFields = getMultivaluedFieldNames();
|
||||||
|
|
||||||
for (File tempCsv : filesToUpload) {
|
for (File tempCsv : filesToUpload) {
|
||||||
//Upload the data in the csv files to our new solr core
|
//Upload the data in the csv files to our new solr core
|
||||||
ContentStreamUpdateRequest contentStreamUpdateRequest = new ContentStreamUpdateRequest("/update/csv");
|
ContentStreamUpdateRequest contentStreamUpdateRequest = new ContentStreamUpdateRequest("/update/csv");
|
||||||
contentStreamUpdateRequest.setParam("stream.contentType", "text/plain;charset=utf-8");
|
contentStreamUpdateRequest.setParam("stream.contentType", "text/plain;charset=utf-8");
|
||||||
|
contentStreamUpdateRequest.setParam("escape", "\\");
|
||||||
contentStreamUpdateRequest.setParam("skip", "_version_");
|
contentStreamUpdateRequest.setParam("skip", "_version_");
|
||||||
contentStreamUpdateRequest.setAction(AbstractUpdateRequest.ACTION.COMMIT, true, true);
|
contentStreamUpdateRequest.setAction(AbstractUpdateRequest.ACTION.COMMIT, true, true);
|
||||||
contentStreamUpdateRequest.addFile(tempCsv, "text/plain;charset=utf-8");
|
contentStreamUpdateRequest.addFile(tempCsv, "text/plain;charset=utf-8");
|
||||||
|
|
||||||
|
//Add parsing directives for the multivalued fields so that they are stored as separate values instead of one value
|
||||||
|
for (String multivaluedField : multivaluedFields) {
|
||||||
|
contentStreamUpdateRequest.setParam("f." + multivaluedField + ".split", Boolean.TRUE.toString());
|
||||||
|
contentStreamUpdateRequest.setParam("f." + multivaluedField + ".separator", MULTIPLE_VALUES_SPLITTER);
|
||||||
|
}
|
||||||
|
|
||||||
statisticsYearServer.request(contentStreamUpdateRequest);
|
statisticsYearServer.request(contentStreamUpdateRequest);
|
||||||
}
|
}
|
||||||
|
|
||||||
statisticsYearServer.commit(true, true);
|
statisticsYearServer.commit(true, true);
|
||||||
|
|
||||||
|
|
||||||
@@ -1296,6 +1315,33 @@ public class SolrLoggerServiceImpl implements SolrLoggerService, InitializingBea
|
|||||||
return new HttpSolrServer(baseSolrUrl + "/" + coreName);
|
return new HttpSolrServer(baseSolrUrl + "/" + coreName);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Retrieves a list of all the multi valued fields in the solr core
|
||||||
|
* @return all fields tagged as multivalued
|
||||||
|
* @throws SolrServerException When getting the schema information from the SOLR core fails
|
||||||
|
* @throws IOException When connection to the SOLR server fails
|
||||||
|
*/
|
||||||
|
public Set<String> getMultivaluedFieldNames() throws SolrServerException, IOException {
|
||||||
|
Set<String> multivaluedFields = new HashSet<String>();
|
||||||
|
LukeRequest lukeRequest = new LukeRequest();
|
||||||
|
lukeRequest.setShowSchema(true);
|
||||||
|
LukeResponse process = lukeRequest.process(solr);
|
||||||
|
Map<String, LukeResponse.FieldInfo> fields = process.getFieldInfo();
|
||||||
|
for(String fieldName : fields.keySet())
|
||||||
|
{
|
||||||
|
LukeResponse.FieldInfo fieldInfo = fields.get(fieldName);
|
||||||
|
EnumSet<FieldFlag> flags = fieldInfo.getFlags();
|
||||||
|
for(FieldFlag fieldFlag : flags)
|
||||||
|
{
|
||||||
|
if(fieldFlag.getAbbreviation() == FieldFlag.MULTI_VALUED.getAbbreviation())
|
||||||
|
{
|
||||||
|
multivaluedFields.add(fieldName);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return multivaluedFields;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void reindexBitstreamHits(boolean removeDeletedBitstreams) throws Exception {
|
public void reindexBitstreamHits(boolean removeDeletedBitstreams) throws Exception {
|
||||||
|
Reference in New Issue
Block a user