diff --git a/dspace-api/src/main/java/org/dspace/statistics/SolrLoggerServiceImpl.java b/dspace-api/src/main/java/org/dspace/statistics/SolrLoggerServiceImpl.java index 55adc561e6..8f5b4d8510 100644 --- a/dspace-api/src/main/java/org/dspace/statistics/SolrLoggerServiceImpl.java +++ b/dspace-api/src/main/java/org/dspace/statistics/SolrLoggerServiceImpl.java @@ -25,13 +25,16 @@ import org.apache.solr.client.solrj.impl.HttpSolrServer; import org.apache.solr.client.solrj.request.AbstractUpdateRequest; import org.apache.solr.client.solrj.request.ContentStreamUpdateRequest; import org.apache.solr.client.solrj.request.CoreAdminRequest; +import org.apache.solr.client.solrj.request.LukeRequest; import org.apache.solr.client.solrj.response.FacetField; +import org.apache.solr.client.solrj.response.LukeResponse; import org.apache.solr.client.solrj.response.QueryResponse; import org.apache.solr.client.solrj.response.RangeFacet; import org.apache.solr.client.solrj.util.ClientUtils; import org.apache.solr.common.SolrDocument; import org.apache.solr.common.SolrDocumentList; import org.apache.solr.common.SolrInputDocument; +import org.apache.solr.common.luke.FieldFlag; import org.apache.solr.common.params.*; import org.dspace.content.*; import org.dspace.content.Collection; @@ -72,7 +75,9 @@ import java.util.*; public class SolrLoggerServiceImpl implements SolrLoggerService, InitializingBean { private static final Logger log = Logger.getLogger(SolrLoggerServiceImpl.class); - + + private static final String MULTIPLE_VALUES_SPLITTER = "|"; + protected HttpSolrServer solr; public static final String DATE_FORMAT_8601 = "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'"; @@ -1236,6 +1241,10 @@ public class SolrLoggerServiceImpl implements SolrLoggerService, InitializingBea yearQueryParams.put(CommonParams.FQ, filterQuery.toString()); yearQueryParams.put(CommonParams.WT, "csv"); + //Tell SOLR how to escape and separate the values of multi-valued fields + yearQueryParams.put("csv.escape", "\\"); + yearQueryParams.put("csv.mv.separator", MULTIPLE_VALUES_SPLITTER); + //Start by creating a new core String coreName = "statistics-" + dcStart.getYear(); HttpSolrServer statisticsYearServer = createCore(solr, coreName); @@ -1260,16 +1269,26 @@ public class SolrLoggerServiceImpl implements SolrLoggerService, InitializingBea yearQueryParams.put(CommonParams.START, String.valueOf((i + 10000))); } + Set multivaluedFields = getMultivaluedFieldNames(); + for (File tempCsv : filesToUpload) { //Upload the data in the csv files to our new solr core ContentStreamUpdateRequest contentStreamUpdateRequest = new ContentStreamUpdateRequest("/update/csv"); contentStreamUpdateRequest.setParam("stream.contentType", "text/plain;charset=utf-8"); + contentStreamUpdateRequest.setParam("escape", "\\"); contentStreamUpdateRequest.setParam("skip", "_version_"); contentStreamUpdateRequest.setAction(AbstractUpdateRequest.ACTION.COMMIT, true, true); contentStreamUpdateRequest.addFile(tempCsv, "text/plain;charset=utf-8"); + //Add parsing directives for the multivalued fields so that they are stored as separate values instead of one value + for (String multivaluedField : multivaluedFields) { + contentStreamUpdateRequest.setParam("f." + multivaluedField + ".split", Boolean.TRUE.toString()); + contentStreamUpdateRequest.setParam("f." + multivaluedField + ".separator", MULTIPLE_VALUES_SPLITTER); + } + statisticsYearServer.request(contentStreamUpdateRequest); } + statisticsYearServer.commit(true, true); @@ -1296,6 +1315,33 @@ public class SolrLoggerServiceImpl implements SolrLoggerService, InitializingBea return new HttpSolrServer(baseSolrUrl + "/" + coreName); } + /** + * Retrieves a list of all the multi valued fields in the solr core + * @return all fields tagged as multivalued + * @throws SolrServerException When getting the schema information from the SOLR core fails + * @throws IOException When connection to the SOLR server fails + */ + public Set getMultivaluedFieldNames() throws SolrServerException, IOException { + Set multivaluedFields = new HashSet(); + LukeRequest lukeRequest = new LukeRequest(); + lukeRequest.setShowSchema(true); + LukeResponse process = lukeRequest.process(solr); + Map fields = process.getFieldInfo(); + for(String fieldName : fields.keySet()) + { + LukeResponse.FieldInfo fieldInfo = fields.get(fieldName); + EnumSet flags = fieldInfo.getFlags(); + for(FieldFlag fieldFlag : flags) + { + if(fieldFlag.getAbbreviation() == FieldFlag.MULTI_VALUED.getAbbreviation()) + { + multivaluedFields.add(fieldName); + } + } + } + return multivaluedFields; + } + @Override public void reindexBitstreamHits(boolean removeDeletedBitstreams) throws Exception {