diff --git a/dspace-api/pom.xml b/dspace-api/pom.xml
index ea3692cf71..0cd5b87448 100644
--- a/dspace-api/pom.xml
+++ b/dspace-api/pom.xml
@@ -505,6 +505,11 @@
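<artifactId>contiperf</artifactId>
<scope>test</scope>
</dependency>
+ <dependency>
+ <groupId>org.mockito</groupId>
+ <artifactId>mockito-core</artifactId>
+ <scope>test</scope>
+ </dependency>
<dependency>
<groupId>org.rometools</groupId>
<artifactId>rome-modules</artifactId>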
diff --git a/dspace-api/src/main/java/org/dspace/discovery/FullTextContentStreams.java b/dspace-api/src/main/java/org/dspace/discovery/FullTextContentStreams.java
index 05eceac641..99dae109db 100644
--- a/dspace-api/src/main/java/org/dspace/discovery/FullTextContentStreams.java
+++ b/dspace-api/src/main/java/org/dspace/discovery/FullTextContentStreams.java
@@ -10,6 +10,7 @@ package org.dspace.discovery;
import com.google.common.base.Function;
import com.google.common.collect.Iterables;
import org.apache.commons.collections.CollectionUtils;
+import org.apache.commons.io.Charsets;
import org.apache.commons.lang3.StringUtils;
import org.apache.log4j.Logger;
import org.apache.solr.common.util.ContentStreamBase;
@@ -43,19 +44,25 @@ public class FullTextContentStreams extends ContentStreamBase
public static final String FULLTEXT_BUNDLE = "TEXT";
protected final Context context;
- protected final List fullTextStreams;
+ protected List fullTextStreams;
protected BitstreamService bitstreamService;
+ /**
+ * Creates the content stream for an item and immediately initialises it through init(parentItem).
+ *
+ * @param context the context, stored in the context field and later passed to BitstreamService.retrieve
+ * @param parentItem the item to initialise from; handed unchanged to init
+ * @throws SQLException declared by the signature; nothing in the visible body throws it directly
+ */
public FullTextContentStreams(Context context, Item parentItem) throws SQLException {
this.context = context;
+ init(parentItem);
+ }
+
+ /**
+ * (Re)builds the state of this content stream for the given item.
+ * fullTextStreams is always replaced by a new, empty list. When parentItem is not null,
+ * its handle is stored as sourceInfo, the content type is set to "text/plain" and the
+ * work is handed on to buildFullTextList(parentItem).
+ *
+ * @param parentItem the item to read from; when null only the empty list is created
+ */
+ protected void init(Item parentItem) {
fullTextStreams = new LinkedList<>();
- sourceInfo = parentItem.getHandle();
- bitstreamService = ContentServiceFactory.getInstance().getBitstreamService();
- //extracted full text is always extracted as plain text
- contentType = "text/plain";
+ if(parentItem != null) {
+ sourceInfo = parentItem.getHandle();
- buildFullTextList(parentItem);
+ //extracted full text is always extracted as plain text
+ contentType = "text/plain";
+
+ buildFullTextList(parentItem);
+ }
}
private void buildFullTextList(Item parentItem) {
@@ -123,7 +130,7 @@ public class FullTextContentStreams extends ContentStreamBase
return new SequenceInputStream(new FullTextEnumeration(fullTextStreams.iterator()));
} catch (Exception e) {
log.error("Unable to add full text bitstreams to SOLR for item " + sourceInfo + ": " + e.getMessage(), e);
- return new ByteArrayInputStream(e.getMessage().getBytes(StandardCharsets.UTF_8));
+ // Class#getName() keeps the "class " prefix of Class#toString() out of the fallback text
+ return new ByteArrayInputStream((e.getClass().getName() + ": " + e.getMessage()).getBytes(StandardCharsets.UTF_8));
}
}
@@ -131,6 +138,13 @@ public class FullTextContentStreams extends ContentStreamBase
return CollectionUtils.isEmpty(fullTextStreams);
}
+ /**
+  * Returns the bitstream service, resolving it through ContentServiceFactory
+  * on first use when the bitstreamService field has not been set yet.
+  *
+  * @return the service held in the bitstreamService field
+  */
+ private BitstreamService getBitstreamService() {
+     if (bitstreamService != null) {
+         return bitstreamService;
+     }
+     bitstreamService = ContentServiceFactory.getInstance().getBitstreamService();
+     return bitstreamService;
+ }
+
private class FullTextBitstream {
private String itemHandle;
private Bitstream bitstream;
@@ -154,7 +168,7 @@ public class FullTextContentStreams extends ContentStreamBase
}
+ /**
+ * Opens the content of the wrapped bitstream.
+ * The service is obtained through getBitstreamService() rather than read from the field directly.
+ *
+ * @return the stream returned by BitstreamService.retrieve for the stored context and bitstream
+ */
public InputStream getInputStream() throws SQLException, IOException, AuthorizeException {
- return bitstreamService.retrieve(context, bitstream);
+ return getBitstreamService().retrieve(context, bitstream);
}
public String getItemHandle() {
@@ -186,10 +200,10 @@ public class FullTextContentStreams extends ContentStreamBase
bitstream.getFileName() + " for item " + bitstream.getItemHandle())
+ " to SOLR:" + e.getMessage(), e);
- inputStream = new ByteArrayInputStream(e.getMessage().getBytes(StandardCharsets.UTF_8));
+ // Class#getName() keeps the "class " prefix of Class#toString() out of the fallback text
+ inputStream = new ByteArrayInputStream((e.getClass().getName() + ": " + e.getMessage()).getBytes(StandardCharsets.UTF_8));
}
- return inputStream;
+ // Every element is prefixed with a newline; StandardCharsets is the charset constant already used in this class
+ return new SequenceInputStream(new ByteArrayInputStream("\n".getBytes(StandardCharsets.UTF_8)), inputStream);
}
}
diff --git a/dspace-api/src/main/java/org/dspace/discovery/SolrServiceImpl.java b/dspace-api/src/main/java/org/dspace/discovery/SolrServiceImpl.java
index 24fe669eb7..1ff46a4024 100644
--- a/dspace-api/src/main/java/org/dspace/discovery/SolrServiceImpl.java
+++ b/dspace-api/src/main/java/org/dspace/discovery/SolrServiceImpl.java
@@ -1417,8 +1417,6 @@ public class SolrServiceImpl implements SearchService, IndexingService {
log.debug(" Added Grouping");
- FullTextContentStreams textContentStreams = new FullTextContentStreams(context, item);
-
//Do any additional indexing, depends on the plugins
List solrServiceIndexPlugins = DSpaceServicesFactory.getInstance().getServiceManager().getServicesByType(SolrServiceIndexPlugin.class);
for (SolrServiceIndexPlugin solrServiceIndexPlugin : solrServiceIndexPlugins)
@@ -1428,7 +1426,7 @@ public class SolrServiceImpl implements SearchService, IndexingService {
// write the index and close the inputstreamreaders
try {
- writeDocument(doc, textContentStreams);
+ writeDocument(doc, new FullTextContentStreams(context, item));
log.info("Wrote Item: " + handle + " to Index");
} catch (RuntimeException e)
{
diff --git a/dspace-api/src/test/java/org/dspace/discovery/FullTextContentStreamsTest.java b/dspace-api/src/test/java/org/dspace/discovery/FullTextContentStreamsTest.java
new file mode 100644
index 0000000000..d5d09bf5d4
--- /dev/null
+++ b/dspace-api/src/test/java/org/dspace/discovery/FullTextContentStreamsTest.java
@@ -0,0 +1,199 @@
+ package org.dspace.discovery;
+
+ import org.apache.commons.io.IOUtils;
+ import org.dspace.content.Bitstream;
+ import org.dspace.content.Bundle;
+ import org.dspace.content.Item;
+ import org.dspace.content.service.BitstreamService;
+ import org.junit.Before;
+ import org.junit.Test;
+ import org.junit.runner.RunWith;
+ import org.mockito.InjectMocks;
+ import org.mockito.Mock;
+ import org.mockito.runners.MockitoJUnitRunner;
+
+ import java.io.ByteArrayInputStream;
+ import java.io.IOException;
+ import java.io.InputStream;
+ import java.nio.charset.StandardCharsets;
+ import java.util.Arrays;
+
+ import static org.junit.Assert.*;
+ import static org.mockito.Mockito.when;
+
+ /**
+  * Unit tests for {@link FullTextContentStreams}, using Mockito mocks for the item,
+  * its bundles and bitstreams and for the {@link BitstreamService}.
+  */
+ @RunWith(MockitoJUnitRunner.class)
+ public class FullTextContentStreamsTest {
+
+     private static final String HANDLE = "1234567/123";
+     private static final String CONTENT_TYPE = "text/plain";
+
+     @InjectMocks
+     private FullTextContentStreams streams;
+
+     @Mock
+     private BitstreamService bitstreamService;
+
+     @Mock
+     private Item item;
+
+     @Mock
+     private Bundle originalBundle;
+
+     @Mock
+     private Bundle textBundle;
+
+     @Mock
+     private Bitstream textBitstream1;
+
+     @Mock
+     private Bitstream textBitstream2;
+
+     @Mock
+     private Bitstream textBitstream3;
+
+     /** Stubs the shared mocks: item handle, bundle names, bitstream names, sizes and contents. */
+     @Before
+     public void setUp() throws Exception {
+         when(item.getHandle()).thenReturn(HANDLE);
+         when(originalBundle.getName()).thenReturn("ORIGINAL");
+         when(textBundle.getName()).thenReturn("TEXT");
+
+         when(textBitstream1.getName()).thenReturn("Full Text 1");
+         when(textBitstream2.getName()).thenReturn("Full Text 2");
+         when(textBitstream3.getName()).thenReturn("Full Text 3");
+
+         when(textBitstream1.getSize()).thenReturn(1L);
+         when(textBitstream2.getSize()).thenReturn(2L);
+         when(textBitstream3.getSize()).thenReturn(3L);
+
+         // The context argument is matched as null, presumably because no Context mock is injected; TODO confirm
+         when(bitstreamService.retrieve(null, textBitstream1)).thenReturn(textStream("This is text 1"));
+         when(bitstreamService.retrieve(null, textBitstream2)).thenReturn(textStream("This is text 2"));
+         when(bitstreamService.retrieve(null, textBitstream3)).thenReturn(textStream("This is text 3"));
+
+         streams.bitstreamService = bitstreamService;
+     }
+
+     /** An item whose bundle list is null yields no full text. */
+     @Test
+     public void testItemWithNoBundles() throws Exception {
+         when(item.getBundles()).thenReturn(null);
+
+         streams.init(item);
+
+         assertNoFullText();
+     }
+
+     /** An item that only has an ORIGINAL bundle yields no full text. */
+     @Test
+     public void testItemWithOnlyOriginalBundle() throws Exception {
+         when(item.getBundles()).thenReturn(Arrays.asList(originalBundle));
+
+         streams.init(item);
+
+         assertNoFullText();
+     }
+
+     /** A TEXT bundle whose bitstream list is null yields no full text. */
+     @Test
+     public void testItemWithEmptyTextBundle() throws Exception {
+         when(item.getBundles()).thenReturn(Arrays.asList(originalBundle, textBundle));
+         when(textBundle.getBitstreams()).thenReturn(null);
+
+         streams.init(item);
+
+         assertNoFullText();
+     }
+
+     /** A single text bitstream is exposed with its own name, size and content. */
+     @Test
+     public void testItemWithOnlyOneTextBitstream() throws Exception {
+         when(item.getBundles()).thenReturn(Arrays.asList(originalBundle, textBundle));
+         when(textBundle.getBitstreams()).thenReturn(Arrays.asList(textBitstream1));
+
+         streams.init(item);
+
+         assertHandleAndContentType();
+         assertEquals("The name should match the name of the bitstream", "Full Text 1", streams.getName());
+         assertEquals("The size of the streams should match the size of bitstream 1", (Long) 1L, streams.getSize());
+         assertFalse("Content stream should not be empty", streams.isEmpty());
+         InputStream inputStream = streams.getStream();
+         assertNotNull(inputStream);
+         assertEquals("The data in the input stream should match the text of the bitstream", "\nThis is text 1",
+                 IOUtils.toString(inputStream, StandardCharsets.UTF_8));
+     }
+
+     /** Several text bitstreams are concatenated, each one preceded by a newline. */
+     @Test
+     public void testItemWithMultipleTextBitstreams() throws Exception {
+         when(item.getBundles()).thenReturn(Arrays.asList(originalBundle, textBundle));
+         when(textBundle.getBitstreams()).thenReturn(Arrays.asList(textBitstream1, textBitstream2, textBitstream3));
+
+         streams.init(item);
+
+         assertHandleAndContentType();
+         assertEquals("The name should match the concatenation of the names of the bitstreams",
+                 "Full Text 1;Full Text 2;Full Text 3", streams.getName());
+         assertEquals("The size of the streams should be the sum of the bitstream sizes", (Long) 6L, streams.getSize());
+         assertFalse("Content stream should not be empty", streams.isEmpty());
+         InputStream inputStream = streams.getStream();
+         assertNotNull(inputStream);
+         assertEquals("The data in the input stream should match the text of all three bitstreams", "\nThis is text 1" +
+                 "\nThis is text 2\nThis is text 3", IOUtils.toString(inputStream, StandardCharsets.UTF_8));
+     }
+
+     /** A bitstream that fails to load is replaced by the exception text; the others are still read. */
+     @Test
+     public void testBitstreamThrowingExceptionShouldNotStopIndexing() throws Exception {
+         when(item.getBundles()).thenReturn(Arrays.asList(originalBundle, textBundle));
+         when(textBundle.getBitstreams()).thenReturn(Arrays.asList(textBitstream1, textBitstream2, textBitstream3));
+         when(bitstreamService.retrieve(null, textBitstream2)).thenThrow(new IOException("NOTFOUND"));
+
+         streams.init(item);
+
+         assertHandleAndContentType();
+         assertEquals("The name should match the concatenation of the names of the bitstreams",
+                 "Full Text 1;Full Text 2;Full Text 3", streams.getName());
+         assertEquals("The size of the streams should be the sum of the bitstream sizes", (Long) 6L, streams.getSize());
+         assertFalse("Content stream should not be empty", streams.isEmpty());
+         InputStream inputStream = streams.getStream();
+         assertNotNull(inputStream);
+         String content = IOUtils.toString(inputStream, StandardCharsets.UTF_8);
+         assertTrue("The data should contain data of the first bitstream that is not corrupt",
+                 content.contains("This is text 1"));
+         assertFalse("The data should NOT contain data of the second bitstream that is corrupt",
+                 content.contains("This is text 2"));
+         assertTrue("The data should contain data of the third bitstream that is not corrupt",
+                 content.contains("This is text 3"));
+         assertTrue("The data should contain data on the exception that occurred",
+                 content.contains("java.io.IOException"));
+         assertTrue("The data should contain data on the exception that occurred",
+                 content.contains("NOTFOUND"));
+     }
+
+     /** Wraps the UTF-8 bytes of the given text in an in-memory input stream. */
+     private static InputStream textStream(String text) {
+         return new ByteArrayInputStream(text.getBytes(StandardCharsets.UTF_8));
+     }
+
+     /** Checks the source info and content type that every test expects after init(item). */
+     private void assertHandleAndContentType() {
+         assertEquals("Source info should give you the handle", HANDLE, streams.getSourceInfo());
+         assertEquals("Content type should be plain text", CONTENT_TYPE, streams.getContentType());
+     }
+
+     /** Checks that no full text was found: empty name, zero size and an input stream without data. */
+     private void assertNoFullText() throws Exception {
+         assertHandleAndContentType();
+         assertEquals("The name should be empty", "", streams.getName());
+         assertEquals("The size of the streams should be zero", (Long) 0L, streams.getSize());
+         assertTrue("Content stream should be empty", streams.isEmpty());
+         InputStream inputStream = streams.getStream();
+         assertNotNull(inputStream);
+         assertEquals("Input stream should be empty", -1, inputStream.read());
+     }
+ }
diff --git a/dspace-oai/pom.xml b/dspace-oai/pom.xml
index 2d89c2b6d0..c66b5fd84a 100644
--- a/dspace-oai/pom.xml
+++ b/dspace-oai/pom.xml
@@ -231,7 +231,6 @@
<groupId>org.mockito</groupId>
<artifactId>mockito-core</artifactId>
- <version>1.10.19</version>
<scope>test</scope>
diff --git a/pom.xml b/pom.xml
index 42eb344b6b..2c84d1d4c1 100644
--- a/pom.xml
+++ b/pom.xml
@@ -1389,6 +1389,11 @@
3.0.1u2
provided
+ <dependency>
+ <groupId>org.mockito</groupId>
+ <artifactId>mockito-core</artifactId>
+ <version>1.10.19</version>
+ </dependency>