diff --git a/dspace-api/pom.xml b/dspace-api/pom.xml index ea3692cf71..0cd5b87448 100644 --- a/dspace-api/pom.xml +++ b/dspace-api/pom.xml @@ -505,6 +505,11 @@ contiperf test + + org.mockito + mockito-core + test + org.rometools rome-modules diff --git a/dspace-api/src/main/java/org/dspace/discovery/FullTextContentStreams.java b/dspace-api/src/main/java/org/dspace/discovery/FullTextContentStreams.java index 05eceac641..99dae109db 100644 --- a/dspace-api/src/main/java/org/dspace/discovery/FullTextContentStreams.java +++ b/dspace-api/src/main/java/org/dspace/discovery/FullTextContentStreams.java @@ -10,6 +10,7 @@ package org.dspace.discovery; import com.google.common.base.Function; import com.google.common.collect.Iterables; import org.apache.commons.collections.CollectionUtils; +import org.apache.commons.io.Charsets; import org.apache.commons.lang3.StringUtils; import org.apache.log4j.Logger; import org.apache.solr.common.util.ContentStreamBase; @@ -43,19 +44,25 @@ public class FullTextContentStreams extends ContentStreamBase public static final String FULLTEXT_BUNDLE = "TEXT"; protected final Context context; - protected final List fullTextStreams; + protected List fullTextStreams; protected BitstreamService bitstreamService; public FullTextContentStreams(Context context, Item parentItem) throws SQLException { this.context = context; + init(parentItem); + } + + protected void init(Item parentItem) { fullTextStreams = new LinkedList<>(); - sourceInfo = parentItem.getHandle(); - bitstreamService = ContentServiceFactory.getInstance().getBitstreamService(); - //extracted full text is always extracted as plain text - contentType = "text/plain"; + if(parentItem != null) { + sourceInfo = parentItem.getHandle(); - buildFullTextList(parentItem); + //extracted full text is always extracted as plain text + contentType = "text/plain"; + + buildFullTextList(parentItem); + } } private void buildFullTextList(Item parentItem) { @@ -123,7 +130,7 @@ public class FullTextContentStreams extends ContentStreamBase return new SequenceInputStream(new FullTextEnumeration(fullTextStreams.iterator())); } catch (Exception e) { log.error("Unable to add full text bitstreams to SOLR for item " + sourceInfo + ": " + e.getMessage(), e); - return new ByteArrayInputStream(e.getMessage().getBytes(StandardCharsets.UTF_8)); + return new ByteArrayInputStream((e.getClass() + ": " + e.getMessage()).getBytes(StandardCharsets.UTF_8)); } } @@ -131,6 +138,13 @@ public class FullTextContentStreams extends ContentStreamBase return CollectionUtils.isEmpty(fullTextStreams); } + private BitstreamService getBitstreamService() { + if(bitstreamService == null) { + bitstreamService = ContentServiceFactory.getInstance().getBitstreamService(); + } + return bitstreamService; + } + private class FullTextBitstream { private String itemHandle; private Bitstream bitstream; @@ -154,7 +168,7 @@ public class FullTextContentStreams extends ContentStreamBase } public InputStream getInputStream() throws SQLException, IOException, AuthorizeException { - return bitstreamService.retrieve(context, bitstream); + return getBitstreamService().retrieve(context, bitstream); } public String getItemHandle() { @@ -186,10 +200,10 @@ public class FullTextContentStreams extends ContentStreamBase bitstream.getFileName() + " for item " + bitstream.getItemHandle()) + " to SOLR:" + e.getMessage(), e); - inputStream = new ByteArrayInputStream(e.getMessage().getBytes(StandardCharsets.UTF_8)); + inputStream = new ByteArrayInputStream((e.getClass() + ": " + e.getMessage()).getBytes(StandardCharsets.UTF_8)); } - return inputStream; + return new SequenceInputStream(new ByteArrayInputStream("\n".getBytes(Charsets.UTF_8)), inputStream); } } diff --git a/dspace-api/src/main/java/org/dspace/discovery/SolrServiceImpl.java b/dspace-api/src/main/java/org/dspace/discovery/SolrServiceImpl.java index 24fe669eb7..1ff46a4024 100644 --- a/dspace-api/src/main/java/org/dspace/discovery/SolrServiceImpl.java +++ b/dspace-api/src/main/java/org/dspace/discovery/SolrServiceImpl.java @@ -1417,8 +1417,6 @@ public class SolrServiceImpl implements SearchService, IndexingService { log.debug(" Added Grouping"); - FullTextContentStreams textContentStreams = new FullTextContentStreams(context, item); - //Do any additional indexing, depends on the plugins List solrServiceIndexPlugins = DSpaceServicesFactory.getInstance().getServiceManager().getServicesByType(SolrServiceIndexPlugin.class); for (SolrServiceIndexPlugin solrServiceIndexPlugin : solrServiceIndexPlugins) @@ -1428,7 +1426,7 @@ public class SolrServiceImpl implements SearchService, IndexingService { // write the index and close the inputstreamreaders try { - writeDocument(doc, textContentStreams); + writeDocument(doc, new FullTextContentStreams(context, item)); log.info("Wrote Item: " + handle + " to Index"); } catch (RuntimeException e) { diff --git a/dspace-api/src/test/java/org/dspace/discovery/FullTextContentStreamsTest.java b/dspace-api/src/test/java/org/dspace/discovery/FullTextContentStreamsTest.java new file mode 100644 index 0000000000..d5d09bf5d4 --- /dev/null +++ b/dspace-api/src/test/java/org/dspace/discovery/FullTextContentStreamsTest.java @@ -0,0 +1,191 @@ +package org.dspace.discovery; + +import org.apache.commons.io.Charsets; +import org.apache.commons.io.IOUtils; +import org.dspace.content.Bitstream; +import org.dspace.content.Bundle; +import org.dspace.content.Item; +import org.dspace.content.service.BitstreamService; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.mockito.InjectMocks; +import org.mockito.Mock; +import org.mockito.runners.MockitoJUnitRunner; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.Arrays; + +import static org.junit.Assert.*; +import static org.mockito.Mockito.when; + +@RunWith(MockitoJUnitRunner.class) +public class FullTextContentStreamsTest { + + private static final String HANDLE = "1234567/123"; + private static final String CONTENT_TYPE = "text/plain"; + + @InjectMocks + private FullTextContentStreams streams; + + @Mock + private BitstreamService bitstreamService; + + @Mock + private Item item; + + @Mock + private Bundle originalBundle; + + @Mock + private Bundle textBundle; + + @Mock + private Bitstream textBitstream1; + + @Mock + private Bitstream textBitstream2; + + @Mock + private Bitstream textBitstream3; + + + @Before + public void setUp() throws Exception { + when(item.getHandle()).thenReturn(HANDLE); + when(originalBundle.getName()).thenReturn("ORIGINAL"); + when(textBundle.getName()).thenReturn("TEXT"); + + when(textBitstream1.getName()).thenReturn("Full Text 1"); + when(textBitstream2.getName()).thenReturn("Full Text 2"); + when(textBitstream3.getName()).thenReturn("Full Text 3"); + + when(textBitstream1.getSize()).thenReturn(1L); + when(textBitstream2.getSize()).thenReturn(2L); + when(textBitstream3.getSize()).thenReturn(3L); + + when(bitstreamService.retrieve(null, textBitstream1)).thenReturn(new ByteArrayInputStream("This is text 1".getBytes(Charsets.UTF_8))); + when(bitstreamService.retrieve(null, textBitstream2)).thenReturn(new ByteArrayInputStream("This is text 2".getBytes(Charsets.UTF_8))); + when(bitstreamService.retrieve(null, textBitstream3)).thenReturn(new ByteArrayInputStream("This is text 3".getBytes(Charsets.UTF_8))); + + streams.bitstreamService = bitstreamService; + } + + @Test + public void testItemWithNoBundles() throws Exception { + when(item.getBundles()).thenReturn(null); + + streams.init(item); + + assertEquals("Source info should give you the handle", HANDLE, streams.getSourceInfo()); + assertEquals("Content type should be plain text", CONTENT_TYPE, streams.getContentType()); + assertEquals("The name should be empty", "", streams.getName()); + assertEquals("The size of the streams should be zero", (Long) 0L, streams.getSize()); + assertTrue("Content stream should be empty", streams.isEmpty()); + InputStream inputStream = streams.getStream(); + assertNotNull(inputStream); + assertEquals("Input stream should be empty", -1, inputStream.read()); + } + + @Test + public void testItemWithOnlyOriginalBundle() throws Exception { + when(item.getBundles()).thenReturn(Arrays.asList(originalBundle)); + + streams.init(item); + + assertEquals("Source info should give you the handle", HANDLE, streams.getSourceInfo()); + assertEquals("Content type should be plain text", CONTENT_TYPE, streams.getContentType()); + assertEquals("The name should be empty", "", streams.getName()); + assertEquals("The size of the streams should be zero", (Long) 0L, streams.getSize()); + assertTrue("Content stream should be empty", streams.isEmpty()); + InputStream inputStream = streams.getStream(); + assertNotNull(inputStream); + assertEquals("Input stream should be empty", -1, inputStream.read()); + } + + @Test + public void testItemWithEmptyTextBundle() throws Exception { + when(item.getBundles()).thenReturn(Arrays.asList(originalBundle, textBundle)); + when(textBundle.getBitstreams()).thenReturn(null); + + streams.init(item); + + assertEquals("Source info should give you the handle", HANDLE, streams.getSourceInfo()); + assertEquals("Content type should be plain text", CONTENT_TYPE, streams.getContentType()); + assertEquals("The name should be empty", "", streams.getName()); + assertEquals("The size of the streams should be zero", (Long) 0L, streams.getSize()); + assertTrue("Content stream should be empty", streams.isEmpty()); + InputStream inputStream = streams.getStream(); + assertNotNull(inputStream); + assertEquals("Input stream should be empty", -1, inputStream.read()); + } + + @Test + public void testItemWithOnlyOneTextBitstream() throws Exception { + when(item.getBundles()).thenReturn(Arrays.asList(originalBundle, textBundle)); + when(textBundle.getBitstreams()).thenReturn(Arrays.asList(textBitstream1)); + + streams.init(item); + + assertEquals("Source info should give you the handle", HANDLE, streams.getSourceInfo()); + assertEquals("Content type should be plain text", CONTENT_TYPE, streams.getContentType()); + assertEquals("The name should match the name of the bitstream", "Full Text 1", streams.getName()); + assertEquals("The size of the streams should match the size of bitstream 1", (Long) 1L, streams.getSize()); + assertFalse("Content stream should not be empty", streams.isEmpty()); + InputStream inputStream = streams.getStream(); + assertNotNull(inputStream); + assertEquals("The data in the input stream should match the text of the bitstream", "\nThis is text 1", + IOUtils.toString(inputStream, Charsets.UTF_8)); + } + + @Test + public void testItemWithMultipleTextBitstreams() throws Exception { + when(item.getBundles()).thenReturn(Arrays.asList(originalBundle, textBundle)); + when(textBundle.getBitstreams()).thenReturn(Arrays.asList(textBitstream1, textBitstream2, textBitstream3)); + + streams.init(item); + + assertEquals("Source info should give you the handle", HANDLE, streams.getSourceInfo()); + assertEquals("Content type should be plain text", CONTENT_TYPE, streams.getContentType()); + assertEquals("The name should match the concatenation of the names of the bitstreams", + "Full Text 1;Full Text 2;Full Text 3", streams.getName()); + assertEquals("The size of the streams should be the sum of the bitstream sizes", (Long) 6L, streams.getSize()); + assertFalse("Content stream should not be empty", streams.isEmpty()); + InputStream inputStream = streams.getStream(); + assertNotNull(inputStream); + assertEquals("The data in the input stream should match 'This is text 1'", "\nThis is text 1" + + "\nThis is text 2\nThis is text 3", IOUtils.toString(inputStream, Charsets.UTF_8)); + } + + @Test + public void testBitstreamThrowingExceptionShouldNotStopIndexing() throws Exception { + when(item.getBundles()).thenReturn(Arrays.asList(originalBundle, textBundle)); + when(textBundle.getBitstreams()).thenReturn(Arrays.asList(textBitstream1, textBitstream2, textBitstream3)); + when(bitstreamService.retrieve(null, textBitstream2)).thenThrow(new IOException("NOTFOUND")); + + streams.init(item); + + assertEquals("Source info should give you the handle", HANDLE, streams.getSourceInfo()); + assertEquals("Content type should be plain text", CONTENT_TYPE, streams.getContentType()); + assertEquals("The name should match the concatenation of the names of the bitstreams", + "Full Text 1;Full Text 2;Full Text 3", streams.getName()); + assertEquals("The size of the streams should be the sum of the bitstream sizes", (Long) 6L, streams.getSize()); + assertFalse("Content stream should not be empty", streams.isEmpty()); + InputStream inputStream = streams.getStream(); + assertNotNull(inputStream); + String content = IOUtils.toString(inputStream, Charsets.UTF_8); + assertTrue("The data should contain data of the first bitstream that is not corrupt", + content.contains("This is text 1")); + assertFalse("The data should NOT contain data of the second bitstream that is corrupt", + content.contains("This is text 2")); + assertTrue("The data should contain data of the third bistream that is not corrupt", + content.contains("This is text 3")); + assertTrue("The data should contain data on the exception that occurred", + content.contains("java.io.IOException")); + assertTrue("The data should contain data on the exception that occurred", + content.contains("NOTFOUND")); + } + +} \ No newline at end of file diff --git a/dspace-oai/pom.xml b/dspace-oai/pom.xml index 2d89c2b6d0..c66b5fd84a 100644 --- a/dspace-oai/pom.xml +++ b/dspace-oai/pom.xml @@ -231,7 +231,6 @@ org.mockito mockito-core - 1.10.19 test diff --git a/pom.xml b/pom.xml index 42eb344b6b..2c84d1d4c1 100644 --- a/pom.xml +++ b/pom.xml @@ -1389,6 +1389,11 @@ 3.0.1u2 provided + + org.mockito + mockito-core + 1.10.19 +