mirror of
https://github.com/DSpace/DSpace.git
synced 2025-10-07 01:54:22 +00:00
Merge pull request #1595 from tomdesair/DS-2952_SOLR-full-text-indexing-multiple-bitstreams
DS-2952 SOLR full text indexing multiple bitstreams
This commit is contained in:
@@ -505,6 +505,11 @@
|
||||
<artifactId>contiperf</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.mockito</groupId>
|
||||
<artifactId>mockito-core</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.rometools</groupId>
|
||||
<artifactId>rome-modules</artifactId>
|
||||
|
@@ -16,16 +16,13 @@ import java.math.BigInteger;
|
||||
import java.rmi.dgc.VMID;
|
||||
import java.security.MessageDigest;
|
||||
import java.security.NoSuchAlgorithmException;
|
||||
import java.util.Arrays;
|
||||
import java.util.Random;
|
||||
import java.util.*;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
import java.util.Date;
|
||||
import java.util.Calendar;
|
||||
import java.util.GregorianCalendar;
|
||||
import java.text.SimpleDateFormat;
|
||||
import java.text.ParseException;
|
||||
import com.coverity.security.Escape;
|
||||
import org.apache.commons.collections.CollectionUtils;
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
/**
|
||||
@@ -413,4 +410,8 @@ public final class Utils
|
||||
int rl = result.length();
|
||||
return result.substring(0, rl-2) + ":" + result.substring(rl-2);
|
||||
}
|
||||
|
||||
public static <E> Collection<E> emptyIfNull(Collection<E> collection) {
|
||||
return collection == null ? Collections.<E>emptyList() : collection;
|
||||
}
|
||||
}
|
||||
|
@@ -1,81 +0,0 @@
|
||||
/**
|
||||
* The contents of this file are subject to the license and copyright
|
||||
* detailed in the LICENSE and NOTICE files at the root of the source
|
||||
* tree and available online at
|
||||
*
|
||||
* http://www.dspace.org/license/
|
||||
*/
|
||||
package org.dspace.discovery;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.apache.solr.common.util.ContentStreamBase;
|
||||
import org.dspace.content.Bitstream;
|
||||
import org.dspace.content.factory.ContentServiceFactory;
|
||||
import org.dspace.content.service.BitstreamService;
|
||||
import org.dspace.core.Context;
|
||||
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.sql.SQLException;
|
||||
|
||||
/**
|
||||
* Construct a <code>ContentStream</code> from a <code>File</code>
|
||||
*/
|
||||
public class BitstreamContentStream extends ContentStreamBase
|
||||
{
|
||||
private static final Logger log = Logger.getLogger(BitstreamContentStream.class);
|
||||
protected final Context context;
|
||||
protected final Bitstream file;
|
||||
protected BitstreamService bitstreamService;
|
||||
|
||||
public BitstreamContentStream(Context context, Bitstream f ) throws SQLException {
|
||||
file = f;
|
||||
this.context = context;
|
||||
|
||||
contentType = f.getFormat(context).getMIMEType();
|
||||
name = file.getName();
|
||||
size = file.getSize();
|
||||
sourceInfo = file.getName();
|
||||
bitstreamService = ContentServiceFactory.getInstance().getBitstreamService();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getContentType() {
|
||||
if(contentType==null) {
|
||||
InputStream stream = null;
|
||||
try {
|
||||
stream = bitstreamService.retrieve(context, file);
|
||||
char first = (char)stream.read();
|
||||
if(first == '<') {
|
||||
return "application/xml";
|
||||
}
|
||||
if(first == '{') {
|
||||
return "application/json";
|
||||
}
|
||||
} catch(Exception ex) {
|
||||
log.error("Error determining content type for bitstream:" + file.getID(), ex);
|
||||
} finally {
|
||||
if (stream != null) try {
|
||||
stream.close();
|
||||
} catch (IOException ioe) {
|
||||
log.error("Error closing stream:" + file.getID(), ioe);
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
return contentType;
|
||||
}
|
||||
|
||||
@Override
|
||||
public InputStream getStream() throws IOException {
|
||||
try {
|
||||
return bitstreamService.retrieve(context, file);
|
||||
} catch (Exception e) {
|
||||
log.error(e.getMessage(),e);
|
||||
return new ByteArrayInputStream(e.getMessage().getBytes(StandardCharsets.UTF_8));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -0,0 +1,212 @@
|
||||
/**
|
||||
* The contents of this file are subject to the license and copyright
|
||||
* detailed in the LICENSE and NOTICE files at the root of the source
|
||||
* tree and available online at
|
||||
*
|
||||
* http://www.dspace.org/license/
|
||||
*/
|
||||
package org.dspace.discovery;
|
||||
|
||||
import com.google.common.base.Function;
|
||||
import com.google.common.collect.Iterables;
|
||||
import org.apache.commons.collections.CollectionUtils;
|
||||
import org.apache.commons.io.Charsets;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.apache.solr.common.util.ContentStreamBase;
|
||||
import org.dspace.authorize.AuthorizeException;
|
||||
import org.dspace.content.Bitstream;
|
||||
import org.dspace.content.BitstreamFormat;
|
||||
import org.dspace.content.Bundle;
|
||||
import org.dspace.content.Item;
|
||||
import org.dspace.content.factory.ContentServiceFactory;
|
||||
import org.dspace.content.service.BitstreamService;
|
||||
import org.dspace.core.Context;
|
||||
|
||||
import javax.annotation.Nullable;
|
||||
import java.io.*;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.sql.SQLException;
|
||||
import java.util.Enumeration;
|
||||
import java.util.Iterator;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
|
||||
import static org.dspace.core.Utils.emptyIfNull;
|
||||
|
||||
/**
|
||||
* Construct a <code>ContentStream</code> from a <code>File</code>
|
||||
*/
|
||||
public class FullTextContentStreams extends ContentStreamBase
|
||||
{
|
||||
private static final Logger log = Logger.getLogger(FullTextContentStreams.class);
|
||||
|
||||
public static final String FULLTEXT_BUNDLE = "TEXT";
|
||||
|
||||
protected final Context context;
|
||||
protected List<FullTextBitstream> fullTextStreams;
|
||||
protected BitstreamService bitstreamService;
|
||||
|
||||
public FullTextContentStreams(Context context, Item parentItem) throws SQLException {
|
||||
this.context = context;
|
||||
init(parentItem);
|
||||
}
|
||||
|
||||
protected void init(Item parentItem) {
|
||||
fullTextStreams = new LinkedList<>();
|
||||
|
||||
if(parentItem != null) {
|
||||
sourceInfo = parentItem.getHandle();
|
||||
|
||||
//extracted full text is always extracted as plain text
|
||||
contentType = "text/plain";
|
||||
|
||||
buildFullTextList(parentItem);
|
||||
}
|
||||
}
|
||||
|
||||
private void buildFullTextList(Item parentItem) {
|
||||
// now get full text of any bitstreams in the TEXT bundle
|
||||
// trundle through the bundles
|
||||
List<Bundle> myBundles = parentItem.getBundles();
|
||||
|
||||
for (Bundle myBundle : emptyIfNull(myBundles)) {
|
||||
if (StringUtils.equals(FULLTEXT_BUNDLE, myBundle.getName())) {
|
||||
// a-ha! grab the text out of the bitstreams
|
||||
List<Bitstream> bitstreams = myBundle.getBitstreams();
|
||||
|
||||
for (Bitstream fulltextBitstream : emptyIfNull(bitstreams)) {
|
||||
fullTextStreams.add(new FullTextBitstream(sourceInfo, fulltextBitstream));
|
||||
|
||||
log.debug("Added BitStream: "
|
||||
+ fulltextBitstream.getStoreNumber() + " "
|
||||
+ fulltextBitstream.getSequenceID() + " "
|
||||
+ fulltextBitstream.getName());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getName() {
|
||||
return StringUtils.join(Iterables.transform(fullTextStreams, new Function<FullTextBitstream, String>() {
|
||||
@Nullable
|
||||
@Override
|
||||
public String apply(@Nullable FullTextBitstream input) {
|
||||
return input == null ? "" : input.getFileName();
|
||||
}
|
||||
}), ";");
|
||||
}
|
||||
|
||||
@Override
|
||||
public Long getSize() {
|
||||
long result = 0;
|
||||
|
||||
if(CollectionUtils.isNotEmpty(fullTextStreams)) {
|
||||
Iterable<Long> individualSizes = Iterables.transform(fullTextStreams, new Function<FullTextBitstream, Long>() {
|
||||
@Nullable
|
||||
@Override
|
||||
public Long apply(@Nullable FullTextBitstream input) {
|
||||
return input == null ? 0L : input.getSize();
|
||||
}
|
||||
});
|
||||
|
||||
for (Long size : individualSizes) {
|
||||
result += size;
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Reader getReader() throws IOException {
|
||||
return super.getReader();
|
||||
}
|
||||
|
||||
@Override
|
||||
public InputStream getStream() throws IOException {
|
||||
try {
|
||||
return new SequenceInputStream(new FullTextEnumeration(fullTextStreams.iterator()));
|
||||
} catch (Exception e) {
|
||||
log.error("Unable to add full text bitstreams to SOLR for item " + sourceInfo + ": " + e.getMessage(), e);
|
||||
return new ByteArrayInputStream((e.getClass() + ": " + e.getMessage()).getBytes(StandardCharsets.UTF_8));
|
||||
}
|
||||
}
|
||||
|
||||
public boolean isEmpty() {
|
||||
return CollectionUtils.isEmpty(fullTextStreams);
|
||||
}
|
||||
|
||||
private BitstreamService getBitstreamService() {
|
||||
if(bitstreamService == null) {
|
||||
bitstreamService = ContentServiceFactory.getInstance().getBitstreamService();
|
||||
}
|
||||
return bitstreamService;
|
||||
}
|
||||
|
||||
private class FullTextBitstream {
|
||||
private String itemHandle;
|
||||
private Bitstream bitstream;
|
||||
|
||||
public FullTextBitstream(final String parentHandle, final Bitstream file) {
|
||||
this.itemHandle = parentHandle;
|
||||
this.bitstream = file;
|
||||
}
|
||||
|
||||
public String getContentType(final Context context) throws SQLException {
|
||||
BitstreamFormat format = bitstream.getFormat(context);
|
||||
return format == null ? null : StringUtils.trimToEmpty(format.getMIMEType());
|
||||
}
|
||||
|
||||
public String getFileName() {
|
||||
return StringUtils.trimToEmpty(bitstream.getName());
|
||||
}
|
||||
|
||||
public long getSize() {
|
||||
return bitstream.getSize();
|
||||
}
|
||||
|
||||
public InputStream getInputStream() throws SQLException, IOException, AuthorizeException {
|
||||
return getBitstreamService().retrieve(context, bitstream);
|
||||
}
|
||||
|
||||
public String getItemHandle() {
|
||||
return itemHandle;
|
||||
}
|
||||
}
|
||||
|
||||
private class FullTextEnumeration implements Enumeration<InputStream> {
|
||||
|
||||
private final Iterator<FullTextBitstream> fulltextIterator;
|
||||
|
||||
public FullTextEnumeration(final Iterator<FullTextBitstream> fulltextStreams) {
|
||||
this.fulltextIterator = fulltextStreams;
|
||||
}
|
||||
|
||||
public boolean hasMoreElements() {
|
||||
return fulltextIterator.hasNext();
|
||||
}
|
||||
|
||||
public InputStream nextElement() {
|
||||
InputStream inputStream = null;
|
||||
FullTextBitstream bitstream = null;
|
||||
|
||||
try {
|
||||
bitstream = fulltextIterator.next();
|
||||
inputStream = bitstream.getInputStream();
|
||||
} catch (Exception e) {
|
||||
log.warn("Unable to add full text bitstream " + (bitstream == null ? "NULL" :
|
||||
bitstream.getFileName() + " for item " + bitstream.getItemHandle())
|
||||
+ " to SOLR:" + e.getMessage(), e);
|
||||
|
||||
inputStream = new ByteArrayInputStream((e.getClass() + ": " + e.getMessage()).getBytes(StandardCharsets.UTF_8));
|
||||
}
|
||||
|
||||
return inputStream == null ? null : new SequenceInputStream(
|
||||
new ByteArrayInputStream("\n".getBytes(Charsets.UTF_8)), inputStream);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
@@ -770,19 +770,15 @@ public class SolrServiceImpl implements SearchService, IndexingService {
|
||||
* @throws IOException
|
||||
* A general class of exceptions produced by failed or interrupted I/O operations.
|
||||
*/
|
||||
protected void writeDocument(SolrInputDocument doc, List<BitstreamContentStream> streams) throws IOException {
|
||||
protected void writeDocument(SolrInputDocument doc, FullTextContentStreams streams) throws IOException {
|
||||
|
||||
try {
|
||||
if (getSolr() != null)
|
||||
{
|
||||
if (CollectionUtils.isNotEmpty(streams))
|
||||
if (streams != null && !streams.isEmpty())
|
||||
{
|
||||
ContentStreamUpdateRequest req = new ContentStreamUpdateRequest("/update/extract");
|
||||
|
||||
for(BitstreamContentStream bce : streams)
|
||||
{
|
||||
req.addContentStream(bce);
|
||||
}
|
||||
req.addContentStream(streams);
|
||||
|
||||
ModifiableSolrParams params = new ModifiableSolrParams();
|
||||
|
||||
@@ -1421,48 +1417,6 @@ public class SolrServiceImpl implements SearchService, IndexingService {
|
||||
|
||||
log.debug(" Added Grouping");
|
||||
|
||||
|
||||
|
||||
List<BitstreamContentStream> streams = new ArrayList<BitstreamContentStream>();
|
||||
|
||||
try {
|
||||
// now get full text of any bitstreams in the TEXT bundle
|
||||
// trundle through the bundles
|
||||
List<Bundle> myBundles = item.getBundles();
|
||||
|
||||
for (Bundle myBundle : myBundles)
|
||||
{
|
||||
if ((myBundle.getName() != null)
|
||||
&& myBundle.getName().equals("TEXT"))
|
||||
{
|
||||
// a-ha! grab the text out of the bitstreams
|
||||
List<Bitstream> bitstreams = myBundle.getBitstreams();
|
||||
|
||||
for (Bitstream myBitstream : bitstreams)
|
||||
{
|
||||
try {
|
||||
|
||||
streams.add(new BitstreamContentStream(context, myBitstream));
|
||||
|
||||
log.debug(" Added BitStream: "
|
||||
+ myBitstream.getStoreNumber() + " "
|
||||
+ myBitstream.getSequenceID() + " "
|
||||
+ myBitstream.getName());
|
||||
|
||||
} catch (Exception e)
|
||||
{
|
||||
// this will never happen, but compiler is now
|
||||
// happy.
|
||||
log.trace(e.getMessage(), e);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (RuntimeException e)
|
||||
{
|
||||
log.error(e.getMessage(), e);
|
||||
}
|
||||
|
||||
//Do any additional indexing, depends on the plugins
|
||||
List<SolrServiceIndexPlugin> solrServiceIndexPlugins = DSpaceServicesFactory.getInstance().getServiceManager().getServicesByType(SolrServiceIndexPlugin.class);
|
||||
for (SolrServiceIndexPlugin solrServiceIndexPlugin : solrServiceIndexPlugins)
|
||||
@@ -1472,7 +1426,7 @@ public class SolrServiceImpl implements SearchService, IndexingService {
|
||||
|
||||
// write the index and close the inputstreamreaders
|
||||
try {
|
||||
writeDocument(doc, streams);
|
||||
writeDocument(doc, new FullTextContentStreams(context, item));
|
||||
log.info("Wrote Item: " + handle + " to Index");
|
||||
} catch (RuntimeException e)
|
||||
{
|
||||
|
@@ -0,0 +1,198 @@
|
||||
/**
|
||||
* The contents of this file are subject to the license and copyright
|
||||
* detailed in the LICENSE and NOTICE files at the root of the source
|
||||
* tree and available online at
|
||||
*
|
||||
* http://www.dspace.org/license/
|
||||
*/
|
||||
package org.dspace.discovery;
|
||||
|
||||
import org.apache.commons.io.Charsets;
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.dspace.content.Bitstream;
|
||||
import org.dspace.content.Bundle;
|
||||
import org.dspace.content.Item;
|
||||
import org.dspace.content.service.BitstreamService;
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
import org.junit.runner.RunWith;
|
||||
import org.mockito.InjectMocks;
|
||||
import org.mockito.Mock;
|
||||
import org.mockito.runners.MockitoJUnitRunner;
|
||||
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.util.Arrays;
|
||||
|
||||
import static org.junit.Assert.*;
|
||||
import static org.mockito.Mockito.when;
|
||||
|
||||
@RunWith(MockitoJUnitRunner.class)
|
||||
public class FullTextContentStreamsTest {
|
||||
|
||||
private static final String HANDLE = "1234567/123";
|
||||
private static final String CONTENT_TYPE = "text/plain";
|
||||
|
||||
@InjectMocks
|
||||
private FullTextContentStreams streams;
|
||||
|
||||
@Mock
|
||||
private BitstreamService bitstreamService;
|
||||
|
||||
@Mock
|
||||
private Item item;
|
||||
|
||||
@Mock
|
||||
private Bundle originalBundle;
|
||||
|
||||
@Mock
|
||||
private Bundle textBundle;
|
||||
|
||||
@Mock
|
||||
private Bitstream textBitstream1;
|
||||
|
||||
@Mock
|
||||
private Bitstream textBitstream2;
|
||||
|
||||
@Mock
|
||||
private Bitstream textBitstream3;
|
||||
|
||||
|
||||
@Before
|
||||
public void setUp() throws Exception {
|
||||
when(item.getHandle()).thenReturn(HANDLE);
|
||||
when(originalBundle.getName()).thenReturn("ORIGINAL");
|
||||
when(textBundle.getName()).thenReturn("TEXT");
|
||||
|
||||
when(textBitstream1.getName()).thenReturn("Full Text 1");
|
||||
when(textBitstream2.getName()).thenReturn("Full Text 2");
|
||||
when(textBitstream3.getName()).thenReturn("Full Text 3");
|
||||
|
||||
when(textBitstream1.getSize()).thenReturn(1L);
|
||||
when(textBitstream2.getSize()).thenReturn(2L);
|
||||
when(textBitstream3.getSize()).thenReturn(3L);
|
||||
|
||||
when(bitstreamService.retrieve(null, textBitstream1)).thenReturn(new ByteArrayInputStream("This is text 1".getBytes(Charsets.UTF_8)));
|
||||
when(bitstreamService.retrieve(null, textBitstream2)).thenReturn(new ByteArrayInputStream("This is text 2".getBytes(Charsets.UTF_8)));
|
||||
when(bitstreamService.retrieve(null, textBitstream3)).thenReturn(new ByteArrayInputStream("This is text 3".getBytes(Charsets.UTF_8)));
|
||||
|
||||
streams.bitstreamService = bitstreamService;
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testItemWithNoBundles() throws Exception {
|
||||
when(item.getBundles()).thenReturn(null);
|
||||
|
||||
streams.init(item);
|
||||
|
||||
assertEquals("Source info should give you the handle", HANDLE, streams.getSourceInfo());
|
||||
assertEquals("Content type should be plain text", CONTENT_TYPE, streams.getContentType());
|
||||
assertEquals("The name should be empty", "", streams.getName());
|
||||
assertEquals("The size of the streams should be zero", (Long) 0L, streams.getSize());
|
||||
assertTrue("Content stream should be empty", streams.isEmpty());
|
||||
InputStream inputStream = streams.getStream();
|
||||
assertNotNull(inputStream);
|
||||
assertEquals("Input stream should be empty", -1, inputStream.read());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testItemWithOnlyOriginalBundle() throws Exception {
|
||||
when(item.getBundles()).thenReturn(Arrays.asList(originalBundle));
|
||||
|
||||
streams.init(item);
|
||||
|
||||
assertEquals("Source info should give you the handle", HANDLE, streams.getSourceInfo());
|
||||
assertEquals("Content type should be plain text", CONTENT_TYPE, streams.getContentType());
|
||||
assertEquals("The name should be empty", "", streams.getName());
|
||||
assertEquals("The size of the streams should be zero", (Long) 0L, streams.getSize());
|
||||
assertTrue("Content stream should be empty", streams.isEmpty());
|
||||
InputStream inputStream = streams.getStream();
|
||||
assertNotNull(inputStream);
|
||||
assertEquals("Input stream should be empty", -1, inputStream.read());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testItemWithEmptyTextBundle() throws Exception {
|
||||
when(item.getBundles()).thenReturn(Arrays.asList(originalBundle, textBundle));
|
||||
when(textBundle.getBitstreams()).thenReturn(null);
|
||||
|
||||
streams.init(item);
|
||||
|
||||
assertEquals("Source info should give you the handle", HANDLE, streams.getSourceInfo());
|
||||
assertEquals("Content type should be plain text", CONTENT_TYPE, streams.getContentType());
|
||||
assertEquals("The name should be empty", "", streams.getName());
|
||||
assertEquals("The size of the streams should be zero", (Long) 0L, streams.getSize());
|
||||
assertTrue("Content stream should be empty", streams.isEmpty());
|
||||
InputStream inputStream = streams.getStream();
|
||||
assertNotNull(inputStream);
|
||||
assertEquals("Input stream should be empty", -1, inputStream.read());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testItemWithOnlyOneTextBitstream() throws Exception {
|
||||
when(item.getBundles()).thenReturn(Arrays.asList(originalBundle, textBundle));
|
||||
when(textBundle.getBitstreams()).thenReturn(Arrays.asList(textBitstream1));
|
||||
|
||||
streams.init(item);
|
||||
|
||||
assertEquals("Source info should give you the handle", HANDLE, streams.getSourceInfo());
|
||||
assertEquals("Content type should be plain text", CONTENT_TYPE, streams.getContentType());
|
||||
assertEquals("The name should match the name of the bitstream", "Full Text 1", streams.getName());
|
||||
assertEquals("The size of the streams should match the size of bitstream 1", (Long) 1L, streams.getSize());
|
||||
assertFalse("Content stream should not be empty", streams.isEmpty());
|
||||
InputStream inputStream = streams.getStream();
|
||||
assertNotNull(inputStream);
|
||||
assertEquals("The data in the input stream should match the text of the bitstream", "\nThis is text 1",
|
||||
IOUtils.toString(inputStream, Charsets.UTF_8));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testItemWithMultipleTextBitstreams() throws Exception {
|
||||
when(item.getBundles()).thenReturn(Arrays.asList(originalBundle, textBundle));
|
||||
when(textBundle.getBitstreams()).thenReturn(Arrays.asList(textBitstream1, textBitstream2, textBitstream3));
|
||||
|
||||
streams.init(item);
|
||||
|
||||
assertEquals("Source info should give you the handle", HANDLE, streams.getSourceInfo());
|
||||
assertEquals("Content type should be plain text", CONTENT_TYPE, streams.getContentType());
|
||||
assertEquals("The name should match the concatenation of the names of the bitstreams",
|
||||
"Full Text 1;Full Text 2;Full Text 3", streams.getName());
|
||||
assertEquals("The size of the streams should be the sum of the bitstream sizes", (Long) 6L, streams.getSize());
|
||||
assertFalse("Content stream should not be empty", streams.isEmpty());
|
||||
InputStream inputStream = streams.getStream();
|
||||
assertNotNull(inputStream);
|
||||
assertEquals("The data in the input stream should match 'This is text 1'", "\nThis is text 1" +
|
||||
"\nThis is text 2\nThis is text 3", IOUtils.toString(inputStream, Charsets.UTF_8));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testBitstreamThrowingExceptionShouldNotStopIndexing() throws Exception {
|
||||
when(item.getBundles()).thenReturn(Arrays.asList(originalBundle, textBundle));
|
||||
when(textBundle.getBitstreams()).thenReturn(Arrays.asList(textBitstream1, textBitstream2, textBitstream3));
|
||||
when(bitstreamService.retrieve(null, textBitstream2)).thenThrow(new IOException("NOTFOUND"));
|
||||
|
||||
streams.init(item);
|
||||
|
||||
assertEquals("Source info should give you the handle", HANDLE, streams.getSourceInfo());
|
||||
assertEquals("Content type should be plain text", CONTENT_TYPE, streams.getContentType());
|
||||
assertEquals("The name should match the concatenation of the names of the bitstreams",
|
||||
"Full Text 1;Full Text 2;Full Text 3", streams.getName());
|
||||
assertEquals("The size of the streams should be the sum of the bitstream sizes", (Long) 6L, streams.getSize());
|
||||
assertFalse("Content stream should not be empty", streams.isEmpty());
|
||||
InputStream inputStream = streams.getStream();
|
||||
assertNotNull(inputStream);
|
||||
String content = IOUtils.toString(inputStream, Charsets.UTF_8);
|
||||
assertTrue("The data should contain data of the first bitstream that is not corrupt",
|
||||
content.contains("This is text 1"));
|
||||
assertFalse("The data should NOT contain data of the second bitstream that is corrupt",
|
||||
content.contains("This is text 2"));
|
||||
assertTrue("The data should contain data of the third bistream that is not corrupt",
|
||||
content.contains("This is text 3"));
|
||||
assertTrue("The data should contain data on the exception that occurred",
|
||||
content.contains("java.io.IOException"));
|
||||
assertTrue("The data should contain data on the exception that occurred",
|
||||
content.contains("NOTFOUND"));
|
||||
}
|
||||
|
||||
}
|
@@ -231,7 +231,6 @@
|
||||
<dependency>
|
||||
<groupId>org.mockito</groupId>
|
||||
<artifactId>mockito-core</artifactId>
|
||||
<version>1.10.19</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
|
Reference in New Issue
Block a user