mirror of
https://github.com/DSpace/DSpace.git
synced 2025-10-17 15:03:18 +00:00
DURACOM-199 fix sitemap generator for restricted content and improve performance
This commit is contained in:
@@ -11,7 +11,6 @@ import java.io.File;
|
|||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.sql.SQLException;
|
import java.sql.SQLException;
|
||||||
import java.util.Date;
|
import java.util.Date;
|
||||||
import java.util.Iterator;
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
import org.apache.commons.cli.CommandLine;
|
import org.apache.commons.cli.CommandLine;
|
||||||
@@ -24,9 +23,6 @@ import org.apache.commons.collections4.CollectionUtils;
|
|||||||
import org.apache.commons.io.FileUtils;
|
import org.apache.commons.io.FileUtils;
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
import org.apache.logging.log4j.Logger;
|
import org.apache.logging.log4j.Logger;
|
||||||
import org.dspace.content.Collection;
|
|
||||||
import org.dspace.content.Community;
|
|
||||||
import org.dspace.content.Item;
|
|
||||||
import org.dspace.content.factory.ContentServiceFactory;
|
import org.dspace.content.factory.ContentServiceFactory;
|
||||||
import org.dspace.content.service.CollectionService;
|
import org.dspace.content.service.CollectionService;
|
||||||
import org.dspace.content.service.CommunityService;
|
import org.dspace.content.service.CommunityService;
|
||||||
@@ -35,6 +31,7 @@ import org.dspace.core.Context;
|
|||||||
import org.dspace.core.LogHelper;
|
import org.dspace.core.LogHelper;
|
||||||
import org.dspace.discovery.DiscoverQuery;
|
import org.dspace.discovery.DiscoverQuery;
|
||||||
import org.dspace.discovery.DiscoverResult;
|
import org.dspace.discovery.DiscoverResult;
|
||||||
|
import org.dspace.discovery.IndexableObject;
|
||||||
import org.dspace.discovery.SearchService;
|
import org.dspace.discovery.SearchService;
|
||||||
import org.dspace.discovery.SearchServiceException;
|
import org.dspace.discovery.SearchServiceException;
|
||||||
import org.dspace.discovery.SearchUtils;
|
import org.dspace.discovery.SearchUtils;
|
||||||
@@ -60,6 +57,7 @@ public class GenerateSitemaps {
|
|||||||
private static final ConfigurationService configurationService =
|
private static final ConfigurationService configurationService =
|
||||||
DSpaceServicesFactory.getInstance().getConfigurationService();
|
DSpaceServicesFactory.getInstance().getConfigurationService();
|
||||||
private static final SearchService searchService = SearchUtils.getSearchService();
|
private static final SearchService searchService = SearchUtils.getSearchService();
|
||||||
|
private static final int PAGE_SIZE = 100;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Default constructor
|
* Default constructor
|
||||||
@@ -183,96 +181,113 @@ public class GenerateSitemaps {
|
|||||||
}
|
}
|
||||||
|
|
||||||
Context c = new Context(Context.Mode.READ_ONLY);
|
Context c = new Context(Context.Mode.READ_ONLY);
|
||||||
|
int offset = 0;
|
||||||
|
long commsCount = 0;
|
||||||
|
long collsCount = 0;
|
||||||
|
long itemsCount = 0;
|
||||||
|
|
||||||
List<Community> comms = communityService.findAll(c);
|
try {
|
||||||
|
DiscoverQuery discoveryQuery = new DiscoverQuery();
|
||||||
|
discoveryQuery.setMaxResults(PAGE_SIZE);
|
||||||
|
discoveryQuery.setQuery("search.resourcetype:Community");
|
||||||
|
do {
|
||||||
|
discoveryQuery.setStart(offset);
|
||||||
|
DiscoverResult discoverResult = searchService.search(c, discoveryQuery);
|
||||||
|
List<IndexableObject> docs = discoverResult.getIndexableObjects();
|
||||||
|
commsCount = discoverResult.getTotalSearchResults();
|
||||||
|
|
||||||
for (Community comm : comms) {
|
for (IndexableObject doc : docs) {
|
||||||
String url = uiURLStem + "communities/" + comm.getID();
|
String url = uiURLStem + "communities/" + doc.getID();
|
||||||
|
c.uncacheEntity(doc.getIndexedObject());
|
||||||
|
|
||||||
|
if (makeHTMLMap) {
|
||||||
|
html.addURL(url, null);
|
||||||
|
}
|
||||||
|
if (makeSitemapOrg) {
|
||||||
|
sitemapsOrg.addURL(url, null);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
offset += PAGE_SIZE;
|
||||||
|
} while (offset < commsCount);
|
||||||
|
|
||||||
|
offset = 0;
|
||||||
|
discoveryQuery = new DiscoverQuery();
|
||||||
|
discoveryQuery.setMaxResults(PAGE_SIZE);
|
||||||
|
discoveryQuery.setQuery("search.resourcetype:Collection");
|
||||||
|
do {
|
||||||
|
discoveryQuery.setStart(offset);
|
||||||
|
DiscoverResult discoverResult = searchService.search(c, discoveryQuery);
|
||||||
|
List<IndexableObject> docs = discoverResult.getIndexableObjects();
|
||||||
|
collsCount = discoverResult.getTotalSearchResults();
|
||||||
|
|
||||||
|
for (IndexableObject doc : docs) {
|
||||||
|
String url = uiURLStem + "collections/" + doc.getID();
|
||||||
|
c.uncacheEntity(doc.getIndexedObject());
|
||||||
|
|
||||||
|
if (makeHTMLMap) {
|
||||||
|
html.addURL(url, null);
|
||||||
|
}
|
||||||
|
if (makeSitemapOrg) {
|
||||||
|
sitemapsOrg.addURL(url, null);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
offset += PAGE_SIZE;
|
||||||
|
} while (offset < collsCount);
|
||||||
|
|
||||||
|
offset = 0;
|
||||||
|
discoveryQuery = new DiscoverQuery();
|
||||||
|
discoveryQuery.setMaxResults(PAGE_SIZE);
|
||||||
|
discoveryQuery.setQuery("search.resourcetype:Item");
|
||||||
|
discoveryQuery.addSearchField("search.entitytype");
|
||||||
|
do {
|
||||||
|
|
||||||
|
discoveryQuery.setStart(offset);
|
||||||
|
DiscoverResult discoverResult = searchService.search(c, discoveryQuery);
|
||||||
|
List<IndexableObject> docs = discoverResult.getIndexableObjects();
|
||||||
|
itemsCount = discoverResult.getTotalSearchResults();
|
||||||
|
|
||||||
|
for (IndexableObject doc : docs) {
|
||||||
|
String url;
|
||||||
|
List<String> entityTypeFieldValues = discoverResult.getSearchDocument(doc).get(0)
|
||||||
|
.getSearchFieldValues("search.entitytype");
|
||||||
|
if (CollectionUtils.isNotEmpty(entityTypeFieldValues)) {
|
||||||
|
url = uiURLStem + "entities/" + StringUtils.lowerCase(entityTypeFieldValues.get(0)) + "/"
|
||||||
|
+ doc.getID();
|
||||||
|
} else {
|
||||||
|
url = uiURLStem + "items/" + doc.getID();
|
||||||
|
}
|
||||||
|
Date lastMod = doc.getLastModified();
|
||||||
|
c.uncacheEntity(doc.getIndexedObject());
|
||||||
|
|
||||||
|
if (makeHTMLMap) {
|
||||||
|
html.addURL(url, null);
|
||||||
|
}
|
||||||
|
if (makeSitemapOrg) {
|
||||||
|
sitemapsOrg.addURL(url, null);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
offset += PAGE_SIZE;
|
||||||
|
} while (offset < itemsCount);
|
||||||
|
|
||||||
if (makeHTMLMap) {
|
if (makeHTMLMap) {
|
||||||
html.addURL(url, null);
|
int files = html.finish();
|
||||||
|
log.info(LogHelper.getHeader(c, "write_sitemap",
|
||||||
|
"type=html,num_files=" + files + ",communities="
|
||||||
|
+ commsCount + ",collections=" + collsCount
|
||||||
|
+ ",items=" + itemsCount));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (makeSitemapOrg) {
|
if (makeSitemapOrg) {
|
||||||
sitemapsOrg.addURL(url, null);
|
int files = sitemapsOrg.finish();
|
||||||
|
log.info(LogHelper.getHeader(c, "write_sitemap",
|
||||||
|
"type=html,num_files=" + files + ",communities="
|
||||||
|
+ commsCount + ",collections=" + collsCount
|
||||||
|
+ ",items=" + itemsCount));
|
||||||
}
|
}
|
||||||
|
} catch (SearchServiceException e) {
|
||||||
c.uncacheEntity(comm);
|
throw new RuntimeException(e);
|
||||||
|
} finally {
|
||||||
|
c.abort();
|
||||||
}
|
}
|
||||||
|
|
||||||
List<Collection> colls = collectionService.findAll(c);
|
|
||||||
|
|
||||||
for (Collection coll : colls) {
|
|
||||||
String url = uiURLStem + "collections/" + coll.getID();
|
|
||||||
|
|
||||||
if (makeHTMLMap) {
|
|
||||||
html.addURL(url, null);
|
|
||||||
}
|
|
||||||
if (makeSitemapOrg) {
|
|
||||||
sitemapsOrg.addURL(url, null);
|
|
||||||
}
|
|
||||||
|
|
||||||
c.uncacheEntity(coll);
|
|
||||||
}
|
|
||||||
|
|
||||||
Iterator<Item> allItems = itemService.findAll(c);
|
|
||||||
int itemCount = 0;
|
|
||||||
|
|
||||||
while (allItems.hasNext()) {
|
|
||||||
Item i = allItems.next();
|
|
||||||
|
|
||||||
DiscoverQuery entityQuery = new DiscoverQuery();
|
|
||||||
entityQuery.setQuery("search.uniqueid:\"Item-" + i.getID() + "\" and entityType:*");
|
|
||||||
entityQuery.addSearchField("entityType");
|
|
||||||
|
|
||||||
try {
|
|
||||||
DiscoverResult discoverResult = searchService.search(c, entityQuery);
|
|
||||||
|
|
||||||
String url;
|
|
||||||
if (CollectionUtils.isNotEmpty(discoverResult.getIndexableObjects())
|
|
||||||
&& CollectionUtils.isNotEmpty(discoverResult.getSearchDocument(
|
|
||||||
discoverResult.getIndexableObjects().get(0)).get(0).getSearchFieldValues("entityType"))
|
|
||||||
&& StringUtils.isNotBlank(discoverResult.getSearchDocument(
|
|
||||||
discoverResult.getIndexableObjects().get(0)).get(0).getSearchFieldValues("entityType").get(0))
|
|
||||||
) {
|
|
||||||
url = uiURLStem + "entities/" + StringUtils.lowerCase(discoverResult.getSearchDocument(
|
|
||||||
discoverResult.getIndexableObjects().get(0))
|
|
||||||
.get(0).getSearchFieldValues("entityType").get(0)) + "/" + i.getID();
|
|
||||||
} else {
|
|
||||||
url = uiURLStem + "items/" + i.getID();
|
|
||||||
}
|
|
||||||
Date lastMod = i.getLastModified();
|
|
||||||
|
|
||||||
if (makeHTMLMap) {
|
|
||||||
html.addURL(url, lastMod);
|
|
||||||
}
|
|
||||||
if (makeSitemapOrg) {
|
|
||||||
sitemapsOrg.addURL(url, lastMod);
|
|
||||||
}
|
|
||||||
} catch (SearchServiceException e) {
|
|
||||||
log.error("Failed getting entitytype through solr for item " + i.getID() + ": " + e.getMessage());
|
|
||||||
}
|
|
||||||
|
|
||||||
c.uncacheEntity(i);
|
|
||||||
|
|
||||||
itemCount++;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (makeHTMLMap) {
|
|
||||||
int files = html.finish();
|
|
||||||
log.info(LogHelper.getHeader(c, "write_sitemap",
|
|
||||||
"type=html,num_files=" + files + ",communities="
|
|
||||||
+ comms.size() + ",collections=" + colls.size()
|
|
||||||
+ ",items=" + itemCount));
|
|
||||||
}
|
|
||||||
|
|
||||||
if (makeSitemapOrg) {
|
|
||||||
int files = sitemapsOrg.finish();
|
|
||||||
log.info(LogHelper.getHeader(c, "write_sitemap",
|
|
||||||
"type=html,num_files=" + files + ",communities="
|
|
||||||
+ comms.size() + ",collections=" + colls.size()
|
|
||||||
+ ",items=" + itemCount));
|
|
||||||
}
|
|
||||||
|
|
||||||
c.abort();
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -1031,9 +1031,8 @@ public class SolrServiceImpl implements SearchService, IndexingService {
|
|||||||
// Add information about our search fields
|
// Add information about our search fields
|
||||||
for (String field : searchFields) {
|
for (String field : searchFields) {
|
||||||
List<String> valuesAsString = new ArrayList<>();
|
List<String> valuesAsString = new ArrayList<>();
|
||||||
for (Object o : doc.getFieldValues(field)) {
|
Optional.ofNullable(doc.getFieldValues(field))
|
||||||
valuesAsString.add(String.valueOf(o));
|
.ifPresent(l -> l.forEach(o -> valuesAsString.add(String.valueOf(o))));
|
||||||
}
|
|
||||||
resultDoc.addSearchField(field, valuesAsString.toArray(new String[valuesAsString.size()]));
|
resultDoc.addSearchField(field, valuesAsString.toArray(new String[valuesAsString.size()]));
|
||||||
}
|
}
|
||||||
result.addSearchDocument(indexableObject, resultDoc);
|
result.addSearchDocument(indexableObject, resultDoc);
|
||||||
|
@@ -236,8 +236,37 @@ public class SitemapRestControllerIT extends AbstractControllerIntegrationTest {
|
|||||||
.andReturn();
|
.andReturn();
|
||||||
|
|
||||||
String response = result.getResponse().getContentAsString();
|
String response = result.getResponse().getContentAsString();
|
||||||
|
// contains a link to communities: [dspace.ui.url]/communities/<uuid>
|
||||||
|
assertTrue(response
|
||||||
|
.contains(configurationService.getProperty("dspace.ui.url") + "/communities/" + community.getID()));
|
||||||
|
// contains a link to collections: [dspace.ui.url]/collections/<uuid>
|
||||||
|
assertTrue(response
|
||||||
|
.contains(configurationService.getProperty("dspace.ui.url") + "/collections/" + collection.getID()));
|
||||||
// contains a link to items: [dspace.ui.url]/items/<uuid>
|
// contains a link to items: [dspace.ui.url]/items/<uuid>
|
||||||
assertTrue(response.contains(configurationService.getProperty("dspace.ui.url") + "/items/" + item1.getID()));
|
assertTrue(response.contains(configurationService.getProperty("dspace.ui.url") + "/items/" + item1.getID()));
|
||||||
assertTrue(response.contains(configurationService.getProperty("dspace.ui.url") + "/items/" + item2.getID()));
|
assertTrue(response.contains(configurationService.getProperty("dspace.ui.url") + "/items/" + item2.getID()));
|
||||||
|
// contains proper link to entities items
|
||||||
|
assertTrue(response.contains(configurationService.getProperty("dspace.ui.url") + "/entities/publication/"
|
||||||
|
+ entityPublication.getID()));
|
||||||
|
assertFalse(response
|
||||||
|
.contains(configurationService.getProperty("dspace.ui.url") + "/items/" + entityPublication.getID()));
|
||||||
|
// does not contain links to restricted content
|
||||||
|
assertFalse(response.contains(
|
||||||
|
configurationService.getProperty("dspace.ui.url") + "/communities/" + communityRestricted.getID()));
|
||||||
|
assertFalse(response.contains(
|
||||||
|
configurationService.getProperty("dspace.ui.url") + "/collections/" + collectionRestricted.getID()));
|
||||||
|
assertFalse(response
|
||||||
|
.contains(configurationService.getProperty("dspace.ui.url") + "/items/" + itemRestricted.getID()));
|
||||||
|
assertFalse(response.contains(configurationService.getProperty("dspace.ui.url") + "/entities/publication/"
|
||||||
|
+ entityPublicationRestricted.getID()));
|
||||||
|
assertFalse(response.contains(
|
||||||
|
configurationService.getProperty("dspace.ui.url") + "/items/" + entityPublicationRestricted.getID()));
|
||||||
|
// does not contain links to undiscoverable content
|
||||||
|
assertFalse(response
|
||||||
|
.contains(configurationService.getProperty("dspace.ui.url") + "/items/" + itemUndiscoverable.getID()));
|
||||||
|
assertFalse(response.contains(configurationService.getProperty("dspace.ui.url") + "/entities/publication/"
|
||||||
|
+ entityPublicationUndiscoverable.getID()));
|
||||||
|
assertFalse(response.contains(configurationService.getProperty("dspace.ui.url") + "/items/"
|
||||||
|
+ entityPublicationUndiscoverable.getID()));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user