diff --git a/Dockerfile b/Dockerfile index ff35f2a2e3..102f00abe1 100644 --- a/Dockerfile +++ b/Dockerfile @@ -42,14 +42,12 @@ WORKDIR /dspace-src ENV ANT_VERSION=1.10.13 ENV ANT_HOME=/tmp/ant-$ANT_VERSION ENV PATH=$ANT_HOME/bin:$PATH -# Need wget to install ant -RUN apt-get update \ - && apt-get install -y --no-install-recommends wget \ - && apt-get purge -y --auto-remove \ - && rm -rf /var/lib/apt/lists/* # Download and install 'ant' RUN mkdir $ANT_HOME && \ - wget -qO- "https://archive.apache.org/dist/ant/binaries/apache-ant-$ANT_VERSION-bin.tar.gz" | tar -zx --strip-components=1 -C $ANT_HOME + curl --silent --show-error --location --fail --retry 5 --output /tmp/apache-ant.tar.gz \ + https://archive.apache.org/dist/ant/binaries/apache-ant-${ANT_VERSION}-bin.tar.gz && \ + tar -zx --strip-components=1 -f /tmp/apache-ant.tar.gz -C $ANT_HOME && \ + rm /tmp/apache-ant.tar.gz # Run necessary 'ant' deploy scripts RUN ant init_installation update_configs update_code update_webapps diff --git a/Dockerfile.cli b/Dockerfile.cli index be03e8922b..e208e3d921 100644 --- a/Dockerfile.cli +++ b/Dockerfile.cli @@ -38,14 +38,12 @@ WORKDIR /dspace-src ENV ANT_VERSION=1.10.13 ENV ANT_HOME=/tmp/ant-$ANT_VERSION ENV PATH=$ANT_HOME/bin:$PATH -# Need wget to install ant -RUN apt-get update \ - && apt-get install -y --no-install-recommends wget \ - && apt-get purge -y --auto-remove \ - && rm -rf /var/lib/apt/lists/* # Download and install 'ant' RUN mkdir $ANT_HOME && \ - wget -qO- "https://archive.apache.org/dist/ant/binaries/apache-ant-$ANT_VERSION-bin.tar.gz" | tar -zx --strip-components=1 -C $ANT_HOME + curl --silent --show-error --location --fail --retry 5 --output /tmp/apache-ant.tar.gz \ + https://archive.apache.org/dist/ant/binaries/apache-ant-${ANT_VERSION}-bin.tar.gz && \ + tar -zx --strip-components=1 -f /tmp/apache-ant.tar.gz -C $ANT_HOME && \ + rm /tmp/apache-ant.tar.gz # Run necessary 'ant' deploy scripts RUN ant init_installation update_configs update_code diff --git a/Dockerfile.dependencies b/Dockerfile.dependencies index 794dfa9a66..d3ca9d5e3c 100644 --- a/Dockerfile.dependencies +++ b/Dockerfile.dependencies @@ -6,7 +6,7 @@ # To build with JDK17, use "--build-arg JDK_VERSION=17" ARG JDK_VERSION=11 -# Step 1 - Run Maven Build +# Step 1 - Download all Dependencies FROM docker.io/maven:3-eclipse-temurin-${JDK_VERSION} AS build ARG TARGET_DIR=dspace-installer WORKDIR /app @@ -19,16 +19,60 @@ RUN chown -Rv dspace: /app # Switch to dspace user & run below commands as that user USER dspace -# Copy the DSpace source code (from local machine) into the workdir (excluding .dockerignore contents) -ADD --chown=dspace . /app/ +# This next part may look odd, but it speeds up the build of this image *significantly*. +# Copy ONLY the POMs to this image (from local machine). This will allow us to download all dependencies *without* +# performing any code compilation steps. + +# Parent POM +ADD --chown=dspace pom.xml /app/ +RUN mkdir -p /app/dspace + +# 'dspace' module POM. Includes 'additions' ONLY, as it's the only submodule that is required to exist. 
+ADD --chown=dspace dspace/pom.xml /app/dspace/ +RUN mkdir -p /app/dspace/modules/ +ADD --chown=dspace dspace/modules/pom.xml /app/dspace/modules/ +RUN mkdir -p /app/dspace/modules/additions +ADD --chown=dspace dspace/modules/additions/pom.xml /app/dspace/modules/additions/ + +# 'dspace-api' module POM +RUN mkdir -p /app/dspace-api +ADD --chown=dspace dspace-api/pom.xml /app/dspace-api/ + +# 'dspace-iiif' module POM +RUN mkdir -p /app/dspace-iiif +ADD --chown=dspace dspace-iiif/pom.xml /app/dspace-iiif/ + +# 'dspace-oai' module POM +RUN mkdir -p /app/dspace-oai +ADD --chown=dspace dspace-oai/pom.xml /app/dspace-oai/ + +# 'dspace-rdf' module POM +RUN mkdir -p /app/dspace-rdf +ADD --chown=dspace dspace-rdf/pom.xml /app/dspace-rdf/ + +# 'dspace-server-webapp' module POM +RUN mkdir -p /app/dspace-server-webapp +ADD --chown=dspace dspace-server-webapp/pom.xml /app/dspace-server-webapp/ + +# 'dspace-services' module POM +RUN mkdir -p /app/dspace-services +ADD --chown=dspace dspace-services/pom.xml /app/dspace-services/ + +# 'dspace-sword' module POM +RUN mkdir -p /app/dspace-sword +ADD --chown=dspace dspace-sword/pom.xml /app/dspace-sword/ + +# 'dspace-swordv2' module POM +RUN mkdir -p /app/dspace-swordv2 +ADD --chown=dspace dspace-swordv2/pom.xml /app/dspace-swordv2/ # Trigger the installation of all maven dependencies (hide download progress messages) # Maven flags here ensure that we skip final assembly, skip building test environment and skip all code verification checks. -# These flags speed up this installation as much as reasonably possible. -ENV MAVEN_FLAGS="-P-assembly -P-test-environment -Denforcer.skip=true -Dcheckstyle.skip=true -Dlicense.skip=true -Dxml.skip=true" -RUN mvn --no-transfer-progress install ${MAVEN_FLAGS} +# These flags speed up this installation and skip tasks we cannot perform as we don't have the full source code. +ENV MAVEN_FLAGS="-P-assembly -P-test-environment -Denforcer.skip=true -Dcheckstyle.skip=true -Dlicense.skip=true -Dxjc.skip=true -Dxml.skip=true" +RUN mvn --no-transfer-progress verify ${MAVEN_FLAGS} -# Clear the contents of the /app directory (including all maven builds), so no artifacts remain. +# Clear the contents of the /app directory (including all maven target folders), so no artifacts remain. 
# This ensures when dspace:dspace is built, it will use the Maven local cache (~/.m2) for dependencies USER root RUN rm -rf /app/* diff --git a/Dockerfile.test b/Dockerfile.test index 08b6b3018b..cc73c655b6 100644 --- a/Dockerfile.test +++ b/Dockerfile.test @@ -41,14 +41,12 @@ WORKDIR /dspace-src ENV ANT_VERSION=1.10.12 ENV ANT_HOME=/tmp/ant-$ANT_VERSION ENV PATH=$ANT_HOME/bin:$PATH -# Need wget to install ant -RUN apt-get update \ - && apt-get install -y --no-install-recommends wget \ - && apt-get purge -y --auto-remove \ - && rm -rf /var/lib/apt/lists/* # Download and install 'ant' RUN mkdir $ANT_HOME && \ - wget -qO- "https://archive.apache.org/dist/ant/binaries/apache-ant-$ANT_VERSION-bin.tar.gz" | tar -zx --strip-components=1 -C $ANT_HOME + curl --silent --show-error --location --fail --retry 5 --output /tmp/apache-ant.tar.gz \ + https://archive.apache.org/dist/ant/binaries/apache-ant-${ANT_VERSION}-bin.tar.gz && \ + tar -zx --strip-components=1 -f /tmp/apache-ant.tar.gz -C $ANT_HOME && \ + rm /tmp/apache-ant.tar.gz # Run necessary 'ant' deploy scripts RUN ant init_installation update_configs update_code update_webapps diff --git a/dspace-api/src/main/java/org/dspace/app/statistics/LogAnalyser.java b/dspace-api/src/main/java/org/dspace/app/statistics/LogAnalyser.java index 2e4ed69b26..c787261419 100644 --- a/dspace-api/src/main/java/org/dspace/app/statistics/LogAnalyser.java +++ b/dspace-api/src/main/java/org/dspace/app/statistics/LogAnalyser.java @@ -281,10 +281,14 @@ public class LogAnalyser { */ private static String fileTemplate = "dspace\\.log.*"; + private static final ConfigurationService configurationService = + DSpaceServicesFactory.getInstance().getConfigurationService(); + /** * the configuration file from which to configure the analyser */ - private static String configFile; + private static String configFile = configurationService.getProperty("dspace.dir") + + File.separator + "config" + File.separator + "dstat.cfg"; /** * the output file to which to write aggregation data @@ -616,8 +620,6 @@ public class LogAnalyser { } // now do the host name and url lookup - ConfigurationService configurationService - = DSpaceServicesFactory.getInstance().getConfigurationService(); hostName = Utils.getHostName(configurationService.getProperty("dspace.ui.url")); name = configurationService.getProperty("dspace.name").trim(); url = configurationService.getProperty("dspace.ui.url").trim(); @@ -658,8 +660,6 @@ public class LogAnalyser { String myConfigFile, String myOutFile, Date myStartDate, Date myEndDate, boolean myLookUp) { - ConfigurationService configurationService - = DSpaceServicesFactory.getInstance().getConfigurationService(); if (myLogDir != null) { logDir = myLogDir; @@ -673,9 +673,6 @@ public class LogAnalyser { if (myConfigFile != null) { configFile = myConfigFile; - } else { - configFile = configurationService.getProperty("dspace.dir") - + File.separator + "config" + File.separator + "dstat.cfg"; } if (myStartDate != null) { diff --git a/dspace-api/src/main/java/org/dspace/app/util/DCInput.java b/dspace-api/src/main/java/org/dspace/app/util/DCInput.java index 11f9aadd86..0a1e77ee72 100644 --- a/dspace-api/src/main/java/org/dspace/app/util/DCInput.java +++ b/dspace-api/src/main/java/org/dspace/app/util/DCInput.java @@ -163,7 +163,7 @@ public class DCInput { * The scope of the input sets, this restricts hidden metadata fields from * view by the end user during submission. 
      */
-    public static final String SUBMISSION_SCOPE = "submit";
+    public static final String SUBMISSION_SCOPE = "submission";
 
     /**
      * Class constructor for creating a DCInput object based on the contents of
@@ -262,7 +262,7 @@ public class DCInput {
 
     /**
      * Is this DCInput for display in the given scope? The scope should be
-     * either "workflow" or "submit", as per the input forms definition. If the
+     * either "workflow" or "submission", as per the input forms definition. If the
      * internal visibility is set to "null" then this will always return true.
      *
      * @param scope String identifying the scope that this input's visibility
diff --git a/dspace-api/src/main/java/org/dspace/browse/BrowseEngine.java b/dspace-api/src/main/java/org/dspace/browse/BrowseEngine.java
index 351c362482..be7a34086a 100644
--- a/dspace-api/src/main/java/org/dspace/browse/BrowseEngine.java
+++ b/dspace-api/src/main/java/org/dspace/browse/BrowseEngine.java
@@ -422,9 +422,6 @@ public class BrowseEngine {
             }
         }
 
-        // this is the total number of results in answer to the query
-        int total = getTotalResults(true);
-
         // set the ordering field (there is only one option)
         dao.setOrderField("sort_value");
 
@@ -444,6 +441,9 @@ public class BrowseEngine {
         dao.setOffset(offset);
         dao.setLimit(scope.getResultsPerPage());
 
+        // this is the total number of results in answer to the query
+        int total = getTotalResults(true);
+
         // Holder for the results
         List<String[]> results = null;
 
@@ -680,33 +680,9 @@ public class BrowseEngine {
         // tell the browse query whether we are distinct
         dao.setDistinct(distinct);
 
-        // ensure that the select is set to "*"
-        String[] select = {"*"};
-        dao.setCountValues(select);
-
-        // FIXME: it would be nice to have a good way of doing this in the DAO
-        // now reset all of the fields that we don't want to have constraining
-        // our count, storing them locally to reinstate later
-        String focusField = dao.getJumpToField();
-        String focusValue = dao.getJumpToValue();
-        int limit = dao.getLimit();
-        int offset = dao.getOffset();
-
-        dao.setJumpToField(null);
-        dao.setJumpToValue(null);
-        dao.setLimit(-1);
-        dao.setOffset(-1);
-
         // perform the query and get the result
         int count = dao.doCountQuery();
 
-        // now put back the values we removed for this method
-        dao.setJumpToField(focusField);
-        dao.setJumpToValue(focusValue);
-        dao.setLimit(limit);
-        dao.setOffset(offset);
-        dao.setCountValues(null);
-
         log.debug(LogHelper.getHeader(context, "get_total_results_return", "return=" + count));
 
         return count;
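
The count query above now delegates entirely to the DAO, which (in the SolrBrowseDAO hunk that follows) asks Solr for a bucket count via a "json.facet" parameter built with Jackson. As a standalone illustration of that construction — assuming only jackson-databind on the classpath, and with "author" as a made-up browse field — the JSON it produces looks like this:

    import com.fasterxml.jackson.databind.node.JsonNodeFactory;
    import com.fasterxml.jackson.databind.node.ObjectNode;

    public class JsonFacetSketch {
        public static void main(String[] args) {
            String facetField = "author"; // hypothetical facet name
            ObjectNode jsonFacet = JsonNodeFactory.instance.objectNode();
            ObjectNode entriesCount = JsonNodeFactory.instance.objectNode();
            entriesCount.put("type", "terms");                 // bucket the *_filter field by term
            entriesCount.put("field", facetField + "_filter");
            entriesCount.put("limit", 0);                      // return no buckets themselves...
            entriesCount.put("numBuckets", true);              // ...only the total number of distinct buckets
            jsonFacet.set("entries_count", entriesCount);

            // Prints the value attached to the Solr request as the "json.facet" parameter:
            // {"entries_count":{"type":"terms","field":"author_filter","limit":0,"numBuckets":true}}
            System.out.println(jsonFacet.toString());
        }
    }

Because "limit" is 0 and "numBuckets" is true, Solr computes the number of distinct values without shipping the values back, which is what lets doCountQuery() below avoid materialising the whole facet list.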
diff --git a/dspace-api/src/main/java/org/dspace/browse/SolrBrowseDAO.java b/dspace-api/src/main/java/org/dspace/browse/SolrBrowseDAO.java
index f99aab852b..1917dec423 100644
--- a/dspace-api/src/main/java/org/dspace/browse/SolrBrowseDAO.java
+++ b/dspace-api/src/main/java/org/dspace/browse/SolrBrowseDAO.java
@@ -13,6 +13,8 @@ import java.util.Collections;
 import java.util.Comparator;
 import java.util.List;
 
+import com.fasterxml.jackson.databind.node.JsonNodeFactory;
+import com.fasterxml.jackson.databind.node.ObjectNode;
 import org.apache.commons.lang3.StringUtils;
 import org.apache.logging.log4j.Logger;
 import org.apache.solr.client.solrj.util.ClientUtils;
@@ -180,18 +182,33 @@ public class SolrBrowseDAO implements BrowseDAO {
         addDefaultFilterQueries(query);
         if (distinct) {
             DiscoverFacetField dff;
+
+            // To get the number of distinct values we use the following "json.facet" query param
+            // {"entries_count": {"type":"terms", "field": "facetNameField_filter", "limit":0, "numBuckets":true}}
+            ObjectNode jsonFacet = JsonNodeFactory.instance.objectNode();
+            ObjectNode entriesCount = JsonNodeFactory.instance.objectNode();
+            entriesCount.put("type", "terms");
+            entriesCount.put("field", facetField + "_filter");
+            entriesCount.put("limit", 0);
+            entriesCount.put("numBuckets", true);
+            jsonFacet.set("entries_count", entriesCount);
+
             if (StringUtils.isNotBlank(startsWith)) {
                 dff = new DiscoverFacetField(facetField,
-                    DiscoveryConfigurationParameters.TYPE_TEXT, -1,
-                    DiscoveryConfigurationParameters.SORT.VALUE, startsWith);
+                    DiscoveryConfigurationParameters.TYPE_TEXT, limit,
+                    DiscoveryConfigurationParameters.SORT.VALUE, startsWith, offset);
+
+                // Add the prefix to the json facet query
+                entriesCount.put("prefix", startsWith);
             } else {
                 dff = new DiscoverFacetField(facetField,
-                    DiscoveryConfigurationParameters.TYPE_TEXT, -1,
-                    DiscoveryConfigurationParameters.SORT.VALUE);
+                    DiscoveryConfigurationParameters.TYPE_TEXT, limit,
+                    DiscoveryConfigurationParameters.SORT.VALUE, offset);
             }
             query.addFacetField(dff);
             query.setFacetMinCount(1);
             query.setMaxResults(0);
+            query.addProperty("json.facet", jsonFacet.toString());
         } else {
             query.setMaxResults(limit/* > 0 ? limit : 20*/);
             if (offset > 0) {
@@ -248,8 +265,7 @@ public class SolrBrowseDAO implements BrowseDAO {
         DiscoverResult resp = getSolrResponse();
         int count = 0;
         if (distinct) {
-            List<FacetResult> facetResults = resp.getFacetResult(facetField);
-            count = facetResults.size();
+            count = (int) resp.getTotalEntries();
         } else {
             // we need to cast to int to respect the BrowseDAO contract...
             count = (int) resp.getTotalSearchResults();
@@ -266,8 +282,8 @@ public class SolrBrowseDAO implements BrowseDAO {
         DiscoverResult resp = getSolrResponse();
         List<FacetResult> facet = resp.getFacetResult(facetField);
         int count = doCountQuery();
-        int start = offset > 0 ? offset : 0;
-        int max = limit > 0 ? limit : count; //if negative, return everything
+        int start = 0;
+        int max = facet.size();
         List<String[]> result = new ArrayList<>();
         if (ascending) {
             for (int i = start; i < (start + max) && i < count; i++) {
diff --git a/dspace-api/src/main/java/org/dspace/core/Context.java b/dspace-api/src/main/java/org/dspace/core/Context.java
index 02a3fee09f..dab6ab7fbd 100644
--- a/dspace-api/src/main/java/org/dspace/core/Context.java
+++ b/dspace-api/src/main/java/org/dspace/core/Context.java
@@ -883,7 +883,19 @@ public class Context implements AutoCloseable {
     }
 
     /**
-     * Remove an entity from the cache. This is necessary when batch processing a large number of items.
+     * Remove all entities from the cache and reload the current user entity. This is useful when batch processing
+     * a large number of entities, when the calling code requires the cache to be completely cleared before
+     * continuing.
+     *
+     * @throws SQLException if a database error occurs.
+     */
+    public void uncacheEntities() throws SQLException {
+        dbConnection.uncacheEntities();
+        reloadContextBoundEntities();
+    }
+
+    /**
+     * Remove an entity from the cache. This is useful when batch processing a large number of entities,
+     * when the calling code needs to retain some items in the cache while removing others.
      *
      * @param entity The entity to reload
      * @param <E>    The class of the entity. The entity must implement the {@link ReloadableEntity} interface.
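
The new Context.uncacheEntities() is aimed at exactly the batch-processing pattern its javadoc describes (the XOAI indexer change later in this diff uses it that way). A minimal sketch of such a loop — the service, iterator, and batch size of 100 are assumptions for illustration, not part of this PR:

    import java.sql.SQLException;
    import java.util.Iterator;
    import org.dspace.content.Item;
    import org.dspace.core.Context;

    public class BatchSketch {
        // Hypothetical batch loop; the per-item work is elided.
        void processAll(Context context, Iterator<Item> items) throws SQLException {
            int processed = 0;
            while (items.hasNext()) {
                Item item = items.next();
                // ... real work on the item would happen here ...
                if (++processed % 100 == 0) {
                    context.commit();          // persist pending changes first, as the javadoc warns
                    context.uncacheEntities(); // then clear the session cache to keep memory use flat
                }
            }
        }
    }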
diff --git a/dspace-api/src/main/java/org/dspace/core/DBConnection.java b/dspace-api/src/main/java/org/dspace/core/DBConnection.java
index 66e4a65dbf..c9c4ce0953 100644
--- a/dspace-api/src/main/java/org/dspace/core/DBConnection.java
+++ b/dspace-api/src/main/java/org/dspace/core/DBConnection.java
@@ -124,28 +124,38 @@ public interface DBConnection<T> {
     public long getCacheSize() throws SQLException;
 
     /**
-     * Reload a DSpace object from the database. This will make sure the object
+     * Reload an entity from the database. This will make sure the object
      * is valid and stored in the cache. The returned object should be used
      * henceforth instead of the passed object.
      *
-     * @param <E>    type of {@link entity}
-     * @param entity The DSpace object to reload
+     * @param <E>    type of entity.
+     * @param entity The entity to reload.
      * @return the reloaded entity.
-     * @throws java.sql.SQLException passed through.
+     * @throws SQLException passed through.
      */
     public <E extends ReloadableEntity> E reloadEntity(E entity) throws SQLException;
 
     /**
-     * Remove a DSpace object from the session cache when batch processing a
-     * large number of objects.
+     * Remove all entities from the session cache.
      *
-     * <p>Objects removed from cache are not saved in any way. Therefore, if you
-     * have modified an object, you should be sure to {@link commit()} changes
+     * <p>Entities removed from cache are not saved in any way. Therefore, if you
+     * have modified any entities, you should be sure to {@link #commit()} changes
      * before calling this method.
      *
-     * @param <E>    Type of {@link entity}
-     * @param entity The DSpace object to decache.
-     * @throws java.sql.SQLException passed through.
+     * @throws SQLException passed through.
+     */
+    public void uncacheEntities() throws SQLException;
+
+    /**
+     * Remove an entity from the session cache.
+     *
+     * <p>Entities removed from cache are not saved in any way. Therefore, if you
+     * have modified the entity, you should be sure to {@link #commit()} changes
+     * before calling this method.
+     *
+     * @param <E>    Type of entity.
+     * @param entity The entity to decache.
+     * @throws SQLException passed through.
      */
     public <E extends ReloadableEntity> void uncacheEntity(E entity) throws SQLException;
diff --git a/dspace-api/src/main/java/org/dspace/core/HibernateDBConnection.java b/dspace-api/src/main/java/org/dspace/core/HibernateDBConnection.java
index b371af80ee..bd00b844ba 100644
--- a/dspace-api/src/main/java/org/dspace/core/HibernateDBConnection.java
+++ b/dspace-api/src/main/java/org/dspace/core/HibernateDBConnection.java
@@ -243,6 +243,11 @@
     }
 
+    @Override
+    public void uncacheEntities() throws SQLException {
+        getSession().clear();
+    }
+
     /**
      * Evict an entity from the hibernate cache.
     * <p>
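
Under the hood the two operations map onto the two eviction styles Hibernate's Session offers: uncacheEntities() calls clear(), while the existing uncacheEntity() relies on targeted eviction. A minimal contrast, assuming a plain Hibernate Session:

    import org.hibernate.Session;

    public class SessionCacheSketch {
        // Detach a single entity, the style used by uncacheEntity() for attached entities.
        static void dropOne(Session session, Object entity) {
            session.evict(entity);
        }

        // Detach every entity at once, which is what the new uncacheEntities() does.
        static void dropAll(Session session) {
            session.clear();
        }
    }

clear() is O(1) regardless of cache size, which is why it suits the "wipe everything between batches" case better than evicting entities one at a time.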
diff --git a/dspace-api/src/main/java/org/dspace/discovery/DiscoverResult.java b/dspace-api/src/main/java/org/dspace/discovery/DiscoverResult.java
index 00236d2bfe..a56804e3e7 100644
--- a/dspace-api/src/main/java/org/dspace/discovery/DiscoverResult.java
+++ b/dspace-api/src/main/java/org/dspace/discovery/DiscoverResult.java
@@ -32,6 +32,9 @@ public class DiscoverResult {
     private List<IndexableObject> indexableObjects;
     private Map<String, List<FacetResult>> facetResults;
 
+    // Total count of facet entries calculated for a metadata browsing query
+    private long totalEntries;
+
     /**
      * A map that contains all the documents sought after, the key is a string representation of the Indexable Object
     */
@@ -64,6 +67,14 @@ public class DiscoverResult {
         this.totalSearchResults = totalSearchResults;
     }
 
+    public long getTotalEntries() {
+        return totalEntries;
+    }
+
+    public void setTotalEntries(long totalEntries) {
+        this.totalEntries = totalEntries;
+    }
+
     public int getStart() {
         return start;
     }
diff --git a/dspace-api/src/main/java/org/dspace/discovery/SolrServiceImpl.java b/dspace-api/src/main/java/org/dspace/discovery/SolrServiceImpl.java
index cd3797e3e3..9339b574b5 100644
--- a/dspace-api/src/main/java/org/dspace/discovery/SolrServiceImpl.java
+++ b/dspace-api/src/main/java/org/dspace/discovery/SolrServiceImpl.java
@@ -1055,6 +1055,8 @@ public class SolrServiceImpl implements SearchService, IndexingService {
             }
             //Resolve our facet field values
             resolveFacetFields(context, query, result, skipLoadingResponse, solrQueryResponse);
+            //Add total entries count for metadata browsing
+            resolveEntriesCount(result, solrQueryResponse);
         }
         // If any stale entries are found in the current page of results,
         // we remove those stale entries and rerun the same query again.
@@ -1080,7 +1082,39 @@ public class SolrServiceImpl implements SearchService, IndexingService {
         return result;
     }
 
+    /**
+     * Stores the total count of entries for metadata index browsing. The count is calculated by the
+     * json.facet parameter with the following value:
+     *
+     * <pre>
+     * {
+     *     "entries_count": {
+     *         "type": "terms",
+     *         "field": "facetNameField_filter",
+     *         "limit": 0,
+     *         "prefix": "prefix_value",
+     *         "numBuckets": true
+     *     }
+     * }
+     * </pre>
+ * + * This value is returned in the facets field of the Solr response. + * + * @param result DiscoverResult object where the total entries count will be stored + * @param solrQueryResponse QueryResponse object containing the solr response + */ + private void resolveEntriesCount(DiscoverResult result, QueryResponse solrQueryResponse) { + Object facetsObj = solrQueryResponse.getResponse().get("facets"); + if (facetsObj instanceof NamedList) { + NamedList facets = (NamedList) facetsObj; + Object bucketsInfoObj = facets.get("entries_count"); + if (bucketsInfoObj instanceof NamedList) { + NamedList bucketsInfo = (NamedList) bucketsInfoObj; + result.setTotalEntries((int) bucketsInfo.get("numBuckets")); + } + } + } private void resolveFacetFields(Context context, DiscoverQuery query, DiscoverResult result, boolean skipLoadingResponse, QueryResponse solrQueryResponse) throws SQLException { diff --git a/dspace-api/src/main/java/org/dspace/discovery/indexobject/IndexFactoryImpl.java b/dspace-api/src/main/java/org/dspace/discovery/indexobject/IndexFactoryImpl.java index f1ae137b91..c9a865ec85 100644 --- a/dspace-api/src/main/java/org/dspace/discovery/indexobject/IndexFactoryImpl.java +++ b/dspace-api/src/main/java/org/dspace/discovery/indexobject/IndexFactoryImpl.java @@ -118,20 +118,10 @@ public abstract class IndexFactoryImpl implements ParseContext tikaContext = new ParseContext(); // Use Apache Tika to parse the full text stream(s) + boolean extractionSucceeded = false; try (InputStream fullTextStreams = streams.getStream()) { tikaParser.parse(fullTextStreams, tikaHandler, tikaMetadata, tikaContext); - - // Write Tika metadata to "tika_meta_*" fields. - // This metadata is not very useful right now, - // but we'll keep it just in case it becomes more useful. - for (String name : tikaMetadata.names()) { - for (String value : tikaMetadata.getValues(name)) { - doc.addField("tika_meta_" + name, value); - } - } - - // Save (parsed) full text to "fulltext" field - doc.addField("fulltext", tikaHandler.toString()); + extractionSucceeded = true; } catch (SAXException saxe) { // Check if this SAXException is just a notice that this file was longer than the character limit. // Unfortunately there is not a unique, public exception type to catch here. This error is thrown @@ -141,6 +131,7 @@ public abstract class IndexFactoryImpl implements // log that we only indexed up to that configured limit log.info("Full text is larger than the configured limit (discovery.solr.fulltext.charLimit)." + " Only the first {} characters were indexed.", charLimit); + extractionSucceeded = true; } else { log.error("Tika parsing error. Could not index full text.", saxe); throw new IOException("Tika parsing error. Could not index full text.", saxe); @@ -148,11 +139,19 @@ public abstract class IndexFactoryImpl implements } catch (TikaException | IOException ex) { log.error("Tika parsing error. Could not index full text.", ex); throw new IOException("Tika parsing error. Could not index full text.", ex); - } finally { - // Add document to index - solr.add(doc); } - return; + if (extractionSucceeded) { + // Write Tika metadata to "tika_meta_*" fields. + // This metadata is not very useful right now, + // but we'll keep it just in case it becomes more useful. 
+            for (String name : tikaMetadata.names()) {
+                for (String value : tikaMetadata.getValues(name)) {
+                    doc.addField("tika_meta_" + name, value);
+                }
+            }
+
+            // Save (parsed) full text to "fulltext" field
+            doc.addField("fulltext", tikaHandler.toString());
+        }
     }
 
     // Add document to index
     solr.add(doc);
diff --git a/dspace-api/src/main/java/org/dspace/eperson/Group2GroupCache.java b/dspace-api/src/main/java/org/dspace/eperson/Group2GroupCache.java
index 09bdf34d4c..58781cd402 100644
--- a/dspace-api/src/main/java/org/dspace/eperson/Group2GroupCache.java
+++ b/dspace-api/src/main/java/org/dspace/eperson/Group2GroupCache.java
@@ -14,6 +14,7 @@ import javax.persistence.Id;
 import javax.persistence.JoinColumn;
 import javax.persistence.ManyToOne;
 import javax.persistence.Table;
+import javax.persistence.UniqueConstraint;
 
 import org.hibernate.proxy.HibernateProxyHelper;
 
@@ -23,7 +24,7 @@ import org.hibernate.proxy.HibernateProxyHelper;
 * @author kevinvandevelde at atmire.com
 */
 @Entity
-@Table(name = "group2groupcache")
+@Table(name = "group2groupcache", uniqueConstraints = { @UniqueConstraint(columnNames = {"parent_id", "child_id"}) })
 public class Group2GroupCache implements Serializable {
 
     @Id
diff --git a/dspace-api/src/main/java/org/dspace/eperson/GroupServiceImpl.java b/dspace-api/src/main/java/org/dspace/eperson/GroupServiceImpl.java
index 3fb20e2f1e..4cec4c9c0d 100644
--- a/dspace-api/src/main/java/org/dspace/eperson/GroupServiceImpl.java
+++ b/dspace-api/src/main/java/org/dspace/eperson/GroupServiceImpl.java
@@ -20,6 +20,7 @@ import java.util.Set;
 import java.util.UUID;
 
 import org.apache.commons.collections4.CollectionUtils;
+import org.apache.commons.collections4.SetUtils;
 import org.apache.commons.lang3.StringUtils;
 import org.apache.commons.lang3.tuple.Pair;
 import org.apache.logging.log4j.LogManager;
@@ -673,15 +674,14 @@ public class GroupServiceImpl extends DSpaceObjectServiceImpl<Group> implements GroupService {
 
     /**
-     * Regenerate the group cache AKA the group2groupcache table in the database -
-     * meant to be called when a group is added or removed from another group
+     * Returns a set with pairs of parent and child group UUIDs, representing the new cache table rows.
     *
-     * @param context      The relevant DSpace Context.
-     * @param flushQueries flushQueries Flush all pending queries
+     * @param context      The relevant DSpace Context.
+     * @param flushQueries Flush all pending queries
+     * @return Pairs of parent and child group UUID of the new cache.
      * @throws SQLException An exception that provides information on a database access error or other errors.
      */
-    protected void rethinkGroupCache(Context context, boolean flushQueries) throws SQLException {
-
+    private Set<Pair<UUID, UUID>> computeNewCache(Context context, boolean flushQueries) throws SQLException {
         Map<UUID, Set<UUID>> parents = new HashMap<>();
 
         List<Pair<UUID, UUID>> group2groupResults = groupDAO.getGroup2GroupResults(context, flushQueries);
         for (Pair<UUID, UUID> group2groupResult : group2groupResults) {
             UUID parent = group2groupResult.getLeft();
             UUID child = group2groupResult.getRight();
 
-            // if parent doesn't have an entry, create one
-            if (!parents.containsKey(parent)) {
-                Set<UUID> children = new HashSet<>();
-
-                // add child id to the list
-                children.add(child);
-                parents.put(parent, children);
-            } else {
-                // parent has an entry, now add the child to the parent's record
-                // of children
-                Set<UUID> children = parents.get(parent);
-                children.add(child);
-            }
+            parents.putIfAbsent(parent, new HashSet<>());
+            parents.get(parent).add(child);
         }
 
         // now parents is a hash of all of the IDs of groups that are parents
         for (Map.Entry<UUID, Set<UUID>> parent : parents.entrySet()) {
             Set<UUID> myChildren = getChildren(parents, parent.getKey());
             parent.getValue().addAll(myChildren);
         }
 
-        // empty out group2groupcache table
-        group2GroupCacheDAO.deleteAll(context);
-
-        // write out new one
+        // write out new cache IN MEMORY ONLY and return it
+        Set<Pair<UUID, UUID>> newCache = new HashSet<>();
         for (Map.Entry<UUID, Set<UUID>> parent : parents.entrySet()) {
             UUID key = parent.getKey();
             for (UUID child : parent.getValue()) {
-
-                Group parentGroup = find(context, key);
-                Group childGroup = find(context, child);
-
-                if (parentGroup != null && childGroup != null && group2GroupCacheDAO
-                    .find(context, parentGroup, childGroup) == null) {
-                    Group2GroupCache group2GroupCache = group2GroupCacheDAO.create(context, new Group2GroupCache());
-                    group2GroupCache.setParent(parentGroup);
-                    group2GroupCache.setChild(childGroup);
-                    group2GroupCacheDAO.save(context, group2GroupCache);
-                }
+                newCache.add(Pair.of(key, child));
             }
         }
+
+        return newCache;
+    }
+
+    /**
+     * Regenerate the group cache AKA the group2groupcache table in the database -
+     * meant to be called when a group is added or removed from another group
+     *
+     * @param context      The relevant DSpace Context.
+     * @param flushQueries Flush all pending queries
+     * @throws SQLException An exception that provides information on a database access error or other errors.
+     */
+    protected void rethinkGroupCache(Context context, boolean flushQueries) throws SQLException {
+        // current cache in the database
+        Set<Pair<UUID, UUID>> oldCache = group2GroupCacheDAO.getCache(context);
+
+        // correct cache, computed from the Group table
+        Set<Pair<UUID, UUID>> newCache = computeNewCache(context, flushQueries);
+
+        SetUtils.SetView<Pair<UUID, UUID>> toDelete = SetUtils.difference(oldCache, newCache);
+        SetUtils.SetView<Pair<UUID, UUID>> toCreate = SetUtils.difference(newCache, oldCache);
+
+        for (Pair<UUID, UUID> pair : toDelete) {
+            group2GroupCacheDAO.deleteFromCache(context, pair.getLeft(), pair.getRight());
+        }
+
+        for (Pair<UUID, UUID> pair : toCreate) {
+            group2GroupCacheDAO.addToCache(context, pair.getLeft(), pair.getRight());
+        }
     }
 
     @Override
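
Instead of the old "deleteAll, then re-insert everything" approach, the rewrite touches only the rows that actually changed: two SetUtils.difference views yield the stale rows and the missing rows. A self-contained demonstration of that technique (commons-lang3 Pair plus commons-collections4 SetUtils, the same libraries the diff imports; the UUIDs are arbitrary):

    import java.util.HashSet;
    import java.util.Set;
    import java.util.UUID;
    import org.apache.commons.collections4.SetUtils;
    import org.apache.commons.lang3.tuple.Pair;

    public class DiffSketch {
        public static void main(String[] args) {
            UUID a = UUID.randomUUID();
            UUID b = UUID.randomUUID();
            UUID c = UUID.randomUUID();

            Set<Pair<UUID, UUID>> oldCache = new HashSet<>();
            oldCache.add(Pair.of(a, b)); // stale row: only in oldCache, lands in toDelete
            oldCache.add(Pair.of(a, c)); // still valid: in both sets, left untouched

            Set<Pair<UUID, UUID>> newCache = new HashSet<>();
            newCache.add(Pair.of(a, c));
            newCache.add(Pair.of(b, c)); // new row: only in newCache, lands in toCreate

            // Pair implements equals()/hashCode(), so set difference works element-wise.
            SetUtils.SetView<Pair<UUID, UUID>> toDelete = SetUtils.difference(oldCache, newCache);
            SetUtils.SetView<Pair<UUID, UUID>> toCreate = SetUtils.difference(newCache, oldCache);

            System.out.println("delete " + toDelete.size() + " row(s), create " + toCreate.size() + " row(s)");
        }
    }

On a largely unchanged group tree this reduces the write load from "every row" to "a handful of rows", and it cooperates with the unique constraint added to Group2GroupCache above, since unchanged rows are never re-inserted.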
+ */ + protected void rethinkGroupCache(Context context, boolean flushQueries) throws SQLException { + // current cache in the database + Set> oldCache = group2GroupCacheDAO.getCache(context); + + // correct cache, computed from the Group table + Set> newCache = computeNewCache(context, flushQueries); + + SetUtils.SetView> toDelete = SetUtils.difference(oldCache, newCache); + SetUtils.SetView> toCreate = SetUtils.difference(newCache, oldCache); + + for (Pair pair : toDelete ) { + group2GroupCacheDAO.deleteFromCache(context, pair.getLeft(), pair.getRight()); + } + + for (Pair pair : toCreate ) { + group2GroupCacheDAO.addToCache(context, pair.getLeft(), pair.getRight()); + } } @Override diff --git a/dspace-api/src/main/java/org/dspace/eperson/dao/Group2GroupCacheDAO.java b/dspace-api/src/main/java/org/dspace/eperson/dao/Group2GroupCacheDAO.java index 7db569a59e..d41d52c7e6 100644 --- a/dspace-api/src/main/java/org/dspace/eperson/dao/Group2GroupCacheDAO.java +++ b/dspace-api/src/main/java/org/dspace/eperson/dao/Group2GroupCacheDAO.java @@ -9,7 +9,10 @@ package org.dspace.eperson.dao; import java.sql.SQLException; import java.util.List; +import java.util.Set; +import java.util.UUID; +import org.apache.commons.lang3.tuple.Pair; import org.dspace.core.Context; import org.dspace.core.GenericDAO; import org.dspace.eperson.Group; @@ -25,13 +28,74 @@ import org.dspace.eperson.Group2GroupCache; */ public interface Group2GroupCacheDAO extends GenericDAO { - public List findByParent(Context context, Group group) throws SQLException; + /** + * Returns the current cache table as a set of UUID pairs. + * @param context The relevant DSpace Context. + * @return Set of UUID pairs, where the first element is the parent UUID and the second one is the child UUID. + * @throws SQLException An exception that provides information on a database access error or other errors. + */ + Set> getCache(Context context) throws SQLException; - public List findByChildren(Context context, Iterable groups) throws SQLException; + /** + * Returns all cache entities that are children of a given parent Group entity. + * @param context The relevant DSpace Context. + * @param group Parent group to perform the search. + * @return List of cached groups that are children of the parent group. + * @throws SQLException An exception that provides information on a database access error or other errors. + */ + List findByParent(Context context, Group group) throws SQLException; - public Group2GroupCache findByParentAndChild(Context context, Group parent, Group child) throws SQLException; + /** + * Returns all cache entities that are parents of at least one group from a children groups list. + * @param context The relevant DSpace Context. + * @param groups Children groups to perform the search. + * @return List of cached groups that are parents of at least one group from the children groups list. + * @throws SQLException An exception that provides information on a database access error or other errors. + */ + List findByChildren(Context context, Iterable groups) throws SQLException; - public Group2GroupCache find(Context context, Group parent, Group child) throws SQLException; + /** + * Returns the cache entity given specific parent and child groups. + * @param context The relevant DSpace Context. + * @param parent Parent group. + * @param child Child gruoup. + * @return Cached group. + * @throws SQLException An exception that provides information on a database access error or other errors. 
+ */ + Group2GroupCache findByParentAndChild(Context context, Group parent, Group child) throws SQLException; - public void deleteAll(Context context) throws SQLException; + /** + * Returns the cache entity given specific parent and child groups. + * @param context The relevant DSpace Context. + * @param parent Parent group. + * @param child Child gruoup. + * @return Cached group. + * @throws SQLException An exception that provides information on a database access error or other errors. + */ + Group2GroupCache find(Context context, Group parent, Group child) throws SQLException; + + /** + * Completely deletes the current cache table. + * @param context The relevant DSpace Context. + * @throws SQLException An exception that provides information on a database access error or other errors. + */ + void deleteAll(Context context) throws SQLException; + + /** + * Deletes a specific cache row given parent and child groups UUIDs. + * @param context The relevant DSpace Context. + * @param parent Parent group UUID. + * @param child Child group UUID. + * @throws SQLException An exception that provides information on a database access error or other errors. + */ + void deleteFromCache(Context context, UUID parent, UUID child) throws SQLException; + + /** + * Adds a single row to the cache table given parent and child groups UUIDs. + * @param context The relevant DSpace Context. + * @param parent Parent group UUID. + * @param child Child group UUID. + * @throws SQLException An exception that provides information on a database access error or other errors. + */ + void addToCache(Context context, UUID parent, UUID child) throws SQLException; } diff --git a/dspace-api/src/main/java/org/dspace/eperson/dao/impl/Group2GroupCacheDAOImpl.java b/dspace-api/src/main/java/org/dspace/eperson/dao/impl/Group2GroupCacheDAOImpl.java index 83fb48aaf0..42ca2bb5d4 100644 --- a/dspace-api/src/main/java/org/dspace/eperson/dao/impl/Group2GroupCacheDAOImpl.java +++ b/dspace-api/src/main/java/org/dspace/eperson/dao/impl/Group2GroupCacheDAOImpl.java @@ -8,14 +8,18 @@ package org.dspace.eperson.dao.impl; import java.sql.SQLException; +import java.util.HashSet; import java.util.LinkedList; import java.util.List; +import java.util.Set; +import java.util.UUID; import javax.persistence.Query; import javax.persistence.criteria.CriteriaBuilder; import javax.persistence.criteria.CriteriaQuery; import javax.persistence.criteria.Predicate; import javax.persistence.criteria.Root; +import org.apache.commons.lang3.tuple.Pair; import org.dspace.core.AbstractHibernateDAO; import org.dspace.core.Context; import org.dspace.eperson.Group; @@ -35,6 +39,16 @@ public class Group2GroupCacheDAOImpl extends AbstractHibernateDAO> getCache(Context context) throws SQLException { + Query query = createQuery( + context, + "SELECT new org.apache.commons.lang3.tuple.ImmutablePair(g.parent.id, g.child.id) FROM Group2GroupCache g" + ); + List> results = query.getResultList(); + return new HashSet>(results); + } + @Override public List findByParent(Context context, Group group) throws SQLException { CriteriaBuilder criteriaBuilder = getCriteriaBuilder(context); @@ -90,4 +104,24 @@ public class Group2GroupCacheDAOImpl extends AbstractHibernateDAO 1); + + context.uncacheEntities(); + + assertThat("Cache size should be one (current user)", context.getDBConnection().getCacheSize(), equalTo(1L)); + context.reloadEntity(context.getCurrentUser()); + assertThat("Cache should only contain the current user", context.getDBConnection().getCacheSize(), equalTo(1L)); + } + 
diff --git a/dspace-api/src/test/java/org/dspace/core/ContextTest.java b/dspace-api/src/test/java/org/dspace/core/ContextTest.java
--- a/dspace-api/src/test/java/org/dspace/core/ContextTest.java
+++ b/dspace-api/src/test/java/org/dspace/core/ContextTest.java
+    @Test
+    public void testUncacheEntities() throws Throwable {
+        // Load an entity so the cache holds more than just the current user
+        Group group = groupService.findByName(context, Group.ANONYMOUS);
+        assertTrue(context.getDBConnection().getCacheSize() > 1);
+
+        context.uncacheEntities();
+
+        assertThat("Cache size should be one (current user)", context.getDBConnection().getCacheSize(), equalTo(1L));
+        context.reloadEntity(context.getCurrentUser());
+        assertThat("Cache should only contain the current user", context.getDBConnection().getCacheSize(), equalTo(1L));
+    }
+
+    @Test
+    public void testUncacheEntity() throws Throwable {
+        // Remember the cache size after loading an entity
+        Group group = groupService.findByName(context, Group.ANONYMOUS);
+        long oldCacheSize = context.getDBConnection().getCacheSize();
+
+        // Uncache the entity
+        context.uncacheEntity(group);
+
+        long newCacheSize = context.getDBConnection().getCacheSize();
+        assertThat("Cache size should be reduced by one", newCacheSize, equalTo(oldCacheSize - 1));
+    }
 }
diff --git a/dspace-api/src/test/java/org/dspace/core/HibernateDBConnectionTest.java b/dspace-api/src/test/java/org/dspace/core/HibernateDBConnectionTest.java
index 093f693d56..302844ce62 100644
--- a/dspace-api/src/test/java/org/dspace/core/HibernateDBConnectionTest.java
+++ b/dspace-api/src/test/java/org/dspace/core/HibernateDBConnectionTest.java
@@ -205,6 +205,28 @@ public class HibernateDBConnectionTest extends AbstractUnitTest {
                        .contains(person));
     }
 
+    /**
+     * Test of uncacheEntities method
+     */
+    @Test
+    public void testUncacheEntities() throws SQLException {
+        // Get DBConnection associated with DSpace Context
+        HibernateDBConnection dbConnection = (HibernateDBConnection) context.getDBConnection();
+        EPerson person = context.getCurrentUser();
+
+        assertTrue("Current user should be cached in session", dbConnection.getSession()
+                                                                           .contains(person));
+
+        dbConnection.uncacheEntities();
+        assertFalse("Current user should be gone from cache", dbConnection.getSession()
+                                                                          .contains(person));
+
+        // Test ability to reload an uncached entity
+        person = dbConnection.reloadEntity(person);
+        assertTrue("Current user should be cached back in session", dbConnection.getSession()
+                                                                                .contains(person));
+    }
+
     /**
      * Test of uncacheEntity method
     */
diff --git a/dspace-oai/src/main/java/org/dspace/xoai/app/XOAI.java b/dspace-oai/src/main/java/org/dspace/xoai/app/XOAI.java
index c6aaaa34b5..2d26795778 100644
--- a/dspace-oai/src/main/java/org/dspace/xoai/app/XOAI.java
+++ b/dspace-oai/src/main/java/org/dspace/xoai/app/XOAI.java
@@ -334,6 +334,11 @@ public class XOAI {
                 server.add(list);
                 server.commit();
                 list.clear();
+                try {
+                    context.uncacheEntities();
+                } catch (SQLException ex) {
+                    log.error("Error uncaching entities", ex);
+                }
             }
         }
         System.out.println("Total: " + i + " items");
diff --git a/dspace-server-webapp/src/main/java/org/dspace/app/rest/converter/SearchEventConverter.java b/dspace-server-webapp/src/main/java/org/dspace/app/rest/converter/SearchEventConverter.java
index 978ae2ca92..f2fb12f2bd 100644
--- a/dspace-server-webapp/src/main/java/org/dspace/app/rest/converter/SearchEventConverter.java
+++ b/dspace-server-webapp/src/main/java/org/dspace/app/rest/converter/SearchEventConverter.java
@@ -67,8 +67,8 @@ public class SearchEventConverter {
         if (searchEventRest.getScope() != null) {
             IndexableObject scopeObject =
                 scopeResolver.resolveScope(context, String.valueOf(searchEventRest.getScope()));
-            if (scopeObject instanceof DSpaceObject) {
-                usageSearchEvent.setScope((DSpaceObject) scopeObject);
+            if (scopeObject != null && scopeObject.getIndexedObject() instanceof DSpaceObject) {
+                usageSearchEvent.setScope((DSpaceObject) scopeObject.getIndexedObject());
             }
         }
         usageSearchEvent.setConfiguration(searchEventRest.getConfiguration());
diff --git a/dspace-server-webapp/src/main/java/org/dspace/app/rest/converter/SubmissionSectionConverter.java b/dspace-server-webapp/src/main/java/org/dspace/app/rest/converter/SubmissionSectionConverter.java
index 0391cbce7a..3cd263493b 100644
--- a/dspace-server-webapp/src/main/java/org/dspace/app/rest/converter/SubmissionSectionConverter.java
+++ b/dspace-server-webapp/src/main/java/org/dspace/app/rest/converter/SubmissionSectionConverter.java
@@ -10,6 +10,7 @@ package org.dspace.app.rest.converter;
 import java.sql.SQLException;
 
 import org.apache.logging.log4j.Logger;
+import org.dspace.app.rest.model.ScopeEnum;
 import org.dspace.app.rest.model.SubmissionSectionRest;
 import org.dspace.app.rest.model.SubmissionVisibilityRest;
 import org.dspace.app.rest.model.VisibilityEnum;
@@ -41,6 +42,7 @@ public class SubmissionSectionConverter implements DSpaceConverter<SubmissionStepConfig, SubmissionSectionRest>