From 4a4a8bcb22796e5b22c9bbb1796e3458b48f1c07 Mon Sep 17 00:00:00 2001 From: Brian Keese Date: Tue, 15 Oct 2024 11:38:54 -0500 Subject: [PATCH 01/16] Fix full-text indexing for files over the character limit The error handler for files over the limit logged the correct message, but never actually added the full text to the index doc. --- .../indexobject/IndexFactoryImpl.java | 31 +++++++++---------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/dspace-api/src/main/java/org/dspace/discovery/indexobject/IndexFactoryImpl.java b/dspace-api/src/main/java/org/dspace/discovery/indexobject/IndexFactoryImpl.java index f1ae137b91..c9a865ec85 100644 --- a/dspace-api/src/main/java/org/dspace/discovery/indexobject/IndexFactoryImpl.java +++ b/dspace-api/src/main/java/org/dspace/discovery/indexobject/IndexFactoryImpl.java @@ -118,20 +118,10 @@ public abstract class IndexFactoryImpl implements ParseContext tikaContext = new ParseContext(); // Use Apache Tika to parse the full text stream(s) + boolean extractionSucceeded = false; try (InputStream fullTextStreams = streams.getStream()) { tikaParser.parse(fullTextStreams, tikaHandler, tikaMetadata, tikaContext); - - // Write Tika metadata to "tika_meta_*" fields. - // This metadata is not very useful right now, - // but we'll keep it just in case it becomes more useful. - for (String name : tikaMetadata.names()) { - for (String value : tikaMetadata.getValues(name)) { - doc.addField("tika_meta_" + name, value); - } - } - - // Save (parsed) full text to "fulltext" field - doc.addField("fulltext", tikaHandler.toString()); + extractionSucceeded = true; } catch (SAXException saxe) { // Check if this SAXException is just a notice that this file was longer than the character limit. // Unfortunately there is not a unique, public exception type to catch here. This error is thrown @@ -141,6 +131,7 @@ public abstract class IndexFactoryImpl implements // log that we only indexed up to that configured limit log.info("Full text is larger than the configured limit (discovery.solr.fulltext.charLimit)." + " Only the first {} characters were indexed.", charLimit); + extractionSucceeded = true; } else { log.error("Tika parsing error. Could not index full text.", saxe); throw new IOException("Tika parsing error. Could not index full text.", saxe); @@ -148,11 +139,19 @@ public abstract class IndexFactoryImpl implements } catch (TikaException | IOException ex) { log.error("Tika parsing error. Could not index full text.", ex); throw new IOException("Tika parsing error. Could not index full text.", ex); - } finally { - // Add document to index - solr.add(doc); } - return; + if (extractionSucceeded) { + // Write Tika metadata to "tika_meta_*" fields. + // This metadata is not very useful right now, + // but we'll keep it just in case it becomes more useful. + for (String name : tikaMetadata.names()) { + for (String value : tikaMetadata.getValues(name)) { + doc.addField("tika_meta_" + name, value); + } + } + // Save (parsed) full text to "fulltext" field + doc.addField("fulltext", tikaHandler.toString()); + } } // Add document to index solr.add(doc); From 64cb3bda0034d8e3051c31290a3eeea8a560d939 Mon Sep 17 00:00:00 2001 From: autavares-dev Date: Wed, 7 Aug 2024 16:34:38 -0300 Subject: [PATCH 02/16] Changes Group2GroupCache computation --- .../org/dspace/eperson/Group2GroupCache.java | 3 +- .../org/dspace/eperson/GroupServiceImpl.java | 76 ++++++++++--------- .../eperson/dao/Group2GroupCacheDAO.java | 74 ++++++++++++++++-- .../dao/impl/Group2GroupCacheDAOImpl.java | 34 +++++++++ 4 files changed, 145 insertions(+), 42 deletions(-) diff --git a/dspace-api/src/main/java/org/dspace/eperson/Group2GroupCache.java b/dspace-api/src/main/java/org/dspace/eperson/Group2GroupCache.java index 09bdf34d4c..58781cd402 100644 --- a/dspace-api/src/main/java/org/dspace/eperson/Group2GroupCache.java +++ b/dspace-api/src/main/java/org/dspace/eperson/Group2GroupCache.java @@ -14,6 +14,7 @@ import javax.persistence.Id; import javax.persistence.JoinColumn; import javax.persistence.ManyToOne; import javax.persistence.Table; +import javax.persistence.UniqueConstraint; import org.hibernate.proxy.HibernateProxyHelper; @@ -23,7 +24,7 @@ import org.hibernate.proxy.HibernateProxyHelper; * @author kevinvandevelde at atmire.com */ @Entity -@Table(name = "group2groupcache") +@Table(name = "group2groupcache", uniqueConstraints = { @UniqueConstraint(columnNames = {"parent_id", "child_id"}) }) public class Group2GroupCache implements Serializable { @Id diff --git a/dspace-api/src/main/java/org/dspace/eperson/GroupServiceImpl.java b/dspace-api/src/main/java/org/dspace/eperson/GroupServiceImpl.java index 3fb20e2f1e..b52f17a5c6 100644 --- a/dspace-api/src/main/java/org/dspace/eperson/GroupServiceImpl.java +++ b/dspace-api/src/main/java/org/dspace/eperson/GroupServiceImpl.java @@ -20,6 +20,7 @@ import java.util.Set; import java.util.UUID; import org.apache.commons.collections4.CollectionUtils; +import org.apache.commons.collections4.SetUtils; import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.tuple.Pair; import org.apache.logging.log4j.LogManager; @@ -673,15 +674,14 @@ public class GroupServiceImpl extends DSpaceObjectServiceImpl implements /** - * Regenerate the group cache AKA the group2groupcache table in the database - - * meant to be called when a group is added or removed from another group + * Returns a set with pairs of parent and child group UUIDs, representing the new cache table rows. * - * @param context The relevant DSpace Context. - * @param flushQueries flushQueries Flush all pending queries + * @param context The relevant DSpace Context. + * @param flushQueries flushQueries Flush all pending queries + * @return Pairs of parent and child group UUID of the new cache. * @throws SQLException An exception that provides information on a database access error or other errors. */ - protected void rethinkGroupCache(Context context, boolean flushQueries) throws SQLException { - + private Set> computeNewCache(Context context, boolean flushQueries) throws SQLException { Map> parents = new HashMap<>(); List> group2groupResults = groupDAO.getGroup2GroupResults(context, flushQueries); @@ -689,19 +689,8 @@ public class GroupServiceImpl extends DSpaceObjectServiceImpl implements UUID parent = group2groupResult.getLeft(); UUID child = group2groupResult.getRight(); - // if parent doesn't have an entry, create one - if (!parents.containsKey(parent)) { - Set children = new HashSet<>(); - - // add child id to the list - children.add(child); - parents.put(parent, children); - } else { - // parent has an entry, now add the child to the parent's record - // of children - Set children = parents.get(parent); - children.add(child); - } + parents.putIfAbsent(parent, new HashSet<>()); + parents.get(parent).add(child); } // now parents is a hash of all of the IDs of groups that are parents @@ -714,28 +703,43 @@ public class GroupServiceImpl extends DSpaceObjectServiceImpl implements parent.getValue().addAll(myChildren); } - // empty out group2groupcache table - group2GroupCacheDAO.deleteAll(context); - - // write out new one + // write out new cache IN MEMORY ONLY and returns it + Set> newCache = new HashSet<>(); for (Map.Entry> parent : parents.entrySet()) { UUID key = parent.getKey(); - for (UUID child : parent.getValue()) { - - Group parentGroup = find(context, key); - Group childGroup = find(context, child); - - - if (parentGroup != null && childGroup != null && group2GroupCacheDAO - .find(context, parentGroup, childGroup) == null) { - Group2GroupCache group2GroupCache = group2GroupCacheDAO.create(context, new Group2GroupCache()); - group2GroupCache.setParent(parentGroup); - group2GroupCache.setChild(childGroup); - group2GroupCacheDAO.save(context, group2GroupCache); - } + newCache.add(Pair.of(key, child)); } } + return newCache; + } + + + /** + * Regenerate the group cache AKA the group2groupcache table in the database - + * meant to be called when a group is added or removed from another group + * + * @param context The relevant DSpace Context. + * @param flushQueries flushQueries Flush all pending queries + * @throws SQLException An exception that provides information on a database access error or other errors. + */ + protected void rethinkGroupCache(Context context, boolean flushQueries) throws SQLException { + // current cache in the database + var oldCache = group2GroupCacheDAO.getCache(context); + + // correct cache, computed from the Group table + var newCache = computeNewCache(context, flushQueries); + + var toDelete = SetUtils.difference(oldCache, newCache); + var toCreate = SetUtils.difference(newCache, oldCache); + + for (var pair : toDelete ) { + group2GroupCacheDAO.deleteFromCache(context, pair.getLeft(), pair.getRight()); + } + + for (var pair : toCreate ) { + group2GroupCacheDAO.addToCache(context, pair.getLeft(), pair.getRight()); + } } @Override diff --git a/dspace-api/src/main/java/org/dspace/eperson/dao/Group2GroupCacheDAO.java b/dspace-api/src/main/java/org/dspace/eperson/dao/Group2GroupCacheDAO.java index 7db569a59e..d41d52c7e6 100644 --- a/dspace-api/src/main/java/org/dspace/eperson/dao/Group2GroupCacheDAO.java +++ b/dspace-api/src/main/java/org/dspace/eperson/dao/Group2GroupCacheDAO.java @@ -9,7 +9,10 @@ package org.dspace.eperson.dao; import java.sql.SQLException; import java.util.List; +import java.util.Set; +import java.util.UUID; +import org.apache.commons.lang3.tuple.Pair; import org.dspace.core.Context; import org.dspace.core.GenericDAO; import org.dspace.eperson.Group; @@ -25,13 +28,74 @@ import org.dspace.eperson.Group2GroupCache; */ public interface Group2GroupCacheDAO extends GenericDAO { - public List findByParent(Context context, Group group) throws SQLException; + /** + * Returns the current cache table as a set of UUID pairs. + * @param context The relevant DSpace Context. + * @return Set of UUID pairs, where the first element is the parent UUID and the second one is the child UUID. + * @throws SQLException An exception that provides information on a database access error or other errors. + */ + Set> getCache(Context context) throws SQLException; - public List findByChildren(Context context, Iterable groups) throws SQLException; + /** + * Returns all cache entities that are children of a given parent Group entity. + * @param context The relevant DSpace Context. + * @param group Parent group to perform the search. + * @return List of cached groups that are children of the parent group. + * @throws SQLException An exception that provides information on a database access error or other errors. + */ + List findByParent(Context context, Group group) throws SQLException; - public Group2GroupCache findByParentAndChild(Context context, Group parent, Group child) throws SQLException; + /** + * Returns all cache entities that are parents of at least one group from a children groups list. + * @param context The relevant DSpace Context. + * @param groups Children groups to perform the search. + * @return List of cached groups that are parents of at least one group from the children groups list. + * @throws SQLException An exception that provides information on a database access error or other errors. + */ + List findByChildren(Context context, Iterable groups) throws SQLException; - public Group2GroupCache find(Context context, Group parent, Group child) throws SQLException; + /** + * Returns the cache entity given specific parent and child groups. + * @param context The relevant DSpace Context. + * @param parent Parent group. + * @param child Child gruoup. + * @return Cached group. + * @throws SQLException An exception that provides information on a database access error or other errors. + */ + Group2GroupCache findByParentAndChild(Context context, Group parent, Group child) throws SQLException; - public void deleteAll(Context context) throws SQLException; + /** + * Returns the cache entity given specific parent and child groups. + * @param context The relevant DSpace Context. + * @param parent Parent group. + * @param child Child gruoup. + * @return Cached group. + * @throws SQLException An exception that provides information on a database access error or other errors. + */ + Group2GroupCache find(Context context, Group parent, Group child) throws SQLException; + + /** + * Completely deletes the current cache table. + * @param context The relevant DSpace Context. + * @throws SQLException An exception that provides information on a database access error or other errors. + */ + void deleteAll(Context context) throws SQLException; + + /** + * Deletes a specific cache row given parent and child groups UUIDs. + * @param context The relevant DSpace Context. + * @param parent Parent group UUID. + * @param child Child group UUID. + * @throws SQLException An exception that provides information on a database access error or other errors. + */ + void deleteFromCache(Context context, UUID parent, UUID child) throws SQLException; + + /** + * Adds a single row to the cache table given parent and child groups UUIDs. + * @param context The relevant DSpace Context. + * @param parent Parent group UUID. + * @param child Child group UUID. + * @throws SQLException An exception that provides information on a database access error or other errors. + */ + void addToCache(Context context, UUID parent, UUID child) throws SQLException; } diff --git a/dspace-api/src/main/java/org/dspace/eperson/dao/impl/Group2GroupCacheDAOImpl.java b/dspace-api/src/main/java/org/dspace/eperson/dao/impl/Group2GroupCacheDAOImpl.java index 83fb48aaf0..42ca2bb5d4 100644 --- a/dspace-api/src/main/java/org/dspace/eperson/dao/impl/Group2GroupCacheDAOImpl.java +++ b/dspace-api/src/main/java/org/dspace/eperson/dao/impl/Group2GroupCacheDAOImpl.java @@ -8,14 +8,18 @@ package org.dspace.eperson.dao.impl; import java.sql.SQLException; +import java.util.HashSet; import java.util.LinkedList; import java.util.List; +import java.util.Set; +import java.util.UUID; import javax.persistence.Query; import javax.persistence.criteria.CriteriaBuilder; import javax.persistence.criteria.CriteriaQuery; import javax.persistence.criteria.Predicate; import javax.persistence.criteria.Root; +import org.apache.commons.lang3.tuple.Pair; import org.dspace.core.AbstractHibernateDAO; import org.dspace.core.Context; import org.dspace.eperson.Group; @@ -35,6 +39,16 @@ public class Group2GroupCacheDAOImpl extends AbstractHibernateDAO> getCache(Context context) throws SQLException { + Query query = createQuery( + context, + "SELECT new org.apache.commons.lang3.tuple.ImmutablePair(g.parent.id, g.child.id) FROM Group2GroupCache g" + ); + List> results = query.getResultList(); + return new HashSet>(results); + } + @Override public List findByParent(Context context, Group group) throws SQLException { CriteriaBuilder criteriaBuilder = getCriteriaBuilder(context); @@ -90,4 +104,24 @@ public class Group2GroupCacheDAOImpl extends AbstractHibernateDAO Date: Thu, 8 Aug 2024 15:52:03 -0300 Subject: [PATCH 03/16] Refactor 'var' variables to explicit types --- .../java/org/dspace/eperson/GroupServiceImpl.java | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/dspace-api/src/main/java/org/dspace/eperson/GroupServiceImpl.java b/dspace-api/src/main/java/org/dspace/eperson/GroupServiceImpl.java index b52f17a5c6..4cec4c9c0d 100644 --- a/dspace-api/src/main/java/org/dspace/eperson/GroupServiceImpl.java +++ b/dspace-api/src/main/java/org/dspace/eperson/GroupServiceImpl.java @@ -725,19 +725,19 @@ public class GroupServiceImpl extends DSpaceObjectServiceImpl implements */ protected void rethinkGroupCache(Context context, boolean flushQueries) throws SQLException { // current cache in the database - var oldCache = group2GroupCacheDAO.getCache(context); + Set> oldCache = group2GroupCacheDAO.getCache(context); // correct cache, computed from the Group table - var newCache = computeNewCache(context, flushQueries); + Set> newCache = computeNewCache(context, flushQueries); - var toDelete = SetUtils.difference(oldCache, newCache); - var toCreate = SetUtils.difference(newCache, oldCache); + SetUtils.SetView> toDelete = SetUtils.difference(oldCache, newCache); + SetUtils.SetView> toCreate = SetUtils.difference(newCache, oldCache); - for (var pair : toDelete ) { + for (Pair pair : toDelete ) { group2GroupCacheDAO.deleteFromCache(context, pair.getLeft(), pair.getRight()); } - for (var pair : toCreate ) { + for (Pair pair : toCreate ) { group2GroupCacheDAO.addToCache(context, pair.getLeft(), pair.getRight()); } } From aa027aefae2e39558273fcdbd511c288f8b0def3 Mon Sep 17 00:00:00 2001 From: Tim Donohue Date: Tue, 17 Dec 2024 13:11:43 -0600 Subject: [PATCH 04/16] Improve Apache Ant download process. Switch to using curl so that we can retry the request if it initially fails. (cherry picked from commit e236634a4c758a6c0d81e929b2f36d49e81b0835) --- Dockerfile | 10 ++++------ Dockerfile.cli | 10 ++++------ Dockerfile.test | 10 ++++------ 3 files changed, 12 insertions(+), 18 deletions(-) diff --git a/Dockerfile b/Dockerfile index ff35f2a2e3..102f00abe1 100644 --- a/Dockerfile +++ b/Dockerfile @@ -42,14 +42,12 @@ WORKDIR /dspace-src ENV ANT_VERSION=1.10.13 ENV ANT_HOME=/tmp/ant-$ANT_VERSION ENV PATH=$ANT_HOME/bin:$PATH -# Need wget to install ant -RUN apt-get update \ - && apt-get install -y --no-install-recommends wget \ - && apt-get purge -y --auto-remove \ - && rm -rf /var/lib/apt/lists/* # Download and install 'ant' RUN mkdir $ANT_HOME && \ - wget -qO- "https://archive.apache.org/dist/ant/binaries/apache-ant-$ANT_VERSION-bin.tar.gz" | tar -zx --strip-components=1 -C $ANT_HOME + curl --silent --show-error --location --fail --retry 5 --output /tmp/apache-ant.tar.gz \ + https://archive.apache.org/dist/ant/binaries/apache-ant-${ANT_VERSION}-bin.tar.gz && \ + tar -zx --strip-components=1 -f /tmp/apache-ant.tar.gz -C $ANT_HOME && \ + rm /tmp/apache-ant.tar.gz # Run necessary 'ant' deploy scripts RUN ant init_installation update_configs update_code update_webapps diff --git a/Dockerfile.cli b/Dockerfile.cli index be03e8922b..e208e3d921 100644 --- a/Dockerfile.cli +++ b/Dockerfile.cli @@ -38,14 +38,12 @@ WORKDIR /dspace-src ENV ANT_VERSION=1.10.13 ENV ANT_HOME=/tmp/ant-$ANT_VERSION ENV PATH=$ANT_HOME/bin:$PATH -# Need wget to install ant -RUN apt-get update \ - && apt-get install -y --no-install-recommends wget \ - && apt-get purge -y --auto-remove \ - && rm -rf /var/lib/apt/lists/* # Download and install 'ant' RUN mkdir $ANT_HOME && \ - wget -qO- "https://archive.apache.org/dist/ant/binaries/apache-ant-$ANT_VERSION-bin.tar.gz" | tar -zx --strip-components=1 -C $ANT_HOME + curl --silent --show-error --location --fail --retry 5 --output /tmp/apache-ant.tar.gz \ + https://archive.apache.org/dist/ant/binaries/apache-ant-${ANT_VERSION}-bin.tar.gz && \ + tar -zx --strip-components=1 -f /tmp/apache-ant.tar.gz -C $ANT_HOME && \ + rm /tmp/apache-ant.tar.gz # Run necessary 'ant' deploy scripts RUN ant init_installation update_configs update_code diff --git a/Dockerfile.test b/Dockerfile.test index 08b6b3018b..cc73c655b6 100644 --- a/Dockerfile.test +++ b/Dockerfile.test @@ -41,14 +41,12 @@ WORKDIR /dspace-src ENV ANT_VERSION=1.10.12 ENV ANT_HOME=/tmp/ant-$ANT_VERSION ENV PATH=$ANT_HOME/bin:$PATH -# Need wget to install ant -RUN apt-get update \ - && apt-get install -y --no-install-recommends wget \ - && apt-get purge -y --auto-remove \ - && rm -rf /var/lib/apt/lists/* # Download and install 'ant' RUN mkdir $ANT_HOME && \ - wget -qO- "https://archive.apache.org/dist/ant/binaries/apache-ant-$ANT_VERSION-bin.tar.gz" | tar -zx --strip-components=1 -C $ANT_HOME + curl --silent --show-error --location --fail --retry 5 --output /tmp/apache-ant.tar.gz \ + https://archive.apache.org/dist/ant/binaries/apache-ant-${ANT_VERSION}-bin.tar.gz && \ + tar -zx --strip-components=1 -f /tmp/apache-ant.tar.gz -C $ANT_HOME && \ + rm /tmp/apache-ant.tar.gz # Run necessary 'ant' deploy scripts RUN ant init_installation update_configs update_code update_webapps From 58af9fd224ba37a8f3a9c8b354c04d772d6f3ee2 Mon Sep 17 00:00:00 2001 From: Tim Donohue Date: Tue, 17 Dec 2024 14:13:21 -0600 Subject: [PATCH 05/16] Significantly speed up build of dspace-dependencies by only copying over POM files (cherry picked from commit 6d7a3fcb725bbb5e42790ba5c57292477f2f3f01) --- Dockerfile.dependencies | 58 ++++++++++++++++++++++++++++++++++++----- 1 file changed, 51 insertions(+), 7 deletions(-) diff --git a/Dockerfile.dependencies b/Dockerfile.dependencies index 794dfa9a66..d3ca9d5e3c 100644 --- a/Dockerfile.dependencies +++ b/Dockerfile.dependencies @@ -6,7 +6,7 @@ # To build with JDK17, use "--build-arg JDK_VERSION=17" ARG JDK_VERSION=11 -# Step 1 - Run Maven Build +# Step 1 - Download all Dependencies FROM docker.io/maven:3-eclipse-temurin-${JDK_VERSION} AS build ARG TARGET_DIR=dspace-installer WORKDIR /app @@ -19,16 +19,60 @@ RUN chown -Rv dspace: /app # Switch to dspace user & run below commands as that user USER dspace -# Copy the DSpace source code (from local machine) into the workdir (excluding .dockerignore contents) -ADD --chown=dspace . /app/ +# This next part may look odd, but it speeds up the build of this image *significantly*. +# Copy ONLY the POMs to this image (from local machine). This will allow us to download all dependencies *without* +# performing any code compilation steps. + +# Parent POM +ADD --chown=dspace pom.xml /app/ +RUN mkdir -p /app/dspace + +# 'dspace' module POM. Includes 'additions' ONLY, as it's the only submodule that is required to exist. +ADD --chown=dspace dspace/pom.xml /app/dspace/ +RUN mkdir -p /app/dspace/modules/ +ADD --chown=dspace dspace/modules/pom.xml /app/dspace/modules/ +RUN mkdir -p /app/dspace/modules/additions +ADD --chown=dspace dspace/modules/additions/pom.xml /app/dspace/modules/additions/ + +# 'dspace-api' module POM +RUN mkdir -p /app/dspace-api +ADD --chown=dspace dspace-api/pom.xml /app/dspace-api/ + +# 'dspace-iiif' module POM +RUN mkdir -p /app/dspace-iiif +ADD --chown=dspace dspace-iiif/pom.xml /app/dspace-iiif/ + +# 'dspace-oai' module POM +RUN mkdir -p /app/dspace-oai +ADD --chown=dspace dspace-oai/pom.xml /app/dspace-oai/ + +# 'dspace-rdf' module POM +RUN mkdir -p /app/dspace-rdf +ADD --chown=dspace dspace-rdf/pom.xml /app/dspace-rdf/ + +# 'dspace-server-webapp' module POM +RUN mkdir -p /app/dspace-server-webapp +ADD --chown=dspace dspace-server-webapp/pom.xml /app/dspace-server-webapp/ + +# 'dspace-services' module POM +RUN mkdir -p /app/dspace-services +ADD --chown=dspace dspace-services/pom.xml /app/dspace-services/ + +# 'dspace-sword' module POM +RUN mkdir -p /app/dspace-sword +ADD --chown=dspace dspace-sword/pom.xml /app/dspace-sword/ + +# 'dspace-swordv2' module POM +RUN mkdir -p /app/dspace-swordv2 +ADD --chown=dspace dspace-swordv2/pom.xml /app/dspace-swordv2/ # Trigger the installation of all maven dependencies (hide download progress messages) # Maven flags here ensure that we skip final assembly, skip building test environment and skip all code verification checks. -# These flags speed up this installation as much as reasonably possible. -ENV MAVEN_FLAGS="-P-assembly -P-test-environment -Denforcer.skip=true -Dcheckstyle.skip=true -Dlicense.skip=true -Dxml.skip=true" -RUN mvn --no-transfer-progress install ${MAVEN_FLAGS} +# These flags speed up this installation and skip tasks we cannot perform as we don't have the full source code. +ENV MAVEN_FLAGS="-P-assembly -P-test-environment -Denforcer.skip=true -Dcheckstyle.skip=true -Dlicense.skip=true -Dxjc.skip=true -Dxml.skip=true" +RUN mvn --no-transfer-progress verify ${MAVEN_FLAGS} -# Clear the contents of the /app directory (including all maven builds), so no artifacts remain. +# Clear the contents of the /app directory (including all maven target folders), so no artifacts remain. # This ensures when dspace:dspace is built, it will use the Maven local cache (~/.m2) for dependencies USER root RUN rm -rf /app/* From e5568157c87370531e6b86cbf582e160504858fb Mon Sep 17 00:00:00 2001 From: "Mark H. Wood" Date: Tue, 17 Sep 2024 08:47:54 -0400 Subject: [PATCH 06/16] More information about failed DOI registrations. (cherry picked from commit b8f4ab0eb3e90b080eaae057fb5ee673d3477dc2) --- .../dspace/identifier/doi/DOIOrganiser.java | 3 ++- .../identifier/doi/DataCiteConnector.java | 22 ++++++++++++++++++- 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/dspace-api/src/main/java/org/dspace/identifier/doi/DOIOrganiser.java b/dspace-api/src/main/java/org/dspace/identifier/doi/DOIOrganiser.java index 088e2b1cbc..12f6d7c4fc 100644 --- a/dspace-api/src/main/java/org/dspace/identifier/doi/DOIOrganiser.java +++ b/dspace-api/src/main/java/org/dspace/identifier/doi/DOIOrganiser.java @@ -577,7 +577,8 @@ public class DOIOrganiser { } } catch (IdentifierException ex) { if (!(ex instanceof DOIIdentifierException)) { - LOG.error("It wasn't possible to register the identifier online. ", ex); + LOG.error("Registering DOI {} for object {}: the registrar returned an error.", + doiRow.getDoi(), dso.getID(), ex); } DOIIdentifierException doiIdentifierException = (DOIIdentifierException) ex; diff --git a/dspace-api/src/main/java/org/dspace/identifier/doi/DataCiteConnector.java b/dspace-api/src/main/java/org/dspace/identifier/doi/DataCiteConnector.java index a15e3f7fdb..5bb37add2d 100644 --- a/dspace-api/src/main/java/org/dspace/identifier/doi/DataCiteConnector.java +++ b/dspace-api/src/main/java/org/dspace/identifier/doi/DataCiteConnector.java @@ -464,6 +464,10 @@ public class DataCiteConnector log.warn("While reserving the DOI {}, we got a http status code " + "{} and the message \"{}\".", doi, Integer.toString(resp.statusCode), resp.getContent()); + Format format = Format.getCompactFormat(); + format.setEncoding("UTF-8"); + XMLOutputter xout = new XMLOutputter(format); + log.info("We send the following XML:\n{}", xout.outputString(root)); throw new DOIIdentifierException("Unable to parse an answer from " + "DataCite API. Please have a look into DSpace logs.", DOIIdentifierException.BAD_ANSWER); @@ -635,6 +639,14 @@ public class DataCiteConnector return sendHttpRequest(httpget, doi); } + /** + * Send a DataCite metadata document to the registrar. + * + * @param doi identify the object. + * @param metadataRoot describe the object. The root element of the document. + * @return the registrar's response. + * @throws DOIIdentifierException passed through. + */ protected DataCiteResponse sendMetadataPostRequest(String doi, Element metadataRoot) throws DOIIdentifierException { Format format = Format.getCompactFormat(); @@ -643,6 +655,14 @@ public class DataCiteConnector return sendMetadataPostRequest(doi, xout.outputString(new Document(metadataRoot))); } + /** + * Send a DataCite metadata document to the registrar. + * + * @param doi identify the object. + * @param metadata describe the object. + * @return the registrar's response. + * @throws DOIIdentifierException passed through. + */ protected DataCiteResponse sendMetadataPostRequest(String doi, String metadata) throws DOIIdentifierException { // post mds/metadata/ @@ -690,7 +710,7 @@ public class DataCiteConnector * properties such as request URI and method type. * @param doi DOI string to operate on * @return response from DataCite - * @throws DOIIdentifierException if DOI error + * @throws DOIIdentifierException if registrar returns an error. */ protected DataCiteResponse sendHttpRequest(HttpUriRequest req, String doi) throws DOIIdentifierException { From fcc650e1a6fbc1311e905a349fc5ed3957dd1db7 Mon Sep 17 00:00:00 2001 From: Toni Prieto Date: Sat, 26 Oct 2024 23:33:13 +0200 Subject: [PATCH 07/16] Add limit, offset, and a new parameter to calculate the total entry count in the Solr query used for the metadata navigation index (cherry picked from commit e71de8a4d075d897f68c145233ac19ba0ec3718b) --- .../java/org/dspace/browse/BrowseEngine.java | 30 ++-------------- .../java/org/dspace/browse/SolrBrowseDAO.java | 32 ++++++++++++----- .../org/dspace/discovery/DiscoverResult.java | 11 ++++++ .../org/dspace/discovery/SolrServiceImpl.java | 34 +++++++++++++++++++ 4 files changed, 72 insertions(+), 35 deletions(-) diff --git a/dspace-api/src/main/java/org/dspace/browse/BrowseEngine.java b/dspace-api/src/main/java/org/dspace/browse/BrowseEngine.java index 351c362482..be7a34086a 100644 --- a/dspace-api/src/main/java/org/dspace/browse/BrowseEngine.java +++ b/dspace-api/src/main/java/org/dspace/browse/BrowseEngine.java @@ -422,9 +422,6 @@ public class BrowseEngine { } } - // this is the total number of results in answer to the query - int total = getTotalResults(true); - // set the ordering field (there is only one option) dao.setOrderField("sort_value"); @@ -444,6 +441,9 @@ public class BrowseEngine { dao.setOffset(offset); dao.setLimit(scope.getResultsPerPage()); + // this is the total number of results in answer to the query + int total = getTotalResults(true); + // Holder for the results List results = null; @@ -680,33 +680,9 @@ public class BrowseEngine { // tell the browse query whether we are distinct dao.setDistinct(distinct); - // ensure that the select is set to "*" - String[] select = {"*"}; - dao.setCountValues(select); - - // FIXME: it would be nice to have a good way of doing this in the DAO - // now reset all of the fields that we don't want to have constraining - // our count, storing them locally to reinstate later - String focusField = dao.getJumpToField(); - String focusValue = dao.getJumpToValue(); - int limit = dao.getLimit(); - int offset = dao.getOffset(); - - dao.setJumpToField(null); - dao.setJumpToValue(null); - dao.setLimit(-1); - dao.setOffset(-1); - // perform the query and get the result int count = dao.doCountQuery(); - // now put back the values we removed for this method - dao.setJumpToField(focusField); - dao.setJumpToValue(focusValue); - dao.setLimit(limit); - dao.setOffset(offset); - dao.setCountValues(null); - log.debug(LogHelper.getHeader(context, "get_total_results_return", "return=" + count)); return count; diff --git a/dspace-api/src/main/java/org/dspace/browse/SolrBrowseDAO.java b/dspace-api/src/main/java/org/dspace/browse/SolrBrowseDAO.java index f99aab852b..1917dec423 100644 --- a/dspace-api/src/main/java/org/dspace/browse/SolrBrowseDAO.java +++ b/dspace-api/src/main/java/org/dspace/browse/SolrBrowseDAO.java @@ -13,6 +13,8 @@ import java.util.Collections; import java.util.Comparator; import java.util.List; +import com.fasterxml.jackson.databind.node.JsonNodeFactory; +import com.fasterxml.jackson.databind.node.ObjectNode; import org.apache.commons.lang3.StringUtils; import org.apache.logging.log4j.Logger; import org.apache.solr.client.solrj.util.ClientUtils; @@ -180,18 +182,33 @@ public class SolrBrowseDAO implements BrowseDAO { addDefaultFilterQueries(query); if (distinct) { DiscoverFacetField dff; + + // To get the number of distinct values we use the next "json.facet" query param + // {"entries_count": {"type":"terms","field": "_filter", "limit":0, "numBuckets":true}}" + ObjectNode jsonFacet = JsonNodeFactory.instance.objectNode(); + ObjectNode entriesCount = JsonNodeFactory.instance.objectNode(); + entriesCount.put("type", "terms"); + entriesCount.put("field", facetField + "_filter"); + entriesCount.put("limit", 0); + entriesCount.put("numBuckets", true); + jsonFacet.set("entries_count", entriesCount); + if (StringUtils.isNotBlank(startsWith)) { dff = new DiscoverFacetField(facetField, - DiscoveryConfigurationParameters.TYPE_TEXT, -1, - DiscoveryConfigurationParameters.SORT.VALUE, startsWith); + DiscoveryConfigurationParameters.TYPE_TEXT, limit, + DiscoveryConfigurationParameters.SORT.VALUE, startsWith, offset); + + // Add the prefix to the json facet query + entriesCount.put("prefix", startsWith); } else { dff = new DiscoverFacetField(facetField, - DiscoveryConfigurationParameters.TYPE_TEXT, -1, - DiscoveryConfigurationParameters.SORT.VALUE); + DiscoveryConfigurationParameters.TYPE_TEXT, limit, + DiscoveryConfigurationParameters.SORT.VALUE, offset); } query.addFacetField(dff); query.setFacetMinCount(1); query.setMaxResults(0); + query.addProperty("json.facet", jsonFacet.toString()); } else { query.setMaxResults(limit/* > 0 ? limit : 20*/); if (offset > 0) { @@ -248,8 +265,7 @@ public class SolrBrowseDAO implements BrowseDAO { DiscoverResult resp = getSolrResponse(); int count = 0; if (distinct) { - List facetResults = resp.getFacetResult(facetField); - count = facetResults.size(); + count = (int) resp.getTotalEntries(); } else { // we need to cast to int to respect the BrowseDAO contract... count = (int) resp.getTotalSearchResults(); @@ -266,8 +282,8 @@ public class SolrBrowseDAO implements BrowseDAO { DiscoverResult resp = getSolrResponse(); List facet = resp.getFacetResult(facetField); int count = doCountQuery(); - int start = offset > 0 ? offset : 0; - int max = limit > 0 ? limit : count; //if negative, return everything + int start = 0; + int max = facet.size(); List result = new ArrayList<>(); if (ascending) { for (int i = start; i < (start + max) && i < count; i++) { diff --git a/dspace-api/src/main/java/org/dspace/discovery/DiscoverResult.java b/dspace-api/src/main/java/org/dspace/discovery/DiscoverResult.java index 00236d2bfe..a56804e3e7 100644 --- a/dspace-api/src/main/java/org/dspace/discovery/DiscoverResult.java +++ b/dspace-api/src/main/java/org/dspace/discovery/DiscoverResult.java @@ -32,6 +32,9 @@ public class DiscoverResult { private List indexableObjects; private Map> facetResults; + // Total count of facet entries calculated for a metadata browsing query + private long totalEntries; + /** * A map that contains all the documents sougth after, the key is a string representation of the Indexable Object */ @@ -64,6 +67,14 @@ public class DiscoverResult { this.totalSearchResults = totalSearchResults; } + public long getTotalEntries() { + return totalEntries; + } + + public void setTotalEntries(long totalEntries) { + this.totalEntries = totalEntries; + } + public int getStart() { return start; } diff --git a/dspace-api/src/main/java/org/dspace/discovery/SolrServiceImpl.java b/dspace-api/src/main/java/org/dspace/discovery/SolrServiceImpl.java index cd3797e3e3..9339b574b5 100644 --- a/dspace-api/src/main/java/org/dspace/discovery/SolrServiceImpl.java +++ b/dspace-api/src/main/java/org/dspace/discovery/SolrServiceImpl.java @@ -1055,6 +1055,8 @@ public class SolrServiceImpl implements SearchService, IndexingService { } //Resolve our facet field values resolveFacetFields(context, query, result, skipLoadingResponse, solrQueryResponse); + //Add total entries count for metadata browsing + resolveEntriesCount(result, solrQueryResponse); } // If any stale entries are found in the current page of results, // we remove those stale entries and rerun the same query again. @@ -1080,7 +1082,39 @@ public class SolrServiceImpl implements SearchService, IndexingService { return result; } + /** + * Stores the total count of entries for metadata index browsing. The count is calculated by the + * json.facet parameter with the following value: + * + *

+     * {
+     *     "entries_count": {
+     *         "type": "terms",
+     *         "field": "facetNameField_filter",
+     *         "limit": 0,
+     *         "prefix": "prefix_value",
+     *         "numBuckets": true
+     *     }
+     * }
+     * 
+ * + * This value is returned in the facets field of the Solr response. + * + * @param result DiscoverResult object where the total entries count will be stored + * @param solrQueryResponse QueryResponse object containing the solr response + */ + private void resolveEntriesCount(DiscoverResult result, QueryResponse solrQueryResponse) { + Object facetsObj = solrQueryResponse.getResponse().get("facets"); + if (facetsObj instanceof NamedList) { + NamedList facets = (NamedList) facetsObj; + Object bucketsInfoObj = facets.get("entries_count"); + if (bucketsInfoObj instanceof NamedList) { + NamedList bucketsInfo = (NamedList) bucketsInfoObj; + result.setTotalEntries((int) bucketsInfo.get("numBuckets")); + } + } + } private void resolveFacetFields(Context context, DiscoverQuery query, DiscoverResult result, boolean skipLoadingResponse, QueryResponse solrQueryResponse) throws SQLException { From 95742fe4f38842fc0747188ec941b05364ec7797 Mon Sep 17 00:00:00 2001 From: Jukka Lipka <3710455+jlipka@users.noreply.github.com> Date: Mon, 21 Oct 2024 18:11:08 +0200 Subject: [PATCH 08/16] fix(submission): Submission scope naming fixed According to the documentation, the value for the property is 'submission' in the 'submission-forms.xml'. Without this change, an empty input field will never be marked as an error, even if the field is marked as 'required'. (cherry picked from commit 02f52c7d5c245eaae09dbd636fc8c7935d7cc242) --- dspace-api/src/main/java/org/dspace/app/util/DCInput.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dspace-api/src/main/java/org/dspace/app/util/DCInput.java b/dspace-api/src/main/java/org/dspace/app/util/DCInput.java index 11f9aadd86..a3d1ba208b 100644 --- a/dspace-api/src/main/java/org/dspace/app/util/DCInput.java +++ b/dspace-api/src/main/java/org/dspace/app/util/DCInput.java @@ -163,7 +163,7 @@ public class DCInput { * The scope of the input sets, this restricts hidden metadata fields from * view by the end user during submission. */ - public static final String SUBMISSION_SCOPE = "submit"; + public static final String SUBMISSION_SCOPE = "submission"; /** * Class constructor for creating a DCInput object based on the contents of From 31faf809d15d968a021a64720b620faf5f47bf0e Mon Sep 17 00:00:00 2001 From: Jukka Lipka <3710455+jlipka@users.noreply.github.com> Date: Mon, 9 Dec 2024 10:50:59 +0100 Subject: [PATCH 09/16] fix(submission): Submission scope naming fixed Corrected wording in related code comment (cherry picked from commit ec2187ea6532d7db5f5ffa38e20e971a49b90871) --- dspace-api/src/main/java/org/dspace/app/util/DCInput.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dspace-api/src/main/java/org/dspace/app/util/DCInput.java b/dspace-api/src/main/java/org/dspace/app/util/DCInput.java index a3d1ba208b..0a1e77ee72 100644 --- a/dspace-api/src/main/java/org/dspace/app/util/DCInput.java +++ b/dspace-api/src/main/java/org/dspace/app/util/DCInput.java @@ -262,7 +262,7 @@ public class DCInput { /** * Is this DCInput for display in the given scope? The scope should be - * either "workflow" or "submit", as per the input forms definition. If the + * either "workflow" or "submission", as per the input forms definition. If the * internal visibility is set to "null" then this will always return true. * * @param scope String identifying the scope that this input's visibility From 973beee2ce5f8dc993ef5eb91711b595c7786789 Mon Sep 17 00:00:00 2001 From: igorbaptist4 Date: Tue, 10 Sep 2024 18:44:15 -0300 Subject: [PATCH 10/16] fix: properly type in field id, adjust use of getProperty and add error handling when dbPath is null (when property usage-statistics.dbfile is commented (cherry picked from commit 412d5751f29f31bbdd872820a62b74970333f066) --- .../org/dspace/statistics/util/StatisticsImporter.java | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/dspace-api/src/main/java/org/dspace/statistics/util/StatisticsImporter.java b/dspace-api/src/main/java/org/dspace/statistics/util/StatisticsImporter.java index 95736a8bd6..1bd97a6f5e 100644 --- a/dspace-api/src/main/java/org/dspace/statistics/util/StatisticsImporter.java +++ b/dspace-api/src/main/java/org/dspace/statistics/util/StatisticsImporter.java @@ -357,7 +357,7 @@ public class StatisticsImporter { SolrInputDocument sid = new SolrInputDocument(); sid.addField("ip", ip); sid.addField("type", dso.getType()); - sid.addField("id", dso.getID()); + sid.addField("id", dso.getID().toString()); sid.addField("time", DateFormatUtils.format(date, SolrLoggerServiceImpl.DATE_FORMAT_8601)); sid.addField("continent", continent); sid.addField("country", country); @@ -471,13 +471,13 @@ public class StatisticsImporter { boolean verbose = line.hasOption('v'); // Find our solr server - String sserver = configurationService.getProperty("solr-statistics", "server"); + String sserver = configurationService.getProperty("solr-statistics.server"); if (verbose) { System.out.println("Writing to solr server at: " + sserver); } solr = new HttpSolrClient.Builder(sserver).build(); - String dbPath = configurationService.getProperty("usage-statistics", "dbfile"); + String dbPath = configurationService.getProperty("usage-statistics.dbfile"); try { File dbFile = new File(dbPath); geoipLookup = new DatabaseReader.Builder(dbFile).build(); @@ -492,6 +492,10 @@ public class StatisticsImporter { "Unable to load GeoLite Database file (" + dbPath + ")! You may need to reinstall it. See the DSpace " + "installation instructions for more details.", e); + } catch (NullPointerException e) { + log.error( + "The value of the property usage-statistics.dbfile is null. You may need to install the GeoLite Database file and/or uncomment the property in the config file!", + e); } From 3a223e3fefec9172538e7defa34ce982b0d72762 Mon Sep 17 00:00:00 2001 From: igorbaptist4 Date: Tue, 10 Sep 2024 20:16:47 -0300 Subject: [PATCH 11/16] fix line length checkstyle (cherry picked from commit 338f3b1d3efaa0f7f3852654121cf1f93adeae7e) --- .../java/org/dspace/statistics/util/StatisticsImporter.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dspace-api/src/main/java/org/dspace/statistics/util/StatisticsImporter.java b/dspace-api/src/main/java/org/dspace/statistics/util/StatisticsImporter.java index 1bd97a6f5e..354c803fe2 100644 --- a/dspace-api/src/main/java/org/dspace/statistics/util/StatisticsImporter.java +++ b/dspace-api/src/main/java/org/dspace/statistics/util/StatisticsImporter.java @@ -494,7 +494,8 @@ public class StatisticsImporter { e); } catch (NullPointerException e) { log.error( - "The value of the property usage-statistics.dbfile is null. You may need to install the GeoLite Database file and/or uncomment the property in the config file!", + "The value of the property usage-statistics.dbfile is null. You may need to install the GeoLite " + + "Database file and/or uncomment the property in the config file!", e); } From c86d082d5f6fe4a76eae018bbe6890cc0b72ff33 Mon Sep 17 00:00:00 2001 From: igorbaptist4 Date: Tue, 10 Sep 2024 13:10:14 -0300 Subject: [PATCH 12/16] fix: set default configFile (cherry picked from commit a5e8d7aa15bc56c268ba67f19657f19edaa45253) --- .../java/org/dspace/app/statistics/LogAnalyser.java | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/dspace-api/src/main/java/org/dspace/app/statistics/LogAnalyser.java b/dspace-api/src/main/java/org/dspace/app/statistics/LogAnalyser.java index 2e4ed69b26..c787261419 100644 --- a/dspace-api/src/main/java/org/dspace/app/statistics/LogAnalyser.java +++ b/dspace-api/src/main/java/org/dspace/app/statistics/LogAnalyser.java @@ -281,10 +281,14 @@ public class LogAnalyser { */ private static String fileTemplate = "dspace\\.log.*"; + private static final ConfigurationService configurationService = + DSpaceServicesFactory.getInstance().getConfigurationService(); + /** * the configuration file from which to configure the analyser */ - private static String configFile; + private static String configFile = configurationService.getProperty("dspace.dir") + + File.separator + "config" + File.separator + "dstat.cfg"; /** * the output file to which to write aggregation data @@ -616,8 +620,6 @@ public class LogAnalyser { } // now do the host name and url lookup - ConfigurationService configurationService - = DSpaceServicesFactory.getInstance().getConfigurationService(); hostName = Utils.getHostName(configurationService.getProperty("dspace.ui.url")); name = configurationService.getProperty("dspace.name").trim(); url = configurationService.getProperty("dspace.ui.url").trim(); @@ -658,8 +660,6 @@ public class LogAnalyser { String myConfigFile, String myOutFile, Date myStartDate, Date myEndDate, boolean myLookUp) { - ConfigurationService configurationService - = DSpaceServicesFactory.getInstance().getConfigurationService(); if (myLogDir != null) { logDir = myLogDir; @@ -673,9 +673,6 @@ public class LogAnalyser { if (myConfigFile != null) { configFile = myConfigFile; - } else { - configFile = configurationService.getProperty("dspace.dir") - + File.separator + "config" + File.separator + "dstat.cfg"; } if (myStartDate != null) { From 1ce4e08333559bffadde8d266a1009c1c875df33 Mon Sep 17 00:00:00 2001 From: DSpace Bot <68393067+dspace-bot@users.noreply.github.com> Date: Wed, 18 Dec 2024 16:45:16 -0600 Subject: [PATCH 13/16] [Port dspace-7_x] Fix issue with submission sections visibility (#10140) * README.md: v8 is the current release, not v7 (cherry picked from commit 2b698eff609d510c487ad2331d4e11cd28f64e9a) (cherry picked from commit 83460afb3700a2ddaabce16069b2094e4edbe8fa) * Update README.md (cherry picked from commit 671234b08f909810d798dd950f87d1818b098363) (cherry picked from commit 7a6785b1c353cbc45c4dfaad89f5252064ef1e4e) * [DURACOM-291] Expose section scope attribute (cherry picked from commit 4107f937fda1b49da2b6a81dc91026ebf4b1cd37) * README.md: v8 is the current release, not v7 (cherry picked from commit 2b698eff609d510c487ad2331d4e11cd28f64e9a) (cherry picked from commit d98499a39416d026c0b45199e90ddb9bbfc1cd9c) * Update README.md (cherry picked from commit 671234b08f909810d798dd950f87d1818b098363) (cherry picked from commit 6a707548ffa6929e8a36775063ce05e2dc02a586) --------- Co-authored-by: Christian Clauss Co-authored-by: Giuseppe Digilio --- .../dspace/app/rest/converter/SubmissionSectionConverter.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dspace-server-webapp/src/main/java/org/dspace/app/rest/converter/SubmissionSectionConverter.java b/dspace-server-webapp/src/main/java/org/dspace/app/rest/converter/SubmissionSectionConverter.java index 0391cbce7a..3cd263493b 100644 --- a/dspace-server-webapp/src/main/java/org/dspace/app/rest/converter/SubmissionSectionConverter.java +++ b/dspace-server-webapp/src/main/java/org/dspace/app/rest/converter/SubmissionSectionConverter.java @@ -10,6 +10,7 @@ package org.dspace.app.rest.converter; import java.sql.SQLException; import org.apache.logging.log4j.Logger; +import org.dspace.app.rest.model.ScopeEnum; import org.dspace.app.rest.model.SubmissionSectionRest; import org.dspace.app.rest.model.SubmissionVisibilityRest; import org.dspace.app.rest.model.VisibilityEnum; @@ -41,6 +42,7 @@ public class SubmissionSectionConverter implements DSpaceConverter Date: Mon, 2 Dec 2024 15:35:10 +0100 Subject: [PATCH 14/16] 119664: Search event scope fix (cherry picked from commit 48956d90b7619bdb336632b454ac62ca62967a39) --- .../org/dspace/app/rest/converter/SearchEventConverter.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dspace-server-webapp/src/main/java/org/dspace/app/rest/converter/SearchEventConverter.java b/dspace-server-webapp/src/main/java/org/dspace/app/rest/converter/SearchEventConverter.java index 978ae2ca92..f2fb12f2bd 100644 --- a/dspace-server-webapp/src/main/java/org/dspace/app/rest/converter/SearchEventConverter.java +++ b/dspace-server-webapp/src/main/java/org/dspace/app/rest/converter/SearchEventConverter.java @@ -67,8 +67,8 @@ public class SearchEventConverter { if (searchEventRest.getScope() != null) { IndexableObject scopeObject = scopeResolver.resolveScope(context, String.valueOf(searchEventRest.getScope())); - if (scopeObject instanceof DSpaceObject) { - usageSearchEvent.setScope((DSpaceObject) scopeObject); + if (scopeObject != null && scopeObject.getIndexedObject() instanceof DSpaceObject) { + usageSearchEvent.setScope((DSpaceObject) scopeObject.getIndexedObject()); } } usageSearchEvent.setConfiguration(searchEventRest.getConfiguration()); From 8849212895d987abaca92865178c6b4d2f99452d Mon Sep 17 00:00:00 2001 From: Chris Wilper Date: Wed, 28 Apr 2021 09:29:36 -0400 Subject: [PATCH 15/16] Add Context method to uncache all entities (cherry picked from commit 8ea664adb2161b04ab8b6171bbb426d8875fe27f) --- .../main/java/org/dspace/core/Context.java | 14 +++++++- .../java/org/dspace/core/DBConnection.java | 32 ++++++++++++------- .../dspace/core/HibernateDBConnection.java | 5 +++ .../java/org/dspace/core/ContextTest.java | 25 +++++++++++++++ .../core/HibernateDBConnectionTest.java | 22 +++++++++++++ 5 files changed, 86 insertions(+), 12 deletions(-) diff --git a/dspace-api/src/main/java/org/dspace/core/Context.java b/dspace-api/src/main/java/org/dspace/core/Context.java index 02a3fee09f..dab6ab7fbd 100644 --- a/dspace-api/src/main/java/org/dspace/core/Context.java +++ b/dspace-api/src/main/java/org/dspace/core/Context.java @@ -883,7 +883,19 @@ public class Context implements AutoCloseable { } /** - * Remove an entity from the cache. This is necessary when batch processing a large number of items. + * Remove all entities from the cache and reload the current user entity. This is useful when batch processing + * a large number of entities when the calling code requires the cache to be completely cleared before continuing. + * + * @throws SQLException if a database error occurs. + */ + public void uncacheEntities() throws SQLException { + dbConnection.uncacheEntities(); + reloadContextBoundEntities(); + } + + /** + * Remove an entity from the cache. This is useful when batch processing a large number of entities + * when the calling code needs to retain some items in the cache while removing others. * * @param entity The entity to reload * @param The class of the entity. The entity must implement the {@link ReloadableEntity} interface. diff --git a/dspace-api/src/main/java/org/dspace/core/DBConnection.java b/dspace-api/src/main/java/org/dspace/core/DBConnection.java index 66e4a65dbf..c9c4ce0953 100644 --- a/dspace-api/src/main/java/org/dspace/core/DBConnection.java +++ b/dspace-api/src/main/java/org/dspace/core/DBConnection.java @@ -124,28 +124,38 @@ public interface DBConnection { public long getCacheSize() throws SQLException; /** - * Reload a DSpace object from the database. This will make sure the object + * Reload an entity from the database. This will make sure the object * is valid and stored in the cache. The returned object should be used * henceforth instead of the passed object. * - * @param type of {@link entity} - * @param entity The DSpace object to reload + * @param type of entity. + * @param entity The entity to reload. * @return the reloaded entity. - * @throws java.sql.SQLException passed through. + * @throws SQLException passed through. */ public E reloadEntity(E entity) throws SQLException; /** - * Remove a DSpace object from the session cache when batch processing a - * large number of objects. + * Remove all entities from the session cache. * - *

Objects removed from cache are not saved in any way. Therefore, if you - * have modified an object, you should be sure to {@link commit()} changes + *

Entities removed from cache are not saved in any way. Therefore, if you + * have modified any entities, you should be sure to {@link #commit()} changes * before calling this method. * - * @param Type of {@link entity} - * @param entity The DSpace object to decache. - * @throws java.sql.SQLException passed through. + * @throws SQLException passed through. + */ + public void uncacheEntities() throws SQLException; + + /** + * Remove an entity from the session cache. + * + *

Entities removed from cache are not saved in any way. Therefore, if you + * have modified the entity, you should be sure to {@link #commit()} changes + * before calling this method. + * + * @param Type of entity. + * @param entity The entity to decache. + * @throws SQLException passed through. */ public void uncacheEntity(E entity) throws SQLException; diff --git a/dspace-api/src/main/java/org/dspace/core/HibernateDBConnection.java b/dspace-api/src/main/java/org/dspace/core/HibernateDBConnection.java index b371af80ee..bd00b844ba 100644 --- a/dspace-api/src/main/java/org/dspace/core/HibernateDBConnection.java +++ b/dspace-api/src/main/java/org/dspace/core/HibernateDBConnection.java @@ -243,6 +243,11 @@ public class HibernateDBConnection implements DBConnection { } } + @Override + public void uncacheEntities() throws SQLException { + getSession().clear(); + } + /** * Evict an entity from the hibernate cache. *

diff --git a/dspace-api/src/test/java/org/dspace/core/ContextTest.java b/dspace-api/src/test/java/org/dspace/core/ContextTest.java index c6cd849d21..ccc1d2f732 100644 --- a/dspace-api/src/test/java/org/dspace/core/ContextTest.java +++ b/dspace-api/src/test/java/org/dspace/core/ContextTest.java @@ -558,4 +558,29 @@ public class ContextTest extends AbstractUnitTest { cleanupContext(instance); } + @Test + public void testUncacheEntities() throws Throwable { + // To set up the test, ensure the cache contains more than the current user entity + groupService.findByName(context, Group.ANONYMOUS); + assertTrue("Cache size should be greater than one", context.getDBConnection().getCacheSize() > 1); + + context.uncacheEntities(); + + assertThat("Cache size should be one (current user)", context.getDBConnection().getCacheSize(), equalTo(1L)); + context.reloadEntity(context.getCurrentUser()); + assertThat("Cache should only contain the current user", context.getDBConnection().getCacheSize(), equalTo(1L)); + } + + @Test + public void testUncacheEntity() throws Throwable { + // Remember the cache size after loading an entity + Group group = groupService.findByName(context, Group.ANONYMOUS); + long oldCacheSize = context.getDBConnection().getCacheSize(); + + // Uncache the entity + context.uncacheEntity(group); + + long newCacheSize = context.getDBConnection().getCacheSize(); + assertThat("Cache size should be reduced by one", newCacheSize, equalTo(oldCacheSize - 1)); + } } diff --git a/dspace-api/src/test/java/org/dspace/core/HibernateDBConnectionTest.java b/dspace-api/src/test/java/org/dspace/core/HibernateDBConnectionTest.java index 093f693d56..302844ce62 100644 --- a/dspace-api/src/test/java/org/dspace/core/HibernateDBConnectionTest.java +++ b/dspace-api/src/test/java/org/dspace/core/HibernateDBConnectionTest.java @@ -205,6 +205,28 @@ public class HibernateDBConnectionTest extends AbstractUnitTest { .contains(person)); } + /** + * Test of uncacheEntities method + */ + @Test + public void testUncacheEntities() throws SQLException { + // Get DBConnection associated with DSpace Context + HibernateDBConnection dbConnection = (HibernateDBConnection) context.getDBConnection(); + EPerson person = context.getCurrentUser(); + + assertTrue("Current user should be cached in session", dbConnection.getSession() + .contains(person)); + + dbConnection.uncacheEntities(); + assertFalse("Current user should be gone from cache", dbConnection.getSession() + .contains(person)); + + // Test ability to reload an uncached entity + person = dbConnection.reloadEntity(person); + assertTrue("Current user should be cached back in session", dbConnection.getSession() + .contains(person)); + } + /** * Test of uncacheEntity method */ From e856ae3291831feac74f01133f7e235d8513cba0 Mon Sep 17 00:00:00 2001 From: Toni Prieto Date: Sun, 27 Oct 2024 08:56:10 +0100 Subject: [PATCH 16/16] Uncache all entities during OAI indexing to reduce memory usage (cherry picked from commit 9af2e2e17cf289a2a6921d27a21d90db6ca8b9f9) --- dspace-oai/src/main/java/org/dspace/xoai/app/XOAI.java | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/dspace-oai/src/main/java/org/dspace/xoai/app/XOAI.java b/dspace-oai/src/main/java/org/dspace/xoai/app/XOAI.java index c6aaaa34b5..2d26795778 100644 --- a/dspace-oai/src/main/java/org/dspace/xoai/app/XOAI.java +++ b/dspace-oai/src/main/java/org/dspace/xoai/app/XOAI.java @@ -334,6 +334,11 @@ public class XOAI { server.add(list); server.commit(); list.clear(); + try { + context.uncacheEntities(); + } catch (SQLException ex) { + log.error("Error uncaching entities", ex); + } } } System.out.println("Total: " + i + " items");