diff --git a/dspace-api/pom.xml b/dspace-api/pom.xml
index 447458374f..be742ed2ec 100644
--- a/dspace-api/pom.xml
+++ b/dspace-api/pom.xml
@@ -515,21 +515,7 @@
-        <dependency>
-            <groupId>org.apache.solr</groupId>
-            <artifactId>solr-cell</artifactId>
-            <version>${solr.version}</version>
-            <exclusions>
-                <exclusion>
-                    <groupId>org.slf4j</groupId>
-                    <artifactId>jcl-over-slf4j</artifactId>
-                </exclusion>
-                <exclusion>
-                    <groupId>org.slf4j</groupId>
-                    <artifactId>slf4j-api</artifactId>
-                </exclusion>
-            </exclusions>
-        </dependency>
+
         <dependency>
             <groupId>commons-configuration</groupId>
             <artifactId>commons-configuration</artifactId>
@@ -621,6 +607,11 @@
             <groupId>com.google.code.findbugs</groupId>
             <artifactId>annotations</artifactId>
         </dependency>
+        <dependency>
+            <groupId>joda-time</groupId>
+            <artifactId>joda-time</artifactId>
+            <version>2.3</version>
+        </dependency>
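
Note on the joda-time addition: joda-time 2.3 is now declared directly (presumably it previously arrived transitively through the removed Solr artifacts). A minimal smoke-test sketch of the library in use; the class name and usage are illustrative, not taken from the DSpace code base:

    import org.joda.time.DateTime;
    import org.joda.time.DateTimeZone;
    import org.joda.time.format.ISODateTimeFormat;

    public class JodaTimeSmokeTest {
        public static void main(String[] args) {
            // Print the current instant as an ISO-8601 UTC timestamp,
            // the form Solr date fields expect.
            DateTime now = new DateTime(DateTimeZone.UTC);
            System.out.println(ISODateTimeFormat.dateTime().print(now));
        }
    }
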
diff --git a/dspace-api/src/main/java/org/apache/solr/handler/extraction/ExtractingParams.java b/dspace-api/src/main/java/org/apache/solr/handler/extraction/ExtractingParams.java
new file mode 100644
index 0000000000..60d53822a2
--- /dev/null
+++ b/dspace-api/src/main/java/org/apache/solr/handler/extraction/ExtractingParams.java
@@ -0,0 +1,165 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.handler.extraction;
+
+
+/**
+ * The various Solr parameter names to use when extracting content.
+ */
+public interface ExtractingParams {
+
+ /**
+ * Map all generated attribute names to field names with lowercase and underscores.
+ */
+ public static final String LOWERNAMES = "lowernames";
+
+ /**
+ * If true, ignore TikaException (give up extracting the text, but still index the metadata).
+ */
+ public static final String IGNORE_TIKA_EXCEPTION = "ignoreTikaException";
+
+
+ /**
+ * The param prefix for mapping Tika metadata to Solr fields.
+ *
+ * To map a field, add a name like:
+ * <pre>fmap.title=solr.title</pre>
+ *
+ * In this example, the Tika "title" metadata value will be added to a Solr field named "solr.title".
+ */
+ public static final String MAP_PREFIX = "fmap.";
+
+ /**
+ * The boost value for the name of the field. The boost can be specified by a name mapping.
+ *
+ * For example:
+ * <pre>
+ * map.title=solr.title
+ * boost.solr.title=2.5
+ * </pre>
+ * will boost the solr.title field for this document by 2.5
+ *
+ */
+ public static final String BOOST_PREFIX = "boost.";
+
+ /**
+ * Pass in literal values to be added to the document, as in
+ * <pre>literal.myField=Foo</pre>
+ */
+ public static final String LITERALS_PREFIX = "literal.";
+
+
+ /**
+ * Restrict the extracted parts of a document to be indexed
+ * by passing in an XPath expression. All content that satisfies the XPath expr.
+ * will be passed to the {@link SolrContentHandler}.
+ *
+ * See Tika's docs for what the extracted document looks like.
+ *
+ * @see #CAPTURE_ELEMENTS
+ */
+ public static final String XPATH_EXPRESSION = "xpath";
+
+
+ /**
+ * Only extract and return the content, do not index it.
+ */
+ public static final String EXTRACT_ONLY = "extractOnly";
+
+ /**
+ * Content output format if extractOnly is true. Default is "xml", alternative is "text".
+ */
+ public static final String EXTRACT_FORMAT = "extractFormat";
+
+ /**
+ * Capture attributes separately according to the name of the element, instead of just adding them to the string buffer
+ */
+ public static final String CAPTURE_ATTRIBUTES = "captureAttr";
+
+ /**
+ * Literal field values will by default override other values such as metadata and content. Set this to false to revert to pre-4.0 behaviour
+ */
+ public static final String LITERALS_OVERRIDE = "literalsOverride";
+
+ /**
+ * Capture the specified fields (and everything included below it that isn't captured by some other capture field) separately from the default. This is different
+ * from the case of passing in an XPath expression.
+ *
+ * The Capture field is based on the localName returned to the {@link SolrContentHandler}
+ * by Tika, not to be confused with the mapped field. The field name can then
+ * be mapped into the index schema.
+ *
+ * For instance, a Tika document may look like:
+ * <pre>
+ *  &lt;html&gt;
+ *    ...
+ *    &lt;body&gt;
+ *      &lt;p&gt;some text here.  &lt;div&gt;more text&lt;/div&gt;&lt;/p&gt;
+ *      Some more text
+ *    &lt;/body&gt;
+ * </pre>
+ *
+ * By passing in the p tag, you could capture all P tags separately from the rest of the text.
+ * Thus, in the example, the capture of the P tag would be: "some text here.  more text"
+ *
+ */
+ public static final String CAPTURE_ELEMENTS = "capture";
+
+ /**
+ * The type of the stream. If not specified, Tika will use mime type detection.
+ */
+ public static final String STREAM_TYPE = "stream.type";
+
+
+ /**
+ * Optional. The file name. If specified, Tika can take this into account while
+ * guessing the MIME type.
+ */
+ public static final String RESOURCE_NAME = "resource.name";
+
+ /**
+ * Optional. The password for this resource. It will be used instead of the rule-based password lookup mechanisms.
+ */
+ public static final String RESOURCE_PASSWORD = "resource.password";
+
+ /**
+ * Optional. If specified, the prefix will be prepended to all Metadata names, making it possible
+ * to set up a dynamic field to capture them automatically.
+ */
+ public static final String UNKNOWN_FIELD_PREFIX = "uprefix";
+
+ /**
+ * Optional. If specified and the name of a potential field cannot be determined, the default Field specified
+ * will be used instead.
+ */
+ public static final String DEFAULT_FIELD = "defaultField";
+
+ /**
+ * Optional. If specified, loads the file as a source for password lookups for Tika encrypted documents.
+ *
+ * The file format is the Java properties format, with one key=value pair per line.
+ * The key is evaluated as a regex against the file name, and the value is the password.
+ * The rules are evaluated top to bottom, i.e. the first match will be used.
+ * If you want a fallback password that is always used, supply a .*=<defaultmypassword> rule at the end.
+ */
+ public static final String PASSWORD_MAP_FILE = "passwordsFile";
+}
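
These constants only name request parameters; they take effect when a request reaches Solr's ExtractingRequestHandler. A hedged SolrJ 4.x sketch of sending a document through /update/extract — the core URL, file name, and field names are placeholders, not values from this patch:

    import java.io.File;

    import org.apache.solr.client.solrj.SolrServer;
    import org.apache.solr.client.solrj.impl.HttpSolrServer;
    import org.apache.solr.client.solrj.request.AbstractUpdateRequest;
    import org.apache.solr.client.solrj.request.ContentStreamUpdateRequest;
    import org.apache.solr.handler.extraction.ExtractingParams;

    public class ExtractUpload {
        public static void main(String[] args) throws Exception {
            SolrServer solr = new HttpSolrServer("http://localhost:8983/solr/search");

            ContentStreamUpdateRequest req =
                    new ContentStreamUpdateRequest("/update/extract");
            req.addFile(new File("fulltext.pdf"), "application/pdf");

            // Attach a literal id, map Tika's "title" metadata to a Solr field,
            // and keep indexing metadata even if Tika fails on the body.
            req.setParam(ExtractingParams.LITERALS_PREFIX + "id", "doc-1");
            req.setParam(ExtractingParams.MAP_PREFIX + "title", "doc_title");
            req.setParam(ExtractingParams.LOWERNAMES, "true");
            req.setParam(ExtractingParams.IGNORE_TIKA_EXCEPTION, "true");
            req.setAction(AbstractUpdateRequest.ACTION.COMMIT, true, true);

            solr.request(req);
        }
    }
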
diff --git a/dspace-api/src/test/java/org/dspace/content/CollectionTest.java b/dspace-api/src/test/java/org/dspace/content/CollectionTest.java
index e6b5942f4d..635659125d 100644
--- a/dspace-api/src/test/java/org/dspace/content/CollectionTest.java
+++ b/dspace-api/src/test/java/org/dspace/content/CollectionTest.java
@@ -14,7 +14,7 @@ import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
-import org.apache.hadoop.security.authorize.ServiceAuthorizationManager;
+
import org.dspace.authorize.AuthorizeException;
import org.apache.log4j.Logger;
import org.dspace.core.Context;
diff --git a/dspace-oai/pom.xml b/dspace-oai/pom.xml
index 1ff362c4a6..692cd9ce98 100644
--- a/dspace-oai/pom.xml
+++ b/dspace-oai/pom.xml
@@ -146,16 +146,8 @@
             <artifactId>solr-solrj</artifactId>
             <version>${solr.version}</version>
         </dependency>
-        <dependency>
-            <groupId>org.apache.solr</groupId>
-            <artifactId>solr-core</artifactId>
-            <version>${solr.version}</version>
-        </dependency>
-        <dependency>
-            <groupId>org.apache.lucene</groupId>
-            <artifactId>lucene-core</artifactId>
-            <version>${lucene.version}</version>
-        </dependency>
+
+
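
With solr-core and lucene-core removed, dspace-oai reaches its Solr core only over HTTP through solrj. A minimal sketch of the client-only pattern this enables (the URL and query are placeholders):

    import org.apache.solr.client.solrj.SolrQuery;
    import org.apache.solr.client.solrj.SolrServer;
    import org.apache.solr.client.solrj.impl.HttpSolrServer;
    import org.apache.solr.client.solrj.response.QueryResponse;

    public class OaiSolrClientSketch {
        public static void main(String[] args) throws Exception {
            // Remote client only: no EmbeddedSolrServer, so neither solr-core
            // nor lucene-core is needed on the module's classpath.
            SolrServer oai = new HttpSolrServer("http://localhost:8983/solr/oai");
            QueryResponse rsp = oai.query(new SolrQuery("*:*").setRows(5));
            System.out.println("items: " + rsp.getResults().getNumFound());
        }
    }
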
diff --git a/dspace-solr/pom.xml b/dspace-solr/pom.xml
index ae4afc9ff8..f8817fffe0 100644
--- a/dspace-solr/pom.xml
+++ b/dspace-solr/pom.xml
@@ -31,8 +31,8 @@
         <packaging>war</packaging>
-        <lucene.version>4.4.0</lucene.version>
-        <solr.version>4.4.0</solr.version>
+        <lucene.version>4.10.2</lucene.version>
+        <solr.version>4.10.2</solr.version>
         <root.basedir>${basedir}/..</root.basedir>
@@ -143,10 +143,6 @@
             <artifactId>solr-cell</artifactId>
             <version>${solr.version}</version>
-            <exclusion>
-                <groupId>org.apache.lucene</groupId>
-                <artifactId>lucene-core</artifactId>
-            </exclusion>
diff --git a/dspace-solr/src/main/java/org/apache/solr/handler/component/FacetComponent.java b/dspace-solr/src/main/java/org/apache/solr/handler/component/FacetComponent.java
deleted file mode 100644
index 9ced848b4f..0000000000
--- a/dspace-solr/src/main/java/org/apache/solr/handler/component/FacetComponent.java
+++ /dev/null
@@ -1,834 +0,0 @@
-/**
- * The contents of this file are subject to the license and copyright
- * detailed in the LICENSE and NOTICE files at the root of the source
- * tree and available online at
- *
- * http://www.dspace.org/license/
- */
-
-package org.apache.solr.handler.component;
-
-import org.apache.lucene.util.OpenBitSet;
-import org.apache.solr.common.SolrException;
-import org.apache.solr.common.params.CommonParams;
-import org.apache.solr.common.params.FacetParams;
-import org.apache.solr.common.params.ModifiableSolrParams;
-import org.apache.solr.common.params.SolrParams;
-import org.apache.solr.common.util.NamedList;
-import org.apache.solr.common.util.SimpleOrderedMap;
-import org.apache.solr.common.util.StrUtils;
-import org.apache.solr.request.SimpleFacets;
-import org.apache.solr.schema.FieldType;
-import org.apache.solr.search.QueryParsing;
-import org.apache.solr.search.SyntaxError;
-
-import java.io.IOException;
-import java.net.URL;
-import java.util.*;
-
-/**
- * TODO!
- *
- * @version $Id: FacetComponent.java 1152531 2011-07-31 00:43:33Z koji $
- * @since solr 1.3
- */
-public class FacetComponent extends SearchComponent
-{
- public static final String COMPONENT_NAME = "facet";
-
- @Override
- public void prepare(ResponseBuilder rb) throws IOException
- {
- if (rb.req.getParams().getBool(FacetParams.FACET,false)) {
- rb.setNeedDocSet( true );
- rb.doFacets = true;
- }
- }
-
- /**
- * Actually run the query
- * @param rb
- */
- @Override
- public void process(ResponseBuilder rb) throws IOException
- {
- if (rb.doFacets) {
- SolrParams params = rb.req.getParams();
- SimpleFacets f = new SimpleFacets(rb.req,
- rb.getResults().docSet,
- params,
- rb );
-
- // TODO ???? add this directly to the response, or to the builder?
- rb.rsp.add( "facet_counts", f.getFacetCounts() );
- }
- }
-
- private static final String commandPrefix = "{!" + CommonParams.TERMS + "=$";
-
- @Override
- public int distributedProcess(ResponseBuilder rb) throws IOException {
- if (!rb.doFacets) {
- return ResponseBuilder.STAGE_DONE;
- }
-
- if (rb.stage == ResponseBuilder.STAGE_GET_FIELDS) {
- // overlap facet refinement requests (those shards that we need a count for
- // particular facet values from), where possible, with
- // the requests to get fields (because we know that is the
- // only other required phase).
- // We do this in distributedProcess so we can look at all of the
- // requests in the outgoing queue at once.
-
-
-
- for (int shardNum=0; shardNum<rb.shards.length; shardNum++) {
- List<String> refinements = null;
-
- for (DistribFieldFacet dff : rb._facetInfo.facets.values()) {
- if (!dff.needRefinements) continue;
- List<String> refList = dff._toRefine[shardNum];
- if (refList == null || refList.size()==0) continue;
-
- String key = dff.getKey(); // reuse the same key that was used for the main facet
- String termsKey = key + "__terms";
- String termsVal = StrUtils.join(refList, ',');
-
- String facetCommand;
- // add terms into the original facet.field command
- // do it via parameter reference to avoid another layer of encoding.
-
- String termsKeyEncoded = QueryParsing.encodeLocalParamVal(termsKey);
- if (dff.localParams != null) {
- facetCommand = commandPrefix+termsKeyEncoded + " " + dff.facetStr.substring(2);
- } else {
- facetCommand = commandPrefix+termsKeyEncoded+'}'+dff.field;
- }
-
- if (refinements == null) {
- refinements = new ArrayList<String>();
- }
-
- refinements.add(facetCommand);
- refinements.add(termsKey);
- refinements.add(termsVal);
- }
-
- if (refinements == null) continue;
-
-
- String shard = rb.shards[shardNum];
- ShardRequest refine = null;
- boolean newRequest = false;
-
- // try to find a request that is already going out to that shard.
- // If nshards becomes too great, we may want to move to hashing for better
- // scalability.
- for (ShardRequest sreq : rb.outgoing) {
- if ((sreq.purpose & ShardRequest.PURPOSE_GET_FIELDS)!=0
- && sreq.shards != null
- && sreq.shards.length==1
- && sreq.shards[0].equals(shard))
- {
- refine = sreq;
- break;
- }
- }
-
- if (refine == null) {
- // we didn't find any other suitable requests going out to that shard, so
- // create one ourselves.
- newRequest = true;
- refine = new ShardRequest();
- refine.shards = new String[]{rb.shards[shardNum]};
- refine.params = new ModifiableSolrParams(rb.req.getParams());
- // don't request any documents
- refine.params.remove(CommonParams.START);
- refine.params.set(CommonParams.ROWS,"0");
- }
-
- refine.purpose |= ShardRequest.PURPOSE_REFINE_FACETS;
- refine.params.set(FacetParams.FACET, "true");
- refine.params.remove(FacetParams.FACET_FIELD);
- refine.params.remove(FacetParams.FACET_QUERY);
-
- for (int i=0; i<refinements.size();) {
[...]
- if (dff.limit > 0) {
- // set the initial limit higher to increase accuracy
- dff.initialLimit = (int)(dff.initialLimit * 1.5) + 10;
- dff.initialMincount = 0; // TODO: we could change this to 1, but would then need more refinement for small facet result sets?
- } else {
- // if limit==-1, then no need to artificially lower mincount to 0 if it's 1
- dff.initialMincount = Math.min(dff.minCount, 1);
- }
- } else {
- // we're sorting by index order.
- // if minCount==0, we should always be able to get accurate results w/o over-requesting or refining
- // if minCount==1, we should be able to get accurate results w/o over-requesting, but we'll need to refine
- // if minCount==n (>1), we can set the initialMincount to minCount/nShards, rounded up.
- // For example, we know that if minCount=10 and we have 3 shards, then at least one shard must have a count of 4 for the term
- // For the minCount>1 case, we can generate too short of a list (miss terms at the end of the list) unless limit==-1
- // For example: each shard could produce a list of top 10, but some of those could fail to make it into the combined list (i.e.
- // we needed to go beyond the top 10 to generate the top 10 combined). Overrequesting can help a little here, but not as
- // much as when sorting by count.
- if (dff.minCount <= 1) {
- dff.initialMincount = dff.minCount;
- } else {
- dff.initialMincount = (int)Math.ceil((double)dff.minCount / rb.shards.length);
- // dff.initialMincount = 1;
- }
- }
-
- if (dff.initialMincount != 0) {
- sreq.params.set(paramStart + FacetParams.FACET_MINCOUNT, dff.initialMincount);
- }
-
- // Currently this is for testing only and allows overriding of the
- // facet.limit set to the shards
- dff.initialLimit = rb.req.getParams().getInt("facet.shard.limit", dff.initialLimit);
-
- sreq.params.set(paramStart + FacetParams.FACET_LIMIT, dff.initialLimit);
- }
- } else {
- // turn off faceting on other requests
- sreq.params.set(FacetParams.FACET, "false");
- // we could optionally remove faceting params
- }
- }
-
- @Override
- public void handleResponses(ResponseBuilder rb, ShardRequest sreq) {
- if (!rb.doFacets) return;
-
- if ((sreq.purpose & ShardRequest.PURPOSE_GET_FACETS)!=0) {
- countFacets(rb, sreq);
- } else if ((sreq.purpose & ShardRequest.PURPOSE_REFINE_FACETS)!=0) {
- refineFacets(rb, sreq);
- }
- }
-
-
-
-
- private void countFacets(ResponseBuilder rb, ShardRequest sreq) {
- FacetInfo fi = rb._facetInfo;
-
- for (ShardResponse srsp: sreq.responses) {
- int shardNum = rb.getShardNum(srsp.getShard());
- NamedList facet_counts = (NamedList)srsp.getSolrResponse().getResponse().get("facet_counts");
-
- // handle facet queries
- NamedList facet_queries = (NamedList)facet_counts.get("facet_queries");
- if (facet_queries != null) {
- for (int i=0; i<facet_queries.size(); i++) {
[...]
- SimpleOrderedMap<SimpleOrderedMap<Object>> facet_dates =
- (SimpleOrderedMap<SimpleOrderedMap<Object>>)
- facet_counts.get("facet_dates");
-
- if (facet_dates != null) {
-
- // go through each facet_date
- for (Map.Entry<String, SimpleOrderedMap<Object>> entry : facet_dates) {
- final String field = entry.getKey();
- if (fi.dateFacets.get(field) == null) {
- // first time we've seen this field, no merging
- fi.dateFacets.add(field, entry.getValue());
-
- } else {
- // not the first time, merge current field
-
- SimpleOrderedMap
[...]
-        <dependency>
-            <groupId>org.apache.solr</groupId>
-            <artifactId>solr-core</artifactId>
-            <version>${solr.version}</version>
-            <exclusions>
-                <exclusion>
-                    <groupId>jdk.tools</groupId>
-                    <artifactId>jdk.tools</artifactId>
-                </exclusion>
-            </exclusions>
-        </dependency>
+
         <dependency>
             <groupId>org.slf4j</groupId>
             <artifactId>slf4j-jdk14</artifactId>
diff --git a/dspace/solr/authority/conf/solrconfig.xml b/dspace/solr/authority/conf/solrconfig.xml
index ef814ba05a..837869de0d 100644
--- a/dspace/solr/authority/conf/solrconfig.xml
+++ b/dspace/solr/authority/conf/solrconfig.xml
@@ -39,7 +39,7 @@
that you fully re-index after changing this setting as it can
affect both how text is indexed and queried.
-->
-    <luceneMatchVersion>4.4</luceneMatchVersion>
+    <luceneMatchVersion>4.9</luceneMatchVersion>
[...]
-    <luceneMatchVersion>4.4</luceneMatchVersion>
+    <luceneMatchVersion>4.9</luceneMatchVersion>
[...]
-    <luceneMatchVersion>4.4</luceneMatchVersion>
+    <luceneMatchVersion>4.9</luceneMatchVersion>
[...]
-    <luceneMatchVersion>4.4</luceneMatchVersion>
+    <luceneMatchVersion>4.9</luceneMatchVersion>
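
For reference, luceneMatchVersion pins analysis behavior rather than the jar version: a 4.10.2 installation running with luceneMatchVersion 4.9 keeps 4.9-era analyzer behavior until the cores are re-indexed. A small sketch using the Lucene 4.x Version API; the printed comparison is illustrative:

    import org.apache.lucene.util.Version;

    public class MatchVersionCheck {
        public static void main(String[] args) throws Exception {
            // Solr parses the solrconfig.xml value with this same API; analyzers
            // consult the result to emulate older behavior on a newer jar.
            Version configured = Version.parseLeniently("4.9");
            Version jar = Version.parseLeniently("4.10.2");
            System.out.println(configured + " onOrAfter " + jar + "? "
                    + configured.onOrAfter(jar)); // false
        }
    }
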