diff --git a/dspace-api/pom.xml b/dspace-api/pom.xml
index 447458374f..be742ed2ec 100644
--- a/dspace-api/pom.xml
+++ b/dspace-api/pom.xml
@@ -515,21 +515,7 @@
-        <dependency>
-            <groupId>org.apache.solr</groupId>
-            <artifactId>solr-cell</artifactId>
-            <version>${solr.version}</version>
-            <exclusions>
-                <exclusion>
-                    <groupId>org.slf4j</groupId>
-                    <artifactId>jcl-over-slf4j</artifactId>
-                </exclusion>
-                <exclusion>
-                    <groupId>org.slf4j</groupId>
-                    <artifactId>slf4j-api</artifactId>
-                </exclusion>
-            </exclusions>
-        </dependency>
+
         <dependency>
             <groupId>commons-configuration</groupId>
             <artifactId>commons-configuration</artifactId>
@@ -621,6 +607,11 @@
             <groupId>com.google.code.findbugs</groupId>
             <artifactId>annotations</artifactId>
         </dependency>
+        <dependency>
+            <groupId>joda-time</groupId>
+            <artifactId>joda-time</artifactId>
+            <version>2.3</version>
+        </dependency>
diff --git a/dspace-api/src/main/java/org/apache/solr/handler/extraction/ExtractingParams.java b/dspace-api/src/main/java/org/apache/solr/handler/extraction/ExtractingParams.java
new file mode 100644
index 0000000000..60d53822a2
--- /dev/null
+++ b/dspace-api/src/main/java/org/apache/solr/handler/extraction/ExtractingParams.java
@@ -0,0 +1,165 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.handler.extraction;
+
+
+/**
+ * The various Solr parameter names to use when extracting content.
+ */
+public interface ExtractingParams {
+
+  /**
+   * Map all generated attribute names to field names with lowercase and underscores.
+   */
+  public static final String LOWERNAMES = "lowernames";
+
+  /**
+   * If true, ignore TikaException (give up extracting the text, but still index the metadata).
+   */
+  public static final String IGNORE_TIKA_EXCEPTION = "ignoreTikaException";
+
+
+  /**
+   * The param prefix for mapping Tika metadata to Solr fields.
+   * <p/>
+   * To map a field, add a name like:
+   * <pre>fmap.title=solr.title</pre>
+   *
+   * In this example, the Tika "title" metadata value will be added to a Solr field named "solr.title".
+   *
+   */
+  public static final String MAP_PREFIX = "fmap.";
+
+  /**
+   * The boost value for the name of the field.  The boost can be specified by a name mapping.
+   * <p/>
+   * For example:
+   * <pre>
+   * fmap.title=solr.title
+   * boost.solr.title=2.5
+   * </pre>
+   * will boost the solr.title field for this document by 2.5.
+   *
+   */
+  public static final String BOOST_PREFIX = "boost.";
+
+  /**
+   * Pass in literal values to be added to the document, as in
+   * <pre>
+   *  literal.myField=Foo
+   * </pre>
+   *
+   */
+  public static final String LITERALS_PREFIX = "literal.";
+
+
+  /**
+   * Restrict the extracted parts of a document to be indexed
+   * by passing in an XPath expression.  All content that satisfies the XPath expression
+   * will be passed to the {@link SolrContentHandler}.
+   * <p/>
+   * See Tika's docs for what the extracted document looks like.
+   * <p/>
+   * @see #CAPTURE_ELEMENTS
+   */
+  public static final String XPATH_EXPRESSION = "xpath";
+
+
+  /**
+   * Only extract and return the content, do not index it.
+   */
+  public static final String EXTRACT_ONLY = "extractOnly";
+
+  /**
+   * Content output format if extractOnly is true.  Default is "xml", alternative is "text".
+   */
+  public static final String EXTRACT_FORMAT = "extractFormat";
+
+  /**
+   * Capture attributes separately according to the name of the element, instead of just adding them to the string buffer.
+   */
+  public static final String CAPTURE_ATTRIBUTES = "captureAttr";
+
+  /**
+   * Literal field values will by default override other values such as metadata and content.  Set this to false to revert to the pre-4.0 behaviour.
+   */
+  public static final String LITERALS_OVERRIDE = "literalsOverride";
+
+  /**
+   * Capture the specified fields (and everything included below them that isn't captured by some other capture field) separately from the default.  This is different
+   * from the case of passing in an XPath expression.
+   * <p/>
+   * The capture field is based on the localName returned to the {@link SolrContentHandler}
+   * by Tika, not to be confused with the mapped field.  The field name can then
+   * be mapped into the index schema.
+   * <p/>
+   * For instance, a Tika document may look like:
+   * <pre>
+   *  <html>
+   *    ...
+   *    <body>
+   *      <p>some text here.  <div>more text</div></p>
+   *      Some more text
+   *    </body>
+   * </pre>
+   * By passing in the p tag, you could capture all P tags separately from the rest of the text.
+   * Thus, in the example, the capture of the P tag would be: "some text here.  more text"
+   *
+   */
+  public static final String CAPTURE_ELEMENTS = "capture";
+
+  /**
+   * The type of the stream.  If not specified, Tika will use MIME type detection.
+   */
+  public static final String STREAM_TYPE = "stream.type";
+
+
+  /**
+   * Optional.  The file name.  If specified, Tika can take this into account while
+   * guessing the MIME type.
+   */
+  public static final String RESOURCE_NAME = "resource.name";
+
+  /**
+   * Optional.  The password for this resource.  Will be used instead of the rule-based password lookup mechanisms.
+   */
+  public static final String RESOURCE_PASSWORD = "resource.password";
+
+  /**
+   * Optional.  If specified, the prefix will be prepended to all metadata names, so that it is possible
+   * to set up a dynamic field to automatically capture them.
+   */
+  public static final String UNKNOWN_FIELD_PREFIX = "uprefix";
+
+  /**
+   * Optional.  If specified and the name of a potential field cannot be determined, the default field specified
+   * will be used instead.
+   */
+  public static final String DEFAULT_FIELD = "defaultField";
+
+  /**
+   * Optional.  If specified, loads the file as a source for password lookups for Tika-encrypted documents.
+   * <p/>
+   * The file format is Java properties, with one key=value pair per line.
+   * The key is evaluated as a regex against the file name, and the value is the password.
+   * The rules are evaluated top to bottom, i.e. the first match will be used.
+   * If you want a fallback password to be always used, supply a .*=<defaultmypassword> line at the end.
+   */
+  public static final String PASSWORD_MAP_FILE = "passwordsFile";
+}
diff --git a/dspace-api/src/test/java/org/dspace/content/CollectionTest.java b/dspace-api/src/test/java/org/dspace/content/CollectionTest.java
index e6b5942f4d..635659125d 100644
--- a/dspace-api/src/test/java/org/dspace/content/CollectionTest.java
+++ b/dspace-api/src/test/java/org/dspace/content/CollectionTest.java
@@ -14,7 +14,7 @@
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
-import org.apache.hadoop.security.authorize.ServiceAuthorizationManager;
+
 import org.dspace.authorize.AuthorizeException;
 import org.apache.log4j.Logger;
 import org.dspace.core.Context;
diff --git a/dspace-oai/pom.xml b/dspace-oai/pom.xml
index 1ff362c4a6..692cd9ce98 100644
--- a/dspace-oai/pom.xml
+++ b/dspace-oai/pom.xml
@@ -146,16 +146,8 @@
             <artifactId>solr-solrj</artifactId>
             <version>${solr.version}</version>
         </dependency>
-        <dependency>
-            <groupId>org.apache.solr</groupId>
-            <artifactId>solr-core</artifactId>
-            <version>${solr.version}</version>
-        </dependency>
-        <dependency>
-            <groupId>org.apache.lucene</groupId>
-            <artifactId>lucene-core</artifactId>
-            <version>${lucene.version}</version>
-        </dependency>
+
+
diff --git a/dspace-solr/pom.xml b/dspace-solr/pom.xml
index ae4afc9ff8..f8817fffe0 100644
--- a/dspace-solr/pom.xml
+++ b/dspace-solr/pom.xml
@@ -31,8 +31,8 @@
     <packaging>war</packaging>
 
-        <solr.version>4.4.0</solr.version>
-        <lucene.version>4.4.0</lucene.version>
+
+        <solr.version>4.10.2</solr.version>
 
         ${basedir}/..
@@ -143,10 +143,6 @@
             <artifactId>solr-cell</artifactId>
             <version>${solr.version}</version>
         </dependency>
-        <dependency>
-            <groupId>org.apache.lucene</groupId>
-            <artifactId>lucene-core</artifactId>
-        </dependency>
diff --git a/dspace-solr/src/main/java/org/apache/solr/handler/component/FacetComponent.java b/dspace-solr/src/main/java/org/apache/solr/handler/component/FacetComponent.java
deleted file mode 100644
index 9ced848b4f..0000000000
--- a/dspace-solr/src/main/java/org/apache/solr/handler/component/FacetComponent.java
+++ /dev/null
@@ -1,834 +0,0 @@
-/**
- * The contents of this file are subject to the license and copyright
- * detailed in the LICENSE and NOTICE files at the root of the source
- * tree and available online at
- *
- * http://www.dspace.org/license/
- */
-
-package org.apache.solr.handler.component;
-
-import org.apache.lucene.util.OpenBitSet;
-import org.apache.solr.common.SolrException;
-import org.apache.solr.common.params.CommonParams;
-import org.apache.solr.common.params.FacetParams;
-import org.apache.solr.common.params.ModifiableSolrParams;
-import org.apache.solr.common.params.SolrParams;
-import org.apache.solr.common.util.NamedList;
-import org.apache.solr.common.util.SimpleOrderedMap;
-import org.apache.solr.common.util.StrUtils;
-import org.apache.solr.request.SimpleFacets;
-import org.apache.solr.schema.FieldType;
-import org.apache.solr.search.QueryParsing;
-import org.apache.solr.search.SyntaxError;
-
-import java.io.IOException;
-import java.net.URL;
-import java.util.*;
-
-/**
- * TODO!
- * - * @version $Id: FacetComponent.java 1152531 2011-07-31 00:43:33Z koji $ - * @since solr 1.3 - */ -public class FacetComponent extends SearchComponent -{ - public static final String COMPONENT_NAME = "facet"; - - @Override - public void prepare(ResponseBuilder rb) throws IOException - { - if (rb.req.getParams().getBool(FacetParams.FACET,false)) { - rb.setNeedDocSet( true ); - rb.doFacets = true; - } - } - - /** - * Actually run the query - * @param rb - */ - @Override - public void process(ResponseBuilder rb) throws IOException - { - if (rb.doFacets) { - SolrParams params = rb.req.getParams(); - SimpleFacets f = new SimpleFacets(rb.req, - rb.getResults().docSet, - params, - rb ); - - // TODO ???? add this directly to the response, or to the builder? - rb.rsp.add( "facet_counts", f.getFacetCounts() ); - } - } - - private static final String commandPrefix = "{!" + CommonParams.TERMS + "=$"; - - @Override - public int distributedProcess(ResponseBuilder rb) throws IOException { - if (!rb.doFacets) { - return ResponseBuilder.STAGE_DONE; - } - - if (rb.stage == ResponseBuilder.STAGE_GET_FIELDS) { - // overlap facet refinement requests (those shards that we need a count for - // particular facet values from), where possible, with - // the requests to get fields (because we know that is the - // only other required phase). - // We do this in distributedProcess so we can look at all of the - // requests in the outgoing queue at once. - - - - for (int shardNum=0; shardNum refinements = null; - - for (DistribFieldFacet dff : rb._facetInfo.facets.values()) { - if (!dff.needRefinements) continue; - List refList = dff._toRefine[shardNum]; - if (refList == null || refList.size()==0) continue; - - String key = dff.getKey(); // reuse the same key that was used for the main facet - String termsKey = key + "__terms"; - String termsVal = StrUtils.join(refList, ','); - - String facetCommand; - // add terms into the original facet.field command - // do it via parameter reference to avoid another layer of encoding. - - String termsKeyEncoded = QueryParsing.encodeLocalParamVal(termsKey); - if (dff.localParams != null) { - facetCommand = commandPrefix+termsKeyEncoded + " " + dff.facetStr.substring(2); - } else { - facetCommand = commandPrefix+termsKeyEncoded+'}'+dff.field; - } - - if (refinements == null) { - refinements = new ArrayList(); - } - - refinements.add(facetCommand); - refinements.add(termsKey); - refinements.add(termsVal); - } - - if (refinements == null) continue; - - - String shard = rb.shards[shardNum]; - ShardRequest refine = null; - boolean newRequest = false; - - // try to find a request that is already going out to that shard. - // If nshards becomes to great, we way want to move to hashing for better - // scalability. - for (ShardRequest sreq : rb.outgoing) { - if ((sreq.purpose & ShardRequest.PURPOSE_GET_FIELDS)!=0 - && sreq.shards != null - && sreq.shards.length==1 - && sreq.shards[0].equals(shard)) - { - refine = sreq; - break; - } - } - - if (refine == null) { - // we didn't find any other suitable requests going out to that shard, so - // create one ourselves. 
- newRequest = true; - refine = new ShardRequest(); - refine.shards = new String[]{rb.shards[shardNum]}; - refine.params = new ModifiableSolrParams(rb.req.getParams()); - // don't request any documents - refine.params.remove(CommonParams.START); - refine.params.set(CommonParams.ROWS,"0"); - } - - refine.purpose |= ShardRequest.PURPOSE_REFINE_FACETS; - refine.params.set(FacetParams.FACET, "true"); - refine.params.remove(FacetParams.FACET_FIELD); - refine.params.remove(FacetParams.FACET_QUERY); - - for (int i=0; i 0) { - // set the initial limit higher to increase accuracy - dff.initialLimit = (int)(dff.initialLimit * 1.5) + 10; - dff.initialMincount = 0; // TODO: we could change this to 1, but would then need more refinement for small facet result sets? - } else { - // if limit==-1, then no need to artificially lower mincount to 0 if it's 1 - dff.initialMincount = Math.min(dff.minCount, 1); - } - } else { - // we're sorting by index order. - // if minCount==0, we should always be able to get accurate results w/o over-requesting or refining - // if minCount==1, we should be able to get accurate results w/o over-requesting, but we'll need to refine - // if minCount==n (>1), we can set the initialMincount to minCount/nShards, rounded up. - // For example, we know that if minCount=10 and we have 3 shards, then at least one shard must have a count of 4 for the term - // For the minCount>1 case, we can generate too short of a list (miss terms at the end of the list) unless limit==-1 - // For example: each shard could produce a list of top 10, but some of those could fail to make it into the combined list (i.e. - // we needed to go beyond the top 10 to generate the top 10 combined). Overrequesting can help a little here, but not as - // much as when sorting by count. 
- if (dff.minCount <= 1) { - dff.initialMincount = dff.minCount; - } else { - dff.initialMincount = (int)Math.ceil((double)dff.minCount / rb.shards.length); - // dff.initialMincount = 1; - } - } - - if (dff.initialMincount != 0) { - sreq.params.set(paramStart + FacetParams.FACET_MINCOUNT, dff.initialMincount); - } - - // Currently this is for testing only and allows overriding of the - // facet.limit set to the shards - dff.initialLimit = rb.req.getParams().getInt("facet.shard.limit", dff.initialLimit); - - sreq.params.set(paramStart + FacetParams.FACET_LIMIT, dff.initialLimit); - } - } else { - // turn off faceting on other requests - sreq.params.set(FacetParams.FACET, "false"); - // we could optionally remove faceting params - } - } - - @Override - public void handleResponses(ResponseBuilder rb, ShardRequest sreq) { - if (!rb.doFacets) return; - - if ((sreq.purpose & ShardRequest.PURPOSE_GET_FACETS)!=0) { - countFacets(rb, sreq); - } else if ((sreq.purpose & ShardRequest.PURPOSE_REFINE_FACETS)!=0) { - refineFacets(rb, sreq); - } - } - - - - - private void countFacets(ResponseBuilder rb, ShardRequest sreq) { - FacetInfo fi = rb._facetInfo; - - for (ShardResponse srsp: sreq.responses) { - int shardNum = rb.getShardNum(srsp.getShard()); - NamedList facet_counts = (NamedList)srsp.getSolrResponse().getResponse().get("facet_counts"); - - // handle facet queries - NamedList facet_queries = (NamedList)facet_counts.get("facet_queries"); - if (facet_queries != null) { - for (int i=0; i> facet_dates = - (SimpleOrderedMap>) - facet_counts.get("facet_dates"); - - if (facet_dates != null) { - - // go through each facet_date - for (Map.Entry> entry : facet_dates) { - final String field = entry.getKey(); - if (fi.dateFacets.get(field) == null) { - // first time we've seen this field, no merging - fi.dateFacets.add(field, entry.getValue()); - - } else { - // not the first time, merge current field - - SimpleOrderedMap shardFieldValues - = entry.getValue(); - SimpleOrderedMap existFieldValues - = fi.dateFacets.get(field); - - for (Map.Entry existPair : existFieldValues) { - final String key = existPair.getKey(); - if (key.equals("gap") || - key.equals("end") || - key.equals("start")) { - // we can skip these, must all be the same across shards - continue; - } - // can be null if inconsistencies in shards responses - Integer newValue = (Integer) shardFieldValues.get(key); - if (null != newValue) { - Integer oldValue = ((Integer) existPair.getValue()); - existPair.setValue(oldValue + newValue); - } - } - } - } - } - - // Distributed facet_ranges - // - // The implementation below uses the first encountered shard's - // facet_ranges as the basis for subsequent shards' data to be merged. 
- @SuppressWarnings("unchecked") - SimpleOrderedMap> facet_ranges = - (SimpleOrderedMap>) - facet_counts.get("facet_ranges"); - - if (facet_ranges != null) { - - // go through each facet_range - for (Map.Entry> entry : facet_ranges) { - final String field = entry.getKey(); - if (fi.rangeFacets.get(field) == null) { - // first time we've seen this field, no merging - fi.rangeFacets.add(field, entry.getValue()); - - } else { - // not the first time, merge current field counts - - @SuppressWarnings("unchecked") - NamedList shardFieldValues - = (NamedList) entry.getValue().get("counts"); - - @SuppressWarnings("unchecked") - NamedList existFieldValues - = (NamedList) fi.rangeFacets.get(field).get("counts"); - - for (Map.Entry existPair : existFieldValues) { - final String key = existPair.getKey(); - // can be null if inconsistencies in shards responses - Integer newValue = shardFieldValues.get(key); - if (null != newValue) { - Integer oldValue = existPair.getValue(); - existPair.setValue(oldValue + newValue); - } - } - } - } - } - } - - - // - // This code currently assumes that there will be only a single - // request ((with responses from all shards) sent out to get facets... - // otherwise we would need to wait until all facet responses were received. - // - - for (DistribFieldFacet dff : fi.facets.values()) { - // no need to check these facets for refinement - if (dff.initialLimit <= 0 && dff.initialMincount == 0) continue; - - // only other case where index-sort doesn't need refinement is if minCount==0 - if (dff.minCount == 0 && dff.sort.equals(FacetParams.FACET_SORT_INDEX)) continue; - - @SuppressWarnings("unchecked") // generic array's are annoying - List[] tmp = (List[]) new List[rb.shards.length]; - dff._toRefine = tmp; - - ShardFacetCount[] counts = dff.getCountSorted(); - int ntop = Math.min(counts.length, dff.limit >= 0 ? dff.offset + dff.limit : Integer.MAX_VALUE); - long smallestCount = counts.length == 0 ? 
0 : counts[ntop-1].count; - - for (int i=0; i= smallestCount, then flag for refinement - long maxCount = sfc.count; - for (int shardNum=0; shardNum= smallestCount) { - // TODO: on a tie, we could check the term values - needRefinement = true; - } - } - - if (needRefinement) { - // add a query for each shard missing the term that needs refinement - for (int shardNum=0; shardNum0) { - dff.needRefinements = true; - List lst = dff._toRefine[shardNum]; - if (lst == null) { - lst = dff._toRefine[shardNum] = new ArrayList(); - } - lst.add(sfc.name); - } - } - } - } - } - } - - - private void refineFacets(ResponseBuilder rb, ShardRequest sreq) { - FacetInfo fi = rb._facetInfo; - - for (ShardResponse srsp: sreq.responses) { - // int shardNum = rb.getShardNum(srsp.shard); - NamedList facet_counts = (NamedList)srsp.getSolrResponse().getResponse().get("facet_counts"); - NamedList facet_fields = (NamedList)facet_counts.get("facet_fields"); - - if (facet_fields == null) continue; // this can happen when there's an exception - - for (int i=0; i facet_counts = new SimpleOrderedMap(); - - NamedList facet_queries = new SimpleOrderedMap(); - facet_counts.add("facet_queries",facet_queries); - for (QueryFacet qf : fi.queryFacets.values()) { - facet_queries.add(qf.getKey(), num(qf.count)); - } - - NamedList facet_fields = new SimpleOrderedMap(); - facet_counts.add("facet_fields", facet_fields); - - for (DistribFieldFacet dff : fi.facets.values()) { - NamedList fieldCounts = new NamedList(); // order is more important for facets - facet_fields.add(dff.getKey(), fieldCounts); - - ShardFacetCount[] counts; - boolean countSorted = dff.sort.equals(FacetParams.FACET_SORT_COUNT); - if (countSorted) { - counts = dff.countSorted; - if (counts == null || dff.needRefinements) { - counts = dff.getCountSorted(); - } - } else if (dff.sort.equals(FacetParams.FACET_SORT_INDEX)) { - counts = dff.getLexSorted(); - } else { // TODO: log error or throw exception? - counts = dff.getLexSorted(); - } - - if (countSorted) { - int end = dff.limit < 0 ? counts.length : Math.min(dff.offset + dff.limit, counts.length); - for (int i=dff.offset; i= 0 ? dff.limit : Integer.MAX_VALUE; - - // index order... 
- for (int i=0; i 0) { - off--; - continue; - } - if (lim <= 0) { - break; - } - lim--; - fieldCounts.add(counts[i].name, num(count)); - } - } - - if (dff.missing) { - fieldCounts.add(null, num(dff.missingCount)); - } - } - - facet_counts.add("facet_dates", fi.dateFacets); - facet_counts.add("facet_ranges", fi.rangeFacets); - - rb.rsp.add("facet_counts", facet_counts); - - rb._facetInfo = null; // could be big, so release asap - } - - - // use tags for smaller facet counts (better back compatibility) - private Number num(long val) { - if (val < Integer.MAX_VALUE) return (int)val; - else return val; - } - private Number num(Long val) { - if (val.longValue() < Integer.MAX_VALUE) return val.intValue(); - else return val; - } - - - ///////////////////////////////////////////// - /// SolrInfoMBean - //////////////////////////////////////////// - - @Override - public String getDescription() { - return "Handle Faceting"; - } - - @Override - public String getVersion() { - return "$Revision: 1152531 $"; - } - - - public String getSourceId() { - return "$Id: FacetComponent.java 1152531 2011-07-31 00:43:33Z koji $"; - } - - @Override - public String getSource() { - return "$URL: http://svn.apache.org/repos/asf/lucene/dev/tags/lucene_solr_3_5_0/solr/core/src/java/org/apache/solr/handler/component/FacetComponent.java $"; - } - - @Override - public URL[] getDocs() { - return null; - } - - /** - * This API is experimental and subject to change - */ - public static class FacetInfo { - public LinkedHashMap queryFacets; - public LinkedHashMap facets; - public SimpleOrderedMap> dateFacets - = new SimpleOrderedMap>(); - public SimpleOrderedMap> rangeFacets - = new SimpleOrderedMap>(); - - public List exceptionList; - - void parse(SolrParams params, ResponseBuilder rb) { - queryFacets = new LinkedHashMap(); - facets = new LinkedHashMap(); - - String[] facetQs = params.getParams(FacetParams.FACET_QUERY); - if (facetQs != null) { - for (String query : facetQs) { - QueryFacet queryFacet = new QueryFacet(rb, query); - queryFacets.put(queryFacet.getKey(), queryFacet); - } - } - - String[] facetFs = params.getParams(FacetParams.FACET_FIELD); - if (facetFs != null) { - - for (String field : facetFs) { - DistribFieldFacet ff = new DistribFieldFacet(rb, field); - facets.put(ff.getKey(), ff); - } - } - } - } - - /** - * This API is experimental and subject to change - */ - public static class FacetBase { - String facetType; // facet.field, facet.query, etc (make enum?) - String facetStr; // original parameter value of facetStr - String facetOn; // the field or query, absent localParams if appropriate - private String key; // label in the response for the result... 
"foo" for {!key=foo}myfield - SolrParams localParams; // any local params for the facet - - public FacetBase(ResponseBuilder rb, String facetType, String facetStr) { - this.facetType = facetType; - this.facetStr = facetStr; - try - { - this.localParams = QueryParsing.getLocalParams(facetStr, - rb.req.getParams()); - } - catch (SyntaxError e) - { - throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e); - } - this.facetOn = facetStr; - this.key = facetStr; - - if (localParams != null) { - // remove local params unless it's a query - if (!facetType.equals(FacetParams.FACET_QUERY)) { - facetOn = localParams.get(CommonParams.VALUE); - key = facetOn; - } - - key = localParams.get(CommonParams.OUTPUT_KEY, key); - } - } - - /** returns the key in the response that this facet will be under */ - public String getKey() { return key; } - public String getType() { return facetType; } - } - - /** - * This API is experimental and subject to change - */ - public static class QueryFacet extends FacetBase { - public long count; - - public QueryFacet(ResponseBuilder rb, String facetStr) { - super(rb, FacetParams.FACET_QUERY, facetStr); - } - } - - /** - * This API is experimental and subject to change - */ - public static class FieldFacet extends FacetBase { - public String field; // the field to facet on... "myfield" for {!key=foo}myfield - public FieldType ftype; - public int offset; - public int limit; - public int minCount; - public String sort; - public boolean missing; - public String prefix; - public long missingCount; - - public FieldFacet(ResponseBuilder rb, String facetStr) { - super(rb, FacetParams.FACET_FIELD, facetStr); - fillParams(rb, rb.req.getParams(), facetOn); - } - - private void fillParams(ResponseBuilder rb, SolrParams params, String field) { - this.field = field; - this.ftype = rb.req.getSchema().getFieldTypeNoEx(this.field); - this.offset = params.getFieldInt(field, FacetParams.FACET_OFFSET, 0); - this.limit = params.getFieldInt(field, FacetParams.FACET_LIMIT, 100); - Integer mincount = params.getFieldInt(field, FacetParams.FACET_MINCOUNT); - if (mincount==null) { - Boolean zeros = params.getFieldBool(field, FacetParams.FACET_ZEROS); - // mincount = (zeros!=null && zeros) ? 0 : 1; - mincount = (zeros!=null && !zeros) ? 1 : 0; - // current default is to include zeros. - } - this.minCount = mincount; - this.missing = params.getFieldBool(field, FacetParams.FACET_MISSING, false); - // default to sorting by count if there is a limit. - this.sort = params.getFieldParam(field, FacetParams.FACET_SORT, limit>0 ? FacetParams.FACET_SORT_COUNT : FacetParams.FACET_SORT_INDEX); - if (this.sort.equals(FacetParams.FACET_SORT_COUNT_LEGACY)) { - this.sort = FacetParams.FACET_SORT_COUNT; - } else if (this.sort.equals(FacetParams.FACET_SORT_INDEX_LEGACY)) { - this.sort = FacetParams.FACET_SORT_INDEX; - } - this.prefix = params.getFieldParam(field,FacetParams.FACET_PREFIX); - } - } - - /** - * This API is experimental and subject to change - */ - public static class DistribFieldFacet extends FieldFacet { - public List[] _toRefine; // a List of refinements needed, one for each shard. 
-
-    // SchemaField sf; // currently unneeded
-
-    // the max possible count for a term appearing on no list
-    public long missingMaxPossible;
-    // the max possible count for a missing term for each shard (indexed by shardNum)
-    public long[] missingMax;
-    public OpenBitSet[] counted; // a bitset for each shard, keeping track of which terms seen
-    public HashMap counts = new HashMap(128);
-    public int termNum;
-
-    public int initialLimit; // how many terms requested in first phase
-    public int initialMincount; // mincount param sent to each shard
-    public boolean needRefinements;
-    public ShardFacetCount[] countSorted;
-
-    DistribFieldFacet(ResponseBuilder rb, String facetStr) {
-      super(rb, facetStr);
-      // sf = rb.req.getSchema().getField(field);
-      missingMax = new long[rb.shards.length];
-      counted = new OpenBitSet[rb.shards.length];
-    }
-
-    void add(int shardNum, NamedList shardCounts, int numRequested) {
-      // shardCounts could be null if there was an exception
-      int sz = shardCounts == null ? 0 : shardCounts.size();
-      int numReceived = sz;
-
-      OpenBitSet terms = new OpenBitSet(termNum+sz);
-
-      long last = 0;
-      for (int i=0; i() {
-        public int compare(ShardFacetCount o1, ShardFacetCount o2) {
-          return o1.indexed.compareTo(o2.indexed);
-        }
-      });
-      countSorted = arr;
-      return arr;
-    }
-
-    public ShardFacetCount[] getCountSorted() {
-      ShardFacetCount[] arr = counts.values().toArray(new ShardFacetCount[counts.size()]);
-      Arrays.sort(arr, new Comparator() {
-        public int compare(ShardFacetCount o1, ShardFacetCount o2) {
-          if (o2.count < o1.count) return -1;
-          else if (o1.count < o2.count) return 1;
-          return o1.indexed.compareTo(o2.indexed);
-        }
-      });
-      countSorted = arr;
-      return arr;
-    }
-
-    // returns the max possible value this ShardFacetCount could have for this shard
-    // (assumes the shard did not report a count for this value)
-    long maxPossible(ShardFacetCount sfc, int shardNum) {
-      return missingMax[shardNum];
-      // TODO: could store the last term in the shard to tell if this term
-      // comes before or after it.  If it comes before, we could subtract 1
-    }
-  }
-
-  /**
-   * This API is experimental and subject to change
-   */
-  public static class ShardFacetCount {
-    public String name;
-    public String indexed; // the indexed form of the name... used for comparisons.
-    public long count;
-    public int termNum; // term number starting at 0 (used in bit arrays)
-
-    @Override
-    public String toString() {
-      return "{term="+name+",termNum="+termNum+",count="+count+"}";
-    }
-  }
-}
diff --git a/dspace/modules/oai/pom.xml b/dspace/modules/oai/pom.xml
index cf55d9ca74..4761512e59 100644
--- a/dspace/modules/oai/pom.xml
+++ b/dspace/modules/oai/pom.xml
@@ -94,17 +94,7 @@
             <groupId>org.slf4j</groupId>
             <artifactId>slf4j-log4j12</artifactId>
         </dependency>
-        <dependency>
-            <groupId>org.apache.solr</groupId>
-            <artifactId>solr-core</artifactId>
-            <version>${solr.version}</version>
-            <exclusions>
-                <exclusion>
-                    <groupId>jdk.tools</groupId>
-                    <artifactId>jdk.tools</artifactId>
-                </exclusion>
-            </exclusions>
-        </dependency>
+
         <dependency>
             <groupId>org.slf4j</groupId>
             <artifactId>slf4j-jdk14</artifactId>
diff --git a/dspace/solr/authority/conf/solrconfig.xml b/dspace/solr/authority/conf/solrconfig.xml
index ef814ba05a..837869de0d 100644
--- a/dspace/solr/authority/conf/solrconfig.xml
+++ b/dspace/solr/authority/conf/solrconfig.xml
@@ -39,7 +39,7 @@
        that you fully re-index after changing this setting as it can affect
        both how text is indexed and queried.
   -->
-  <luceneMatchVersion>4.4</luceneMatchVersion>
+  <luceneMatchVersion>4.9</luceneMatchVersion>
-  <luceneMatchVersion>4.4</luceneMatchVersion>
+  <luceneMatchVersion>4.9</luceneMatchVersion>
-  <luceneMatchVersion>4.4</luceneMatchVersion>
+  <luceneMatchVersion>4.9</luceneMatchVersion>
-  <luceneMatchVersion>4.4</luceneMatchVersion>
+  <luceneMatchVersion>4.9</luceneMatchVersion>
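Usage note: the ExtractingParams interface added above only defines parameter names; callers pass them to Solr's /update/extract handler (ExtractingRequestHandler) when posting a file for text extraction. A minimal SolrJ 4.x sketch follows; the core URL, the PDF path, and the "fulltext"/"handle" field names are illustrative assumptions, not part of this patch:

import java.io.File;

import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.impl.HttpSolrServer;
import org.apache.solr.client.solrj.request.AbstractUpdateRequest;
import org.apache.solr.client.solrj.request.ContentStreamUpdateRequest;
import org.apache.solr.handler.extraction.ExtractingParams;

public class ExtractExample {
    public static void main(String[] args) throws Exception {
        // Hypothetical core URL; adjust to the target Solr instance.
        SolrServer solr = new HttpSolrServer("http://localhost:8983/solr/search");

        ContentStreamUpdateRequest req = new ContentStreamUpdateRequest("/update/extract");
        req.addFile(new File("document.pdf"), "application/pdf");

        // Lowercase/underscore all generated field names.
        req.setParam(ExtractingParams.LOWERNAMES, "true");
        // Map Tika's "content" output to a hypothetical "fulltext" field.
        req.setParam(ExtractingParams.MAP_PREFIX + "content", "fulltext");
        // Attach a literal identifier to the indexed document (illustrative field name).
        req.setParam(ExtractingParams.LITERALS_PREFIX + "handle", "123456789/1");
        // Prefix any unmapped metadata so a dynamic field can catch it.
        req.setParam(ExtractingParams.UNKNOWN_FIELD_PREFIX, "attr_");
        // Keep indexing even if Tika fails on a damaged file.
        req.setParam(ExtractingParams.IGNORE_TIKA_EXCEPTION, "true");

        req.setAction(AbstractUpdateRequest.ACTION.COMMIT, true, true);
        solr.request(req);
    }
}

Setting ExtractingParams.EXTRACT_ONLY to "true" instead returns the extracted body without indexing it, which is handy for debugging fmap mappings before committing anything.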