solr version bump pt 1

Roeland
2014-11-05 22:02:50 +01:00
parent b2a729aead
commit 007524b821
12 changed files with 182 additions and 882 deletions

View File

@@ -515,21 +515,7 @@
</exclusion>
</exclusions>
</dependency>
-<dependency>
-<groupId>org.apache.solr</groupId>
-<artifactId>solr-cell</artifactId>
-<version>${solr.version}</version>
-<exclusions>
-<exclusion>
-<groupId>org.slf4j</groupId>
-<artifactId>jcl-over-slf4j</artifactId>
-</exclusion>
-<exclusion>
-<groupId>org.slf4j</groupId>
-<artifactId>slf4j-api</artifactId>
-</exclusion>
-</exclusions>
-</dependency>
<dependency>
<groupId>commons-configuration</groupId>
<artifactId>commons-configuration</artifactId>
@@ -621,6 +607,11 @@
<groupId>com.google.code.findbugs</groupId>
<artifactId>annotations</artifactId>
</dependency>
+<dependency>
+<groupId>joda-time</groupId>
+<artifactId>joda-time</artifactId>
+<version>2.3</version>
+</dependency>
</dependencies>
</project>
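The joda-time artifact was presumably pulled in transitively before (via the Solr dependencies removed above) and is now declared explicitly so that date-handling code keeps compiling. A minimal sketch of the kind of usage that requires it; the class name and timestamp are illustrative only:

import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
import org.joda.time.format.ISODateTimeFormat;

public class JodaTimeCheck {
    public static void main(String[] args) {
        // Parse a Solr-style ISO-8601 timestamp and normalize it to UTC.
        DateTime dt = ISODateTimeFormat.dateTimeParser()
                .parseDateTime("2014-11-05T22:02:50+01:00");
        System.out.println(dt.withZone(DateTimeZone.UTC));
    }
}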

View File

@@ -0,0 +1,165 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.extraction;
/**
* The various Solr parameter names to use when extracting content.
*/
public interface ExtractingParams {
/**
* Map all generated attribute names to field names with lowercase and underscores.
*/
public static final String LOWERNAMES = "lowernames";
/**
* If true, ignore TikaException (give up on extracting text, but still index the metadata).
*/
public static final String IGNORE_TIKA_EXCEPTION = "ignoreTikaException";
/**
* The param prefix for mapping Tika metadata to Solr fields.
* <p/>
* To map a field, add a name like:
* <pre>fmap.title=solr.title</pre>
*
* In this example, the Tika "title" metadata value will be added to a Solr field named "solr.title".
*/
public static final String MAP_PREFIX = "fmap.";
/**
* The boost value for the name of the field. The boost can be specified by a name mapping.
* <p/>
* For example
* <pre>
* fmap.title=solr.title
* boost.solr.title=2.5
* </pre>
* will boost the solr.title field for this document by 2.5
*
*/
public static final String BOOST_PREFIX = "boost.";
/**
* Pass in literal values to be added to the document, as in
* <pre>
* literal.myField=Foo
* </pre>
*
*/
public static final String LITERALS_PREFIX = "literal.";
/**
* Restrict the extracted parts of a document to be indexed
* by passing in an XPath expression. All content that satisfies the XPath expr.
* will be passed to the {@link SolrContentHandler}.
* <p/>
* See Tika's docs for what the extracted document looks like.
* <p/>
* @see #CAPTURE_ELEMENTS
*/
public static final String XPATH_EXPRESSION = "xpath";
/**
* Only extract and return the content, do not index it.
*/
public static final String EXTRACT_ONLY = "extractOnly";
/**
* Content output format if extractOnly is true. Default is "xml", alternative is "text".
*/
public static final String EXTRACT_FORMAT = "extractFormat";
/**
* Capture attributes separately according to the name of the element, instead of just adding them to the string buffer
*/
public static final String CAPTURE_ATTRIBUTES = "captureAttr";
/**
* Literal field values will by default override other values such as metadata and content. Set this to false to revert to pre-4.0 behaviour
*/
public static final String LITERALS_OVERRIDE = "literalsOverride";
/**
* Capture the specified fields (and everything included below them that isn't captured by some other capture field) separately from the default. This is different
* from the case of passing in an XPath expression.
* <p/>
* The Capture field is based on the localName returned to the {@link SolrContentHandler}
* by Tika, not to be confused with the mapped field. The field name can then
* be mapped into the index schema.
* <p/>
* For instance, a Tika document may look like:
* <pre>
* &lt;html&gt;
* ...
* &lt;body&gt;
* &lt;p&gt;some text here. &lt;div&gt;more text&lt;/div&gt;&lt;/p&gt;
* Some more text
* &lt;/body&gt;
* </pre>
* By passing in the p tag, you could capture all P tags separately from the rest of the text.
* Thus, in the example, the capture of the P tag would be: "some text here. more text"
*
*/
public static final String CAPTURE_ELEMENTS = "capture";
/**
* The type of the stream. If not specified, Tika will use mime type detection.
*/
public static final String STREAM_TYPE = "stream.type";
/**
* Optional. The file name. If specified, Tika can take this into account while
* guessing the MIME type.
*/
public static final String RESOURCE_NAME = "resource.name";
/**
* Optional. The password for this resource. Will be used instead of the rule-based password lookup mechanisms.
*/
public static final String RESOURCE_PASSWORD = "resource.password";
/**
* Optional. If specified, the prefix will be prepended to all Metadata, such that it would be possible
* to set up a dynamic field to automatically capture it.
*/
public static final String UNKNOWN_FIELD_PREFIX = "uprefix";
/**
* Optional. If specified and the name of a potential field cannot be determined, the default Field specified
* will be used instead.
*/
public static final String DEFAULT_FIELD = "defaultField";
/**
* Optional. If specified, loads the file as a source for password lookups for Tika encrypted documents.
* <p>
* File format is Java properties format with one key=value per line.
* The key is evaluated as a regex against the file name, and the value is the password.
* The rules are evaluated top to bottom, i.e. the first match will be used.
* If you want a fallback password that is always used, supply .*=&lt;defaultmypassword&gt; at the end.
*/
public static final String PASSWORD_MAP_FILE = "passwordsFile";
}
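The names above are the request parameters understood by Solr's extracting request handler, typically mounted at /update/extract. A minimal SolrJ 4.x sketch tying several of them together; the core URL, input file, and field names are assumptions for illustration, not taken from this commit:

import java.io.File;
import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.impl.HttpSolrServer;
import org.apache.solr.client.solrj.request.AbstractUpdateRequest;
import org.apache.solr.client.solrj.request.ContentStreamUpdateRequest;

public class ExtractExample {
    public static void main(String[] args) throws Exception {
        SolrServer solr = new HttpSolrServer("http://localhost:8983/solr/search");
        ContentStreamUpdateRequest req = new ContentStreamUpdateRequest("/update/extract");
        req.addFile(new File("example.pdf"), "application/pdf"); // hypothetical input file
        req.setParam("literal.id", "doc-1");      // LITERALS_PREFIX: fixed field value
        req.setParam("fmap.content", "fulltext"); // MAP_PREFIX: map Tika "content" to "fulltext"
        req.setParam("uprefix", "attr_");         // UNKNOWN_FIELD_PREFIX: route unmapped metadata
        req.setParam("lowernames", "true");       // LOWERNAMES
        req.setAction(AbstractUpdateRequest.ACTION.COMMIT, true, true);
        solr.request(req);
        solr.shutdown();
    }
}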

View File

@@ -14,7 +14,7 @@ import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
-import org.apache.hadoop.security.authorize.ServiceAuthorizationManager;
import org.dspace.authorize.AuthorizeException;
import org.apache.log4j.Logger;
import org.dspace.core.Context;

View File

@@ -146,16 +146,8 @@
<artifactId>solr-solrj</artifactId>
<version>${solr.version}</version>
</dependency>
-<dependency>
-<groupId>org.apache.solr</groupId>
-<artifactId>solr-core</artifactId>
-<version>${solr.version}</version>
-</dependency>
-<dependency>
-<groupId>org.apache.lucene</groupId>
-<artifactId>lucene-core</artifactId>
-<version>${lucene.version}</version>
-</dependency>
<!-- Web API -->
<dependency>

View File

@@ -31,8 +31,8 @@
<packaging>war</packaging>
<properties>
-<lucene.version>4.4.0</lucene.version>
-<solr.version>4.4.0</solr.version>
+<solr.version>4.10.2</solr.version>
<!-- 'root.basedir' is the path to the root [dspace-src] dir. It must be redefined by each child POM,
as it is used to reference the LICENSE_HEADER and *.properties file(s) in that directory. -->
<root.basedir>${basedir}/..</root.basedir>
@@ -143,10 +143,6 @@
<artifactId>solr-cell</artifactId>
<version>${solr.version}</version>
</dependency>
-<dependency>
-<groupId>org.apache.lucene</groupId>
-<artifactId>lucene-core</artifactId>
-</dependency>
<!-- Replace J.U.L. logging with log4j -->
<dependency>
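With the explicit lucene-core pin gone, the Lucene jars should resolve transitively from the Solr artifacts, which keeps the two version sets from drifting apart. One way to confirm what actually resolves is to run the standard Maven goal
mvn dependency:tree -Dincludes=org.apache.lucene
from the affected module.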

View File

@@ -1,834 +0,0 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.apache.solr.handler.component;
import org.apache.lucene.util.OpenBitSet;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.FacetParams;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.common.util.StrUtils;
import org.apache.solr.request.SimpleFacets;
import org.apache.solr.schema.FieldType;
import org.apache.solr.search.QueryParsing;
import org.apache.solr.search.SyntaxError;
import java.io.IOException;
import java.net.URL;
import java.util.*;
/**
* TODO!
*
* @version $Id: FacetComponent.java 1152531 2011-07-31 00:43:33Z koji $
* @since solr 1.3
*/
public class FacetComponent extends SearchComponent
{
public static final String COMPONENT_NAME = "facet";
@Override
public void prepare(ResponseBuilder rb) throws IOException
{
if (rb.req.getParams().getBool(FacetParams.FACET,false)) {
rb.setNeedDocSet( true );
rb.doFacets = true;
}
}
/**
* Actually run the query
* @param rb
*/
@Override
public void process(ResponseBuilder rb) throws IOException
{
if (rb.doFacets) {
SolrParams params = rb.req.getParams();
SimpleFacets f = new SimpleFacets(rb.req,
rb.getResults().docSet,
params,
rb );
// TODO ???? add this directly to the response, or to the builder?
rb.rsp.add( "facet_counts", f.getFacetCounts() );
}
}
private static final String commandPrefix = "{!" + CommonParams.TERMS + "=$";
@Override
public int distributedProcess(ResponseBuilder rb) throws IOException {
if (!rb.doFacets) {
return ResponseBuilder.STAGE_DONE;
}
if (rb.stage == ResponseBuilder.STAGE_GET_FIELDS) {
// overlap facet refinement requests (those shards that we need a count for
// particular facet values from), where possible, with
// the requests to get fields (because we know that is the
// only other required phase).
// We do this in distributedProcess so we can look at all of the
// requests in the outgoing queue at once.
for (int shardNum=0; shardNum<rb.shards.length; shardNum++) {
List<String> refinements = null;
for (DistribFieldFacet dff : rb._facetInfo.facets.values()) {
if (!dff.needRefinements) continue;
List<String> refList = dff._toRefine[shardNum];
if (refList == null || refList.size()==0) continue;
String key = dff.getKey(); // reuse the same key that was used for the main facet
String termsKey = key + "__terms";
String termsVal = StrUtils.join(refList, ',');
String facetCommand;
// add terms into the original facet.field command
// do it via parameter reference to avoid another layer of encoding.
String termsKeyEncoded = QueryParsing.encodeLocalParamVal(termsKey);
if (dff.localParams != null) {
facetCommand = commandPrefix+termsKeyEncoded + " " + dff.facetStr.substring(2);
} else {
facetCommand = commandPrefix+termsKeyEncoded+'}'+dff.field;
}
if (refinements == null) {
refinements = new ArrayList<String>();
}
refinements.add(facetCommand);
refinements.add(termsKey);
refinements.add(termsVal);
}
if (refinements == null) continue;
String shard = rb.shards[shardNum];
ShardRequest refine = null;
boolean newRequest = false;
// try to find a request that is already going out to that shard.
// If nshards becomes too great, we may want to move to hashing for better
// scalability.
for (ShardRequest sreq : rb.outgoing) {
if ((sreq.purpose & ShardRequest.PURPOSE_GET_FIELDS)!=0
&& sreq.shards != null
&& sreq.shards.length==1
&& sreq.shards[0].equals(shard))
{
refine = sreq;
break;
}
}
if (refine == null) {
// we didn't find any other suitable requests going out to that shard, so
// create one ourselves.
newRequest = true;
refine = new ShardRequest();
refine.shards = new String[]{rb.shards[shardNum]};
refine.params = new ModifiableSolrParams(rb.req.getParams());
// don't request any documents
refine.params.remove(CommonParams.START);
refine.params.set(CommonParams.ROWS,"0");
}
refine.purpose |= ShardRequest.PURPOSE_REFINE_FACETS;
refine.params.set(FacetParams.FACET, "true");
refine.params.remove(FacetParams.FACET_FIELD);
refine.params.remove(FacetParams.FACET_QUERY);
for (int i=0; i<refinements.size();) {
String facetCommand=refinements.get(i++);
String termsKey=refinements.get(i++);
String termsVal=refinements.get(i++);
refine.params.add(FacetParams.FACET_FIELD, facetCommand);
refine.params.set(termsKey, termsVal);
}
if (newRequest) {
rb.addRequest(this, refine);
}
}
}
return ResponseBuilder.STAGE_DONE;
}
@Override
public void modifyRequest(ResponseBuilder rb, SearchComponent who, ShardRequest sreq) {
if (!rb.doFacets) return;
if ((sreq.purpose & ShardRequest.PURPOSE_GET_TOP_IDS) != 0) {
sreq.purpose |= ShardRequest.PURPOSE_GET_FACETS;
FacetInfo fi = rb._facetInfo;
if (fi == null) {
rb._facetInfo = fi = new FacetInfo();
fi.parse(rb.req.getParams(), rb);
// should already be true...
// sreq.params.set(FacetParams.FACET, "true");
}
sreq.params.remove(FacetParams.FACET_MINCOUNT);
sreq.params.remove(FacetParams.FACET_OFFSET);
sreq.params.remove(FacetParams.FACET_LIMIT);
for (DistribFieldFacet dff : fi.facets.values()) {
String paramStart = "f." + dff.field + '.';
sreq.params.remove(paramStart + FacetParams.FACET_MINCOUNT);
sreq.params.remove(paramStart + FacetParams.FACET_OFFSET);
dff.initialLimit = dff.limit <= 0 ? dff.limit : dff.offset + dff.limit;
if (dff.sort.equals(FacetParams.FACET_SORT_COUNT)) {
if (dff.limit > 0) {
// set the initial limit higher to increase accuracy
dff.initialLimit = (int)(dff.initialLimit * 1.5) + 10;
dff.initialMincount = 0; // TODO: we could change this to 1, but would then need more refinement for small facet result sets?
} else {
// if limit==-1, then no need to artificially lower mincount to 0 if it's 1
dff.initialMincount = Math.min(dff.minCount, 1);
}
} else {
// we're sorting by index order.
// if minCount==0, we should always be able to get accurate results w/o over-requesting or refining
// if minCount==1, we should be able to get accurate results w/o over-requesting, but we'll need to refine
// if minCount==n (>1), we can set the initialMincount to minCount/nShards, rounded up.
// For example, we know that if minCount=10 and we have 3 shards, then at least one shard must have a count of 4 for the term
// For the minCount>1 case, we can generate too short of a list (miss terms at the end of the list) unless limit==-1
// For example: each shard could produce a list of top 10, but some of those could fail to make it into the combined list (i.e.
// we needed to go beyond the top 10 to generate the top 10 combined). Overrequesting can help a little here, but not as
// much as when sorting by count.
if (dff.minCount <= 1) {
dff.initialMincount = dff.minCount;
} else {
dff.initialMincount = (int)Math.ceil((double)dff.minCount / rb.shards.length);
// dff.initialMincount = 1;
}
}
if (dff.initialMincount != 0) {
sreq.params.set(paramStart + FacetParams.FACET_MINCOUNT, dff.initialMincount);
}
// Currently this is for testing only and allows overriding of the
// facet.limit set to the shards
dff.initialLimit = rb.req.getParams().getInt("facet.shard.limit", dff.initialLimit);
sreq.params.set(paramStart + FacetParams.FACET_LIMIT, dff.initialLimit);
}
} else {
// turn off faceting on other requests
sreq.params.set(FacetParams.FACET, "false");
// we could optionally remove faceting params
}
}
@Override
public void handleResponses(ResponseBuilder rb, ShardRequest sreq) {
if (!rb.doFacets) return;
if ((sreq.purpose & ShardRequest.PURPOSE_GET_FACETS)!=0) {
countFacets(rb, sreq);
} else if ((sreq.purpose & ShardRequest.PURPOSE_REFINE_FACETS)!=0) {
refineFacets(rb, sreq);
}
}
private void countFacets(ResponseBuilder rb, ShardRequest sreq) {
FacetInfo fi = rb._facetInfo;
for (ShardResponse srsp: sreq.responses) {
int shardNum = rb.getShardNum(srsp.getShard());
NamedList facet_counts = (NamedList)srsp.getSolrResponse().getResponse().get("facet_counts");
// handle facet queries
NamedList facet_queries = (NamedList)facet_counts.get("facet_queries");
if (facet_queries != null) {
for (int i=0; i<facet_queries.size(); i++) {
String returnedKey = facet_queries.getName(i);
long count = ((Number)facet_queries.getVal(i)).longValue();
QueryFacet qf = fi.queryFacets.get(returnedKey);
qf.count += count;
}
}
// step through each facet.field, adding results from this shard
NamedList facet_fields = (NamedList)facet_counts.get("facet_fields");
if (facet_fields != null) {
for (DistribFieldFacet dff : fi.facets.values()) {
dff.add(shardNum, (NamedList)facet_fields.get(dff.getKey()), dff.initialLimit);
}
}
// Distributed facet_dates
//
// The implementation below uses the first encountered shard's
// facet_dates as the basis for subsequent shards' data to be merged.
// (the "NOW" param should ensure consistency)
@SuppressWarnings("unchecked")
SimpleOrderedMap<SimpleOrderedMap<Object>> facet_dates =
(SimpleOrderedMap<SimpleOrderedMap<Object>>)
facet_counts.get("facet_dates");
if (facet_dates != null) {
// go through each facet_date
for (Map.Entry<String,SimpleOrderedMap<Object>> entry : facet_dates) {
final String field = entry.getKey();
if (fi.dateFacets.get(field) == null) {
// first time we've seen this field, no merging
fi.dateFacets.add(field, entry.getValue());
} else {
// not the first time, merge current field
SimpleOrderedMap<Object> shardFieldValues
= entry.getValue();
SimpleOrderedMap<Object> existFieldValues
= fi.dateFacets.get(field);
for (Map.Entry<String,Object> existPair : existFieldValues) {
final String key = existPair.getKey();
if (key.equals("gap") ||
key.equals("end") ||
key.equals("start")) {
// we can skip these, must all be the same across shards
continue;
}
// can be null if inconsistencies in shards responses
Integer newValue = (Integer) shardFieldValues.get(key);
if (null != newValue) {
Integer oldValue = ((Integer) existPair.getValue());
existPair.setValue(oldValue + newValue);
}
}
}
}
}
// Distributed facet_ranges
//
// The implementation below uses the first encountered shard's
// facet_ranges as the basis for subsequent shards' data to be merged.
@SuppressWarnings("unchecked")
SimpleOrderedMap<SimpleOrderedMap<Object>> facet_ranges =
(SimpleOrderedMap<SimpleOrderedMap<Object>>)
facet_counts.get("facet_ranges");
if (facet_ranges != null) {
// go through each facet_range
for (Map.Entry<String,SimpleOrderedMap<Object>> entry : facet_ranges) {
final String field = entry.getKey();
if (fi.rangeFacets.get(field) == null) {
// first time we've seen this field, no merging
fi.rangeFacets.add(field, entry.getValue());
} else {
// not the first time, merge current field counts
@SuppressWarnings("unchecked")
NamedList<Integer> shardFieldValues
= (NamedList<Integer>) entry.getValue().get("counts");
@SuppressWarnings("unchecked")
NamedList<Integer> existFieldValues
= (NamedList<Integer>) fi.rangeFacets.get(field).get("counts");
for (Map.Entry<String,Integer> existPair : existFieldValues) {
final String key = existPair.getKey();
// can be null if inconsistencies in shards responses
Integer newValue = shardFieldValues.get(key);
if (null != newValue) {
Integer oldValue = existPair.getValue();
existPair.setValue(oldValue + newValue);
}
}
}
}
}
}
//
// This code currently assumes that there will be only a single
// request (with responses from all shards) sent out to get facets...
// otherwise we would need to wait until all facet responses were received.
//
for (DistribFieldFacet dff : fi.facets.values()) {
// no need to check these facets for refinement
if (dff.initialLimit <= 0 && dff.initialMincount == 0) continue;
// only other case where index-sort doesn't need refinement is if minCount==0
if (dff.minCount == 0 && dff.sort.equals(FacetParams.FACET_SORT_INDEX)) continue;
@SuppressWarnings("unchecked") // generic array's are annoying
List<String>[] tmp = (List<String>[]) new List[rb.shards.length];
dff._toRefine = tmp;
ShardFacetCount[] counts = dff.getCountSorted();
int ntop = Math.min(counts.length, dff.limit >= 0 ? dff.offset + dff.limit : Integer.MAX_VALUE);
long smallestCount = counts.length == 0 ? 0 : counts[ntop-1].count;
for (int i=0; i<counts.length; i++) {
ShardFacetCount sfc = counts[i];
boolean needRefinement = false;
if (i<ntop) {
// automatically flag the top values for refinement
// this should always be true for facet.sort=index
needRefinement = true;
} else {
// this logic should only be invoked for facet.sort=index (for now)
// calculate the maximum value that this term may have
// and if it is >= smallestCount, then flag for refinement
long maxCount = sfc.count;
for (int shardNum=0; shardNum<rb.shards.length; shardNum++) {
OpenBitSet obs = dff.counted[shardNum];
if (!obs.get(sfc.termNum)) {
// if missing from this shard, add the max it could be
maxCount += dff.maxPossible(sfc,shardNum);
}
}
if (maxCount >= smallestCount) {
// TODO: on a tie, we could check the term values
needRefinement = true;
}
}
if (needRefinement) {
// add a query for each shard missing the term that needs refinement
for (int shardNum=0; shardNum<rb.shards.length; shardNum++) {
OpenBitSet obs = dff.counted[shardNum];
if (!obs.get(sfc.termNum) && dff.maxPossible(sfc,shardNum)>0) {
dff.needRefinements = true;
List<String> lst = dff._toRefine[shardNum];
if (lst == null) {
lst = dff._toRefine[shardNum] = new ArrayList<String>();
}
lst.add(sfc.name);
}
}
}
}
}
}
private void refineFacets(ResponseBuilder rb, ShardRequest sreq) {
FacetInfo fi = rb._facetInfo;
for (ShardResponse srsp: sreq.responses) {
// int shardNum = rb.getShardNum(srsp.shard);
NamedList facet_counts = (NamedList)srsp.getSolrResponse().getResponse().get("facet_counts");
NamedList facet_fields = (NamedList)facet_counts.get("facet_fields");
if (facet_fields == null) continue; // this can happen when there's an exception
for (int i=0; i<facet_fields.size(); i++) {
String key = facet_fields.getName(i);
DistribFieldFacet dff = fi.facets.get(key);
if (dff == null) continue;
NamedList shardCounts = (NamedList)facet_fields.getVal(i);
for (int j=0; j<shardCounts.size(); j++) {
String name = shardCounts.getName(j);
long count = ((Number)shardCounts.getVal(j)).longValue();
ShardFacetCount sfc = dff.counts.get(name);
sfc.count += count;
}
}
}
}
@Override
public void finishStage(ResponseBuilder rb) {
if (!rb.doFacets || rb.stage != ResponseBuilder.STAGE_GET_FIELDS) return;
// wait until STAGE_GET_FIELDS
// so that "result" is already stored in the response (for aesthetics)
FacetInfo fi = rb._facetInfo;
NamedList<Object> facet_counts = new SimpleOrderedMap<Object>();
NamedList facet_queries = new SimpleOrderedMap();
facet_counts.add("facet_queries",facet_queries);
for (QueryFacet qf : fi.queryFacets.values()) {
facet_queries.add(qf.getKey(), num(qf.count));
}
NamedList<Object> facet_fields = new SimpleOrderedMap<Object>();
facet_counts.add("facet_fields", facet_fields);
for (DistribFieldFacet dff : fi.facets.values()) {
NamedList<Object> fieldCounts = new NamedList<Object>(); // order is more important for facets
facet_fields.add(dff.getKey(), fieldCounts);
ShardFacetCount[] counts;
boolean countSorted = dff.sort.equals(FacetParams.FACET_SORT_COUNT);
if (countSorted) {
counts = dff.countSorted;
if (counts == null || dff.needRefinements) {
counts = dff.getCountSorted();
}
} else if (dff.sort.equals(FacetParams.FACET_SORT_INDEX)) {
counts = dff.getLexSorted();
} else { // TODO: log error or throw exception?
counts = dff.getLexSorted();
}
if (countSorted) {
int end = dff.limit < 0 ? counts.length : Math.min(dff.offset + dff.limit, counts.length);
for (int i=dff.offset; i<end; i++) {
if (counts[i].count < dff.minCount) {
break;
}
fieldCounts.add(counts[i].name, num(counts[i].count));
}
} else {
int off = dff.offset;
int lim = dff.limit >= 0 ? dff.limit : Integer.MAX_VALUE;
// index order...
for (int i=0; i<counts.length; i++) {
long count = counts[i].count;
if (count < dff.minCount) continue;
if (off > 0) {
off--;
continue;
}
if (lim <= 0) {
break;
}
lim--;
fieldCounts.add(counts[i].name, num(count));
}
}
if (dff.missing) {
fieldCounts.add(null, num(dff.missingCount));
}
}
facet_counts.add("facet_dates", fi.dateFacets);
facet_counts.add("facet_ranges", fi.rangeFacets);
rb.rsp.add("facet_counts", facet_counts);
rb._facetInfo = null; // could be big, so release asap
}
// use <int> tags for smaller facet counts (better back compatibility)
private Number num(long val) {
if (val < Integer.MAX_VALUE) return (int)val;
else return val;
}
private Number num(Long val) {
if (val.longValue() < Integer.MAX_VALUE) return val.intValue();
else return val;
}
/////////////////////////////////////////////
/// SolrInfoMBean
////////////////////////////////////////////
@Override
public String getDescription() {
return "Handle Faceting";
}
@Override
public String getVersion() {
return "$Revision: 1152531 $";
}
public String getSourceId() {
return "$Id: FacetComponent.java 1152531 2011-07-31 00:43:33Z koji $";
}
@Override
public String getSource() {
return "$URL: http://svn.apache.org/repos/asf/lucene/dev/tags/lucene_solr_3_5_0/solr/core/src/java/org/apache/solr/handler/component/FacetComponent.java $";
}
@Override
public URL[] getDocs() {
return null;
}
/**
* <b>This API is experimental and subject to change</b>
*/
public static class FacetInfo {
public LinkedHashMap<String,QueryFacet> queryFacets;
public LinkedHashMap<String,DistribFieldFacet> facets;
public SimpleOrderedMap<SimpleOrderedMap<Object>> dateFacets
= new SimpleOrderedMap<SimpleOrderedMap<Object>>();
public SimpleOrderedMap<SimpleOrderedMap<Object>> rangeFacets
= new SimpleOrderedMap<SimpleOrderedMap<Object>>();
public List exceptionList;
void parse(SolrParams params, ResponseBuilder rb) {
queryFacets = new LinkedHashMap<String,QueryFacet>();
facets = new LinkedHashMap<String,DistribFieldFacet>();
String[] facetQs = params.getParams(FacetParams.FACET_QUERY);
if (facetQs != null) {
for (String query : facetQs) {
QueryFacet queryFacet = new QueryFacet(rb, query);
queryFacets.put(queryFacet.getKey(), queryFacet);
}
}
String[] facetFs = params.getParams(FacetParams.FACET_FIELD);
if (facetFs != null) {
for (String field : facetFs) {
DistribFieldFacet ff = new DistribFieldFacet(rb, field);
facets.put(ff.getKey(), ff);
}
}
}
}
/**
* <b>This API is experimental and subject to change</b>
*/
public static class FacetBase {
String facetType; // facet.field, facet.query, etc (make enum?)
String facetStr; // original parameter value of facetStr
String facetOn; // the field or query, absent localParams if appropriate
private String key; // label in the response for the result... "foo" for {!key=foo}myfield
SolrParams localParams; // any local params for the facet
public FacetBase(ResponseBuilder rb, String facetType, String facetStr) {
this.facetType = facetType;
this.facetStr = facetStr;
try
{
this.localParams = QueryParsing.getLocalParams(facetStr,
rb.req.getParams());
}
catch (SyntaxError e)
{
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e);
}
this.facetOn = facetStr;
this.key = facetStr;
if (localParams != null) {
// remove local params unless it's a query
if (!facetType.equals(FacetParams.FACET_QUERY)) {
facetOn = localParams.get(CommonParams.VALUE);
key = facetOn;
}
key = localParams.get(CommonParams.OUTPUT_KEY, key);
}
}
/** returns the key in the response that this facet will be under */
public String getKey() { return key; }
public String getType() { return facetType; }
}
/**
* <b>This API is experimental and subject to change</b>
*/
public static class QueryFacet extends FacetBase {
public long count;
public QueryFacet(ResponseBuilder rb, String facetStr) {
super(rb, FacetParams.FACET_QUERY, facetStr);
}
}
/**
* <b>This API is experimental and subject to change</b>
*/
public static class FieldFacet extends FacetBase {
public String field; // the field to facet on... "myfield" for {!key=foo}myfield
public FieldType ftype;
public int offset;
public int limit;
public int minCount;
public String sort;
public boolean missing;
public String prefix;
public long missingCount;
public FieldFacet(ResponseBuilder rb, String facetStr) {
super(rb, FacetParams.FACET_FIELD, facetStr);
fillParams(rb, rb.req.getParams(), facetOn);
}
private void fillParams(ResponseBuilder rb, SolrParams params, String field) {
this.field = field;
this.ftype = rb.req.getSchema().getFieldTypeNoEx(this.field);
this.offset = params.getFieldInt(field, FacetParams.FACET_OFFSET, 0);
this.limit = params.getFieldInt(field, FacetParams.FACET_LIMIT, 100);
Integer mincount = params.getFieldInt(field, FacetParams.FACET_MINCOUNT);
if (mincount==null) {
Boolean zeros = params.getFieldBool(field, FacetParams.FACET_ZEROS);
// mincount = (zeros!=null && zeros) ? 0 : 1;
mincount = (zeros!=null && !zeros) ? 1 : 0;
// current default is to include zeros.
}
this.minCount = mincount;
this.missing = params.getFieldBool(field, FacetParams.FACET_MISSING, false);
// default to sorting by count if there is a limit.
this.sort = params.getFieldParam(field, FacetParams.FACET_SORT, limit>0 ? FacetParams.FACET_SORT_COUNT : FacetParams.FACET_SORT_INDEX);
if (this.sort.equals(FacetParams.FACET_SORT_COUNT_LEGACY)) {
this.sort = FacetParams.FACET_SORT_COUNT;
} else if (this.sort.equals(FacetParams.FACET_SORT_INDEX_LEGACY)) {
this.sort = FacetParams.FACET_SORT_INDEX;
}
this.prefix = params.getFieldParam(field,FacetParams.FACET_PREFIX);
}
}
/**
* <b>This API is experimental and subject to change</b>
*/
public static class DistribFieldFacet extends FieldFacet {
public List<String>[] _toRefine; // a List<String> of refinements needed, one for each shard.
// SchemaField sf; // currently unneeded
// the max possible count for a term appearing on no list
public long missingMaxPossible;
// the max possible count for a missing term for each shard (indexed by shardNum)
public long[] missingMax;
public OpenBitSet[] counted; // a bitset for each shard, keeping track of which terms were seen
public HashMap<String,ShardFacetCount> counts = new HashMap<String,ShardFacetCount>(128);
public int termNum;
public int initialLimit; // how many terms requested in first phase
public int initialMincount; // mincount param sent to each shard
public boolean needRefinements;
public ShardFacetCount[] countSorted;
DistribFieldFacet(ResponseBuilder rb, String facetStr) {
super(rb, facetStr);
// sf = rb.req.getSchema().getField(field);
missingMax = new long[rb.shards.length];
counted = new OpenBitSet[rb.shards.length];
}
void add(int shardNum, NamedList shardCounts, int numRequested) {
// shardCounts could be null if there was an exception
int sz = shardCounts == null ? 0 : shardCounts.size();
int numReceived = sz;
OpenBitSet terms = new OpenBitSet(termNum+sz);
long last = 0;
for (int i=0; i<sz; i++) {
String name = shardCounts.getName(i);
long count = ((Number)shardCounts.getVal(i)).longValue();
if (name == null) {
missingCount += count;
numReceived--;
} else {
ShardFacetCount sfc = counts.get(name);
if (sfc == null) {
sfc = new ShardFacetCount();
sfc.name = name;
sfc.indexed = ftype == null ? sfc.name : ftype.toInternal(sfc.name);
sfc.termNum = termNum++;
counts.put(name, sfc);
}
sfc.count += count;
terms.fastSet(sfc.termNum);
last = count;
}
}
// the largest possible missing term is initialMincount if we received fewer
// than the number requested.
if (numRequested<0 || numRequested != 0 && numReceived < numRequested) {
last = initialMincount;
}
missingMaxPossible += last;
missingMax[shardNum] = last;
counted[shardNum] = terms;
}
public ShardFacetCount[] getLexSorted() {
ShardFacetCount[] arr = counts.values().toArray(new ShardFacetCount[counts.size()]);
Arrays.sort(arr, new Comparator<ShardFacetCount>() {
public int compare(ShardFacetCount o1, ShardFacetCount o2) {
return o1.indexed.compareTo(o2.indexed);
}
});
countSorted = arr;
return arr;
}
public ShardFacetCount[] getCountSorted() {
ShardFacetCount[] arr = counts.values().toArray(new ShardFacetCount[counts.size()]);
Arrays.sort(arr, new Comparator<ShardFacetCount>() {
public int compare(ShardFacetCount o1, ShardFacetCount o2) {
if (o2.count < o1.count) return -1;
else if (o1.count < o2.count) return 1;
return o1.indexed.compareTo(o2.indexed);
}
});
countSorted = arr;
return arr;
}
// returns the max possible value this ShardFacetCount could have for this shard
// (assumes the shard did not report a count for this value)
long maxPossible(ShardFacetCount sfc, int shardNum) {
return missingMax[shardNum];
// TODO: could store the last term in the shard to tell if this term
// comes before or after it. If it comes before, we could subtract 1
}
}
/**
* <b>This API is experimental and subject to change</b>
*/
public static class ShardFacetCount {
public String name;
public String indexed; // the indexed form of the name... used for comparisons.
public long count;
public int termNum; // term number starting at 0 (used in bit arrays)
@Override
public String toString() {
return "{term="+name+",termNum="+termNum+",count="+count+"}";
}
}
}
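Deleting this fork means distributed facet counting presumably falls back to the stock FacetComponent shipped with Solr 4.10.2, which accepts the same request parameters and produces the same response layout. A minimal SolrJ sketch of the consuming side; the core URL and the "author" field are assumptions for illustration:

import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.impl.HttpSolrServer;
import org.apache.solr.client.solrj.response.FacetField;
import org.apache.solr.client.solrj.response.QueryResponse;

public class FacetExample {
    public static void main(String[] args) throws Exception {
        HttpSolrServer solr = new HttpSolrServer("http://localhost:8983/solr/search");
        SolrQuery q = new SolrQuery("*:*");
        q.setFacet(true);          // facet=true
        q.addFacetField("author"); // facet.field=author (hypothetical field)
        q.setFacetMinCount(1);     // facet.mincount=1
        q.setFacetLimit(10);       // facet.limit=10
        QueryResponse rsp = solr.query(q);
        FacetField author = rsp.getFacetField("author");
        for (FacetField.Count c : author.getValues()) {
            System.out.println(c.getName() + ": " + c.getCount());
        }
        solr.shutdown();
    }
}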

View File

@@ -94,17 +94,7 @@
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
</dependency>
-<dependency>
-<groupId>org.apache.solr</groupId>
-<artifactId>solr-core</artifactId>
-<version>${solr.version}</version>
-<exclusions>
-<exclusion>
-<groupId>jdk.tools</groupId>
-<artifactId>jdk.tools</artifactId>
-</exclusion>
-</exclusions>
-</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-jdk14</artifactId>

View File

@@ -39,7 +39,7 @@
that you fully re-index after changing this setting as it can
affect both how text is indexed and queried.
-->
-<luceneMatchVersion>4.4</luceneMatchVersion>
+<luceneMatchVersion>4.9</luceneMatchVersion>
<!-- lib directives can be used to instruct Solr to load any Jars identified
and use them to resolve any "plugins" specified in your solrconfig.xml or

View File

@@ -35,7 +35,7 @@
that you fully re-index after changing this setting as it can
affect both how text is indexed and queried.
-->
-<luceneMatchVersion>4.4</luceneMatchVersion>
+<luceneMatchVersion>4.9</luceneMatchVersion>
<!-- <lib/> directives can be used to instruct Solr to load any Jars
identified and use them to resolve any "plugins" specified in

View File

@@ -35,7 +35,7 @@
that you fully re-index after changing this setting as it can
affect both how text is indexed and queried.
-->
-<luceneMatchVersion>4.4</luceneMatchVersion>
+<luceneMatchVersion>4.9</luceneMatchVersion>
<!-- <lib/> directives can be used to instruct Solr to load any Jars
identified and use them to resolve any "plugins" specified in

View File

@@ -35,7 +35,7 @@
that you fully re-index after changing this setting as it can
affect both how text is indexed and queried.
-->
-<luceneMatchVersion>4.4</luceneMatchVersion>
+<luceneMatchVersion>4.9</luceneMatchVersion>
<!-- <lib/> directives can be used to instruct Solr to load any Jars
identified and use them to resolve any "plugins" specified in

View File

@@ -32,7 +32,7 @@
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<java.version>1.7</java.version>
<lucene.version>4.4.0</lucene.version>
-<solr.version>4.4.0</solr.version>
+<solr.version>4.10.2</solr.version>
<jena.version>2.12.0</jena.version>
<slf4j.version>1.6.1</slf4j.version>
<!-- 'root.basedir' is the path to the root [dspace-src] dir. It must be redefined by each child POM,