mirror of
https://github.com/DSpace/DSpace.git
synced 2025-10-07 01:54:22 +00:00
Merge pull request #732 from rradillen/DS-2253
DS-2253: bumping solr version
This commit is contained in:
@@ -81,6 +81,7 @@
|
||||
<exclude>**/src/test/data/**</exclude>
|
||||
<exclude>**/.gitignore</exclude>
|
||||
<exclude>src/test/data/dspaceFolder/config/spiders/**</exclude>
|
||||
<exclude>src/main/java/org/apache/solr/handler/extraction/ExtractingParams.java</exclude>
|
||||
</excludes>
|
||||
</configuration>
|
||||
</plugin>
|
||||
@@ -515,21 +516,7 @@
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.solr</groupId>
|
||||
<artifactId>solr-cell</artifactId>
|
||||
<version>${solr.version}</version>
|
||||
<exclusions>
|
||||
<exclusion>
|
||||
<groupId>org.slf4j</groupId>
|
||||
<artifactId>jcl-over-slf4j</artifactId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<groupId>org.slf4j</groupId>
|
||||
<artifactId>slf4j-api</artifactId>
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>commons-configuration</groupId>
|
||||
<artifactId>commons-configuration</artifactId>
|
||||
@@ -553,7 +540,7 @@
|
||||
<dependency>
|
||||
<groupId>org.elasticsearch</groupId>
|
||||
<artifactId>elasticsearch</artifactId>
|
||||
<version>0.90.3</version>
|
||||
<version>1.3.4</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
@@ -621,6 +608,11 @@
|
||||
<groupId>com.google.code.findbugs</groupId>
|
||||
<artifactId>annotations</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>joda-time</groupId>
|
||||
<artifactId>joda-time</artifactId>
|
||||
<version>2.3</version>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
</project>
|
||||
|
@@ -0,0 +1,165 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.handler.extraction;
|
||||
|
||||
|
||||
/**
|
||||
* The various Solr Parameters names to use when extracting content.
|
||||
*
|
||||
**/
|
||||
public interface ExtractingParams {
|
||||
|
||||
/**
|
||||
* Map all generated attribute names to field names with lowercase and underscores.
|
||||
*/
|
||||
public static final String LOWERNAMES = "lowernames";
|
||||
|
||||
/**
|
||||
* if true, ignore TikaException (give up to extract text but index meta data)
|
||||
*/
|
||||
public static final String IGNORE_TIKA_EXCEPTION = "ignoreTikaException";
|
||||
|
||||
|
||||
/**
|
||||
* The param prefix for mapping Tika metadata to Solr fields.
|
||||
* <p/>
|
||||
* To map a field, add a name like:
|
||||
* <pre>fmap.title=solr.title</pre>
|
||||
*
|
||||
* In this example, the tika "title" metadata value will be added to a Solr field named "solr.title"
|
||||
*
|
||||
*
|
||||
*/
|
||||
public static final String MAP_PREFIX = "fmap.";
|
||||
|
||||
/**
|
||||
* The boost value for the name of the field. The boost can be specified by a name mapping.
|
||||
* <p/>
|
||||
* For example
|
||||
* <pre>
|
||||
* map.title=solr.title
|
||||
* boost.solr.title=2.5
|
||||
* </pre>
|
||||
* will boost the solr.title field for this document by 2.5
|
||||
*
|
||||
*/
|
||||
public static final String BOOST_PREFIX = "boost.";
|
||||
|
||||
/**
|
||||
* Pass in literal values to be added to the document, as in
|
||||
* <pre>
|
||||
* literal.myField=Foo
|
||||
* </pre>
|
||||
*
|
||||
*/
|
||||
public static final String LITERALS_PREFIX = "literal.";
|
||||
|
||||
|
||||
/**
|
||||
* Restrict the extracted parts of a document to be indexed
|
||||
* by passing in an XPath expression. All content that satisfies the XPath expr.
|
||||
* will be passed to the {@link SolrContentHandler}.
|
||||
* <p/>
|
||||
* See Tika's docs for what the extracted document looks like.
|
||||
* <p/>
|
||||
* @see #CAPTURE_ELEMENTS
|
||||
*/
|
||||
public static final String XPATH_EXPRESSION = "xpath";
|
||||
|
||||
|
||||
/**
|
||||
* Only extract and return the content, do not index it.
|
||||
*/
|
||||
public static final String EXTRACT_ONLY = "extractOnly";
|
||||
|
||||
/**
|
||||
* Content output format if extractOnly is true. Default is "xml", alternative is "text".
|
||||
*/
|
||||
public static final String EXTRACT_FORMAT = "extractFormat";
|
||||
|
||||
/**
|
||||
* Capture attributes separately according to the name of the element, instead of just adding them to the string buffer
|
||||
*/
|
||||
public static final String CAPTURE_ATTRIBUTES = "captureAttr";
|
||||
|
||||
/**
|
||||
* Literal field values will by default override other values such as metadata and content. Set this to false to revert to pre-4.0 behaviour
|
||||
*/
|
||||
public static final String LITERALS_OVERRIDE = "literalsOverride";
|
||||
|
||||
/**
|
||||
* Capture the specified fields (and everything included below it that isn't capture by some other capture field) separately from the default. This is different
|
||||
* then the case of passing in an XPath expression.
|
||||
* <p/>
|
||||
* The Capture field is based on the localName returned to the {@link SolrContentHandler}
|
||||
* by Tika, not to be confused by the mapped field. The field name can then
|
||||
* be mapped into the index schema.
|
||||
* <p/>
|
||||
* For instance, a Tika document may look like:
|
||||
* <pre>
|
||||
* <html>
|
||||
* ...
|
||||
* <body>
|
||||
* <p>some text here. <div>more text</div></p>
|
||||
* Some more text
|
||||
* </body>
|
||||
* </pre>
|
||||
* By passing in the p tag, you could capture all P tags separately from the rest of the t
|
||||
* Thus, in the example, the capture of the P tag would be: "some text here. more text"
|
||||
*
|
||||
*/
|
||||
public static final String CAPTURE_ELEMENTS = "capture";
|
||||
|
||||
/**
|
||||
* The type of the stream. If not specified, Tika will use mime type detection.
|
||||
*/
|
||||
public static final String STREAM_TYPE = "stream.type";
|
||||
|
||||
|
||||
/**
|
||||
* Optional. The file name. If specified, Tika can take this into account while
|
||||
* guessing the MIME type.
|
||||
*/
|
||||
public static final String RESOURCE_NAME = "resource.name";
|
||||
|
||||
/**
|
||||
* Optional. The password for this resource. Will be used instead of the rule based password lookup mechanisms
|
||||
*/
|
||||
public static final String RESOURCE_PASSWORD = "resource.password";
|
||||
|
||||
/**
|
||||
* Optional. If specified, the prefix will be prepended to all Metadata, such that it would be possible
|
||||
* to setup a dynamic field to automatically capture it
|
||||
*/
|
||||
public static final String UNKNOWN_FIELD_PREFIX = "uprefix";
|
||||
|
||||
/**
|
||||
* Optional. If specified and the name of a potential field cannot be determined, the default Field specified
|
||||
* will be used instead.
|
||||
*/
|
||||
public static final String DEFAULT_FIELD = "defaultField";
|
||||
|
||||
/**
|
||||
* Optional. If specified, loads the file as a source for password lookups for Tika encrypted documents.
|
||||
* <p>
|
||||
* File format is Java properties format with one key=value per line.
|
||||
* The key is evaluated as a regex against the file name, and the value is the password
|
||||
* The rules are evaluated top-bottom, i.e. the first match will be used
|
||||
* If you want a fallback password to be always used, supply a .*=<defaultmypassword> at the end
|
||||
*/
|
||||
public static final String PASSWORD_MAP_FILE = "passwordsFile";
|
||||
}
|
@@ -14,7 +14,7 @@ import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.hadoop.security.authorize.ServiceAuthorizationManager;
|
||||
|
||||
import org.dspace.authorize.AuthorizeException;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.dspace.core.Context;
|
||||
|
@@ -32,7 +32,6 @@ import org.dspace.authorize.AuthorizeException;
|
||||
import org.dspace.content.Collection;
|
||||
import org.dspace.core.*;
|
||||
import org.dspace.utils.DSpace;
|
||||
import org.elasticsearch.common.collect.Lists;
|
||||
|
||||
/**
|
||||
* Servlet to batch import metadata via the BTE
|
||||
|
@@ -45,7 +45,6 @@
|
||||
<%@page import="org.dspace.core.Constants"%>
|
||||
<%@page import="org.dspace.eperson.EPerson"%>
|
||||
<%@page import="org.dspace.versioning.VersionHistory"%>
|
||||
<%@page import="org.elasticsearch.common.trove.strategy.HashingStrategy"%>
|
||||
<%
|
||||
// Attributes
|
||||
Boolean displayAllBoolean = (Boolean) request.getAttribute("display.all");
|
||||
|
@@ -146,16 +146,8 @@
|
||||
<artifactId>solr-solrj</artifactId>
|
||||
<version>${solr.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.solr</groupId>
|
||||
<artifactId>solr-core</artifactId>
|
||||
<version>${solr.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.lucene</groupId>
|
||||
<artifactId>lucene-core</artifactId>
|
||||
<version>${lucene.version}</version>
|
||||
</dependency>
|
||||
|
||||
|
||||
|
||||
<!-- Web API -->
|
||||
<dependency>
|
||||
|
@@ -31,8 +31,8 @@
|
||||
<packaging>war</packaging>
|
||||
|
||||
<properties>
|
||||
<lucene.version>4.4.0</lucene.version>
|
||||
<solr.version>4.4.0</solr.version>
|
||||
|
||||
<solr.version>4.10.2</solr.version>
|
||||
<!-- 'root.basedir' is the path to the root [dspace-src] dir. It must be redefined by each child POM,
|
||||
as it is used to reference the LICENSE_HEADER and *.properties file(s) in that directory. -->
|
||||
<root.basedir>${basedir}/..</root.basedir>
|
||||
@@ -131,6 +131,54 @@
|
||||
<groupId>jdk.tools</groupId>
|
||||
<artifactId>jdk.tools</artifactId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<artifactId>javax.servlet</artifactId>
|
||||
<groupId>org.eclipse.jetty.orbit</groupId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<artifactId>jetty-continuation</artifactId>
|
||||
<groupId>org.eclipse.jetty</groupId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<artifactId>jetty-deploy</artifactId>
|
||||
<groupId>org.eclipse.jetty</groupId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<artifactId>jetty-http</artifactId>
|
||||
<groupId>org.eclipse.jetty</groupId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<artifactId>jetty-io</artifactId>
|
||||
<groupId>org.eclipse.jetty</groupId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<artifactId>jetty-jmx</artifactId>
|
||||
<groupId>org.eclipse.jetty</groupId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<artifactId>jetty-security</artifactId>
|
||||
<groupId>org.eclipse.jetty</groupId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<artifactId>jetty-server</artifactId>
|
||||
<groupId>org.eclipse.jetty</groupId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<artifactId>jetty-servlet</artifactId>
|
||||
<groupId>org.eclipse.jetty</groupId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<artifactId>jetty-util</artifactId>
|
||||
<groupId>org.eclipse.jetty</groupId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<artifactId>jetty-webapp</artifactId>
|
||||
<groupId>org.eclipse.jetty</groupId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<artifactId>jetty-xml</artifactId>
|
||||
<groupId>org.eclipse.jetty</groupId>
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
<dependency>
|
||||
@@ -142,10 +190,56 @@
|
||||
<groupId>org.apache.solr</groupId>
|
||||
<artifactId>solr-cell</artifactId>
|
||||
<version>${solr.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.lucene</groupId>
|
||||
<artifactId>lucene-core</artifactId>
|
||||
<exclusions>
|
||||
<exclusion>
|
||||
<artifactId>javax.servlet</artifactId>
|
||||
<groupId>org.eclipse.jetty.orbit</groupId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<artifactId>jetty-continuation</artifactId>
|
||||
<groupId>org.eclipse.jetty</groupId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<artifactId>jetty-deploy</artifactId>
|
||||
<groupId>org.eclipse.jetty</groupId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<artifactId>jetty-http</artifactId>
|
||||
<groupId>org.eclipse.jetty</groupId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<artifactId>jetty-io</artifactId>
|
||||
<groupId>org.eclipse.jetty</groupId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<artifactId>jetty-jmx</artifactId>
|
||||
<groupId>org.eclipse.jetty</groupId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<artifactId>jetty-security</artifactId>
|
||||
<groupId>org.eclipse.jetty</groupId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<artifactId>jetty-server</artifactId>
|
||||
<groupId>org.eclipse.jetty</groupId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<artifactId>jetty-servlet</artifactId>
|
||||
<groupId>org.eclipse.jetty</groupId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<artifactId>jetty-util</artifactId>
|
||||
<groupId>org.eclipse.jetty</groupId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<artifactId>jetty-webapp</artifactId>
|
||||
<groupId>org.eclipse.jetty</groupId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<artifactId>jetty-xml</artifactId>
|
||||
<groupId>org.eclipse.jetty</groupId>
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
|
||||
<!-- Replace J.U.L. logging with log4j -->
|
||||
|
@@ -1,834 +0,0 @@
|
||||
/**
|
||||
* The contents of this file are subject to the license and copyright
|
||||
* detailed in the LICENSE and NOTICE files at the root of the source
|
||||
* tree and available online at
|
||||
*
|
||||
* http://www.dspace.org/license/
|
||||
*/
|
||||
|
||||
package org.apache.solr.handler.component;
|
||||
|
||||
import org.apache.lucene.util.OpenBitSet;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.params.CommonParams;
|
||||
import org.apache.solr.common.params.FacetParams;
|
||||
import org.apache.solr.common.params.ModifiableSolrParams;
|
||||
import org.apache.solr.common.params.SolrParams;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.common.util.SimpleOrderedMap;
|
||||
import org.apache.solr.common.util.StrUtils;
|
||||
import org.apache.solr.request.SimpleFacets;
|
||||
import org.apache.solr.schema.FieldType;
|
||||
import org.apache.solr.search.QueryParsing;
|
||||
import org.apache.solr.search.SyntaxError;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.net.URL;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* TODO!
|
||||
*
|
||||
* @version $Id: FacetComponent.java 1152531 2011-07-31 00:43:33Z koji $
|
||||
* @since solr 1.3
|
||||
*/
|
||||
public class FacetComponent extends SearchComponent
|
||||
{
|
||||
public static final String COMPONENT_NAME = "facet";
|
||||
|
||||
@Override
|
||||
public void prepare(ResponseBuilder rb) throws IOException
|
||||
{
|
||||
if (rb.req.getParams().getBool(FacetParams.FACET,false)) {
|
||||
rb.setNeedDocSet( true );
|
||||
rb.doFacets = true;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Actually run the query
|
||||
* @param rb
|
||||
*/
|
||||
@Override
|
||||
public void process(ResponseBuilder rb) throws IOException
|
||||
{
|
||||
if (rb.doFacets) {
|
||||
SolrParams params = rb.req.getParams();
|
||||
SimpleFacets f = new SimpleFacets(rb.req,
|
||||
rb.getResults().docSet,
|
||||
params,
|
||||
rb );
|
||||
|
||||
// TODO ???? add this directly to the response, or to the builder?
|
||||
rb.rsp.add( "facet_counts", f.getFacetCounts() );
|
||||
}
|
||||
}
|
||||
|
||||
private static final String commandPrefix = "{!" + CommonParams.TERMS + "=$";
|
||||
|
||||
@Override
|
||||
public int distributedProcess(ResponseBuilder rb) throws IOException {
|
||||
if (!rb.doFacets) {
|
||||
return ResponseBuilder.STAGE_DONE;
|
||||
}
|
||||
|
||||
if (rb.stage == ResponseBuilder.STAGE_GET_FIELDS) {
|
||||
// overlap facet refinement requests (those shards that we need a count for
|
||||
// particular facet values from), where possible, with
|
||||
// the requests to get fields (because we know that is the
|
||||
// only other required phase).
|
||||
// We do this in distributedProcess so we can look at all of the
|
||||
// requests in the outgoing queue at once.
|
||||
|
||||
|
||||
|
||||
for (int shardNum=0; shardNum<rb.shards.length; shardNum++) {
|
||||
List<String> refinements = null;
|
||||
|
||||
for (DistribFieldFacet dff : rb._facetInfo.facets.values()) {
|
||||
if (!dff.needRefinements) continue;
|
||||
List<String> refList = dff._toRefine[shardNum];
|
||||
if (refList == null || refList.size()==0) continue;
|
||||
|
||||
String key = dff.getKey(); // reuse the same key that was used for the main facet
|
||||
String termsKey = key + "__terms";
|
||||
String termsVal = StrUtils.join(refList, ',');
|
||||
|
||||
String facetCommand;
|
||||
// add terms into the original facet.field command
|
||||
// do it via parameter reference to avoid another layer of encoding.
|
||||
|
||||
String termsKeyEncoded = QueryParsing.encodeLocalParamVal(termsKey);
|
||||
if (dff.localParams != null) {
|
||||
facetCommand = commandPrefix+termsKeyEncoded + " " + dff.facetStr.substring(2);
|
||||
} else {
|
||||
facetCommand = commandPrefix+termsKeyEncoded+'}'+dff.field;
|
||||
}
|
||||
|
||||
if (refinements == null) {
|
||||
refinements = new ArrayList<String>();
|
||||
}
|
||||
|
||||
refinements.add(facetCommand);
|
||||
refinements.add(termsKey);
|
||||
refinements.add(termsVal);
|
||||
}
|
||||
|
||||
if (refinements == null) continue;
|
||||
|
||||
|
||||
String shard = rb.shards[shardNum];
|
||||
ShardRequest refine = null;
|
||||
boolean newRequest = false;
|
||||
|
||||
// try to find a request that is already going out to that shard.
|
||||
// If nshards becomes to great, we way want to move to hashing for better
|
||||
// scalability.
|
||||
for (ShardRequest sreq : rb.outgoing) {
|
||||
if ((sreq.purpose & ShardRequest.PURPOSE_GET_FIELDS)!=0
|
||||
&& sreq.shards != null
|
||||
&& sreq.shards.length==1
|
||||
&& sreq.shards[0].equals(shard))
|
||||
{
|
||||
refine = sreq;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (refine == null) {
|
||||
// we didn't find any other suitable requests going out to that shard, so
|
||||
// create one ourselves.
|
||||
newRequest = true;
|
||||
refine = new ShardRequest();
|
||||
refine.shards = new String[]{rb.shards[shardNum]};
|
||||
refine.params = new ModifiableSolrParams(rb.req.getParams());
|
||||
// don't request any documents
|
||||
refine.params.remove(CommonParams.START);
|
||||
refine.params.set(CommonParams.ROWS,"0");
|
||||
}
|
||||
|
||||
refine.purpose |= ShardRequest.PURPOSE_REFINE_FACETS;
|
||||
refine.params.set(FacetParams.FACET, "true");
|
||||
refine.params.remove(FacetParams.FACET_FIELD);
|
||||
refine.params.remove(FacetParams.FACET_QUERY);
|
||||
|
||||
for (int i=0; i<refinements.size();) {
|
||||
String facetCommand=refinements.get(i++);
|
||||
String termsKey=refinements.get(i++);
|
||||
String termsVal=refinements.get(i++);
|
||||
|
||||
refine.params.add(FacetParams.FACET_FIELD, facetCommand);
|
||||
refine.params.set(termsKey, termsVal);
|
||||
}
|
||||
|
||||
if (newRequest) {
|
||||
rb.addRequest(this, refine);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return ResponseBuilder.STAGE_DONE;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void modifyRequest(ResponseBuilder rb, SearchComponent who, ShardRequest sreq) {
|
||||
if (!rb.doFacets) return;
|
||||
|
||||
if ((sreq.purpose & ShardRequest.PURPOSE_GET_TOP_IDS) != 0) {
|
||||
sreq.purpose |= ShardRequest.PURPOSE_GET_FACETS;
|
||||
|
||||
FacetInfo fi = rb._facetInfo;
|
||||
if (fi == null) {
|
||||
rb._facetInfo = fi = new FacetInfo();
|
||||
fi.parse(rb.req.getParams(), rb);
|
||||
// should already be true...
|
||||
// sreq.params.set(FacetParams.FACET, "true");
|
||||
}
|
||||
|
||||
sreq.params.remove(FacetParams.FACET_MINCOUNT);
|
||||
sreq.params.remove(FacetParams.FACET_OFFSET);
|
||||
sreq.params.remove(FacetParams.FACET_LIMIT);
|
||||
|
||||
for (DistribFieldFacet dff : fi.facets.values()) {
|
||||
String paramStart = "f." + dff.field + '.';
|
||||
sreq.params.remove(paramStart + FacetParams.FACET_MINCOUNT);
|
||||
sreq.params.remove(paramStart + FacetParams.FACET_OFFSET);
|
||||
|
||||
dff.initialLimit = dff.limit <= 0 ? dff.limit : dff.offset + dff.limit;
|
||||
|
||||
if (dff.sort.equals(FacetParams.FACET_SORT_COUNT)) {
|
||||
if (dff.limit > 0) {
|
||||
// set the initial limit higher to increase accuracy
|
||||
dff.initialLimit = (int)(dff.initialLimit * 1.5) + 10;
|
||||
dff.initialMincount = 0; // TODO: we could change this to 1, but would then need more refinement for small facet result sets?
|
||||
} else {
|
||||
// if limit==-1, then no need to artificially lower mincount to 0 if it's 1
|
||||
dff.initialMincount = Math.min(dff.minCount, 1);
|
||||
}
|
||||
} else {
|
||||
// we're sorting by index order.
|
||||
// if minCount==0, we should always be able to get accurate results w/o over-requesting or refining
|
||||
// if minCount==1, we should be able to get accurate results w/o over-requesting, but we'll need to refine
|
||||
// if minCount==n (>1), we can set the initialMincount to minCount/nShards, rounded up.
|
||||
// For example, we know that if minCount=10 and we have 3 shards, then at least one shard must have a count of 4 for the term
|
||||
// For the minCount>1 case, we can generate too short of a list (miss terms at the end of the list) unless limit==-1
|
||||
// For example: each shard could produce a list of top 10, but some of those could fail to make it into the combined list (i.e.
|
||||
// we needed to go beyond the top 10 to generate the top 10 combined). Overrequesting can help a little here, but not as
|
||||
// much as when sorting by count.
|
||||
if (dff.minCount <= 1) {
|
||||
dff.initialMincount = dff.minCount;
|
||||
} else {
|
||||
dff.initialMincount = (int)Math.ceil((double)dff.minCount / rb.shards.length);
|
||||
// dff.initialMincount = 1;
|
||||
}
|
||||
}
|
||||
|
||||
if (dff.initialMincount != 0) {
|
||||
sreq.params.set(paramStart + FacetParams.FACET_MINCOUNT, dff.initialMincount);
|
||||
}
|
||||
|
||||
// Currently this is for testing only and allows overriding of the
|
||||
// facet.limit set to the shards
|
||||
dff.initialLimit = rb.req.getParams().getInt("facet.shard.limit", dff.initialLimit);
|
||||
|
||||
sreq.params.set(paramStart + FacetParams.FACET_LIMIT, dff.initialLimit);
|
||||
}
|
||||
} else {
|
||||
// turn off faceting on other requests
|
||||
sreq.params.set(FacetParams.FACET, "false");
|
||||
// we could optionally remove faceting params
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void handleResponses(ResponseBuilder rb, ShardRequest sreq) {
|
||||
if (!rb.doFacets) return;
|
||||
|
||||
if ((sreq.purpose & ShardRequest.PURPOSE_GET_FACETS)!=0) {
|
||||
countFacets(rb, sreq);
|
||||
} else if ((sreq.purpose & ShardRequest.PURPOSE_REFINE_FACETS)!=0) {
|
||||
refineFacets(rb, sreq);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
private void countFacets(ResponseBuilder rb, ShardRequest sreq) {
|
||||
FacetInfo fi = rb._facetInfo;
|
||||
|
||||
for (ShardResponse srsp: sreq.responses) {
|
||||
int shardNum = rb.getShardNum(srsp.getShard());
|
||||
NamedList facet_counts = (NamedList)srsp.getSolrResponse().getResponse().get("facet_counts");
|
||||
|
||||
// handle facet queries
|
||||
NamedList facet_queries = (NamedList)facet_counts.get("facet_queries");
|
||||
if (facet_queries != null) {
|
||||
for (int i=0; i<facet_queries.size(); i++) {
|
||||
String returnedKey = facet_queries.getName(i);
|
||||
long count = ((Number)facet_queries.getVal(i)).longValue();
|
||||
QueryFacet qf = fi.queryFacets.get(returnedKey);
|
||||
qf.count += count;
|
||||
}
|
||||
}
|
||||
|
||||
// step through each facet.field, adding results from this shard
|
||||
NamedList facet_fields = (NamedList)facet_counts.get("facet_fields");
|
||||
|
||||
if (facet_fields != null) {
|
||||
for (DistribFieldFacet dff : fi.facets.values()) {
|
||||
dff.add(shardNum, (NamedList)facet_fields.get(dff.getKey()), dff.initialLimit);
|
||||
}
|
||||
}
|
||||
|
||||
// Distributed facet_dates
|
||||
//
|
||||
// The implementation below uses the first encountered shard's
|
||||
// facet_dates as the basis for subsequent shards' data to be merged.
|
||||
// (the "NOW" param should ensure consistency)
|
||||
@SuppressWarnings("unchecked")
|
||||
SimpleOrderedMap<SimpleOrderedMap<Object>> facet_dates =
|
||||
(SimpleOrderedMap<SimpleOrderedMap<Object>>)
|
||||
facet_counts.get("facet_dates");
|
||||
|
||||
if (facet_dates != null) {
|
||||
|
||||
// go through each facet_date
|
||||
for (Map.Entry<String,SimpleOrderedMap<Object>> entry : facet_dates) {
|
||||
final String field = entry.getKey();
|
||||
if (fi.dateFacets.get(field) == null) {
|
||||
// first time we've seen this field, no merging
|
||||
fi.dateFacets.add(field, entry.getValue());
|
||||
|
||||
} else {
|
||||
// not the first time, merge current field
|
||||
|
||||
SimpleOrderedMap<Object> shardFieldValues
|
||||
= entry.getValue();
|
||||
SimpleOrderedMap<Object> existFieldValues
|
||||
= fi.dateFacets.get(field);
|
||||
|
||||
for (Map.Entry<String,Object> existPair : existFieldValues) {
|
||||
final String key = existPair.getKey();
|
||||
if (key.equals("gap") ||
|
||||
key.equals("end") ||
|
||||
key.equals("start")) {
|
||||
// we can skip these, must all be the same across shards
|
||||
continue;
|
||||
}
|
||||
// can be null if inconsistencies in shards responses
|
||||
Integer newValue = (Integer) shardFieldValues.get(key);
|
||||
if (null != newValue) {
|
||||
Integer oldValue = ((Integer) existPair.getValue());
|
||||
existPair.setValue(oldValue + newValue);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Distributed facet_ranges
|
||||
//
|
||||
// The implementation below uses the first encountered shard's
|
||||
// facet_ranges as the basis for subsequent shards' data to be merged.
|
||||
@SuppressWarnings("unchecked")
|
||||
SimpleOrderedMap<SimpleOrderedMap<Object>> facet_ranges =
|
||||
(SimpleOrderedMap<SimpleOrderedMap<Object>>)
|
||||
facet_counts.get("facet_ranges");
|
||||
|
||||
if (facet_ranges != null) {
|
||||
|
||||
// go through each facet_range
|
||||
for (Map.Entry<String,SimpleOrderedMap<Object>> entry : facet_ranges) {
|
||||
final String field = entry.getKey();
|
||||
if (fi.rangeFacets.get(field) == null) {
|
||||
// first time we've seen this field, no merging
|
||||
fi.rangeFacets.add(field, entry.getValue());
|
||||
|
||||
} else {
|
||||
// not the first time, merge current field counts
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
NamedList<Integer> shardFieldValues
|
||||
= (NamedList<Integer>) entry.getValue().get("counts");
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
NamedList<Integer> existFieldValues
|
||||
= (NamedList<Integer>) fi.rangeFacets.get(field).get("counts");
|
||||
|
||||
for (Map.Entry<String,Integer> existPair : existFieldValues) {
|
||||
final String key = existPair.getKey();
|
||||
// can be null if inconsistencies in shards responses
|
||||
Integer newValue = shardFieldValues.get(key);
|
||||
if (null != newValue) {
|
||||
Integer oldValue = existPair.getValue();
|
||||
existPair.setValue(oldValue + newValue);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
//
|
||||
// This code currently assumes that there will be only a single
|
||||
// request ((with responses from all shards) sent out to get facets...
|
||||
// otherwise we would need to wait until all facet responses were received.
|
||||
//
|
||||
|
||||
for (DistribFieldFacet dff : fi.facets.values()) {
|
||||
// no need to check these facets for refinement
|
||||
if (dff.initialLimit <= 0 && dff.initialMincount == 0) continue;
|
||||
|
||||
// only other case where index-sort doesn't need refinement is if minCount==0
|
||||
if (dff.minCount == 0 && dff.sort.equals(FacetParams.FACET_SORT_INDEX)) continue;
|
||||
|
||||
@SuppressWarnings("unchecked") // generic array's are annoying
|
||||
List<String>[] tmp = (List<String>[]) new List[rb.shards.length];
|
||||
dff._toRefine = tmp;
|
||||
|
||||
ShardFacetCount[] counts = dff.getCountSorted();
|
||||
int ntop = Math.min(counts.length, dff.limit >= 0 ? dff.offset + dff.limit : Integer.MAX_VALUE);
|
||||
long smallestCount = counts.length == 0 ? 0 : counts[ntop-1].count;
|
||||
|
||||
for (int i=0; i<counts.length; i++) {
|
||||
ShardFacetCount sfc = counts[i];
|
||||
boolean needRefinement = false;
|
||||
|
||||
if (i<ntop) {
|
||||
// automatically flag the top values for refinement
|
||||
// this should always be true for facet.sort=index
|
||||
needRefinement = true;
|
||||
} else {
|
||||
// this logic should only be invoked for facet.sort=index (for now)
|
||||
|
||||
// calculate the maximum value that this term may have
|
||||
// and if it is >= smallestCount, then flag for refinement
|
||||
long maxCount = sfc.count;
|
||||
for (int shardNum=0; shardNum<rb.shards.length; shardNum++) {
|
||||
OpenBitSet obs = dff.counted[shardNum];
|
||||
if (!obs.get(sfc.termNum)) {
|
||||
// if missing from this shard, add the max it could be
|
||||
maxCount += dff.maxPossible(sfc,shardNum);
|
||||
}
|
||||
}
|
||||
if (maxCount >= smallestCount) {
|
||||
// TODO: on a tie, we could check the term values
|
||||
needRefinement = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (needRefinement) {
|
||||
// add a query for each shard missing the term that needs refinement
|
||||
for (int shardNum=0; shardNum<rb.shards.length; shardNum++) {
|
||||
OpenBitSet obs = dff.counted[shardNum];
|
||||
if (!obs.get(sfc.termNum) && dff.maxPossible(sfc,shardNum)>0) {
|
||||
dff.needRefinements = true;
|
||||
List<String> lst = dff._toRefine[shardNum];
|
||||
if (lst == null) {
|
||||
lst = dff._toRefine[shardNum] = new ArrayList<String>();
|
||||
}
|
||||
lst.add(sfc.name);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private void refineFacets(ResponseBuilder rb, ShardRequest sreq) {
|
||||
FacetInfo fi = rb._facetInfo;
|
||||
|
||||
for (ShardResponse srsp: sreq.responses) {
|
||||
// int shardNum = rb.getShardNum(srsp.shard);
|
||||
NamedList facet_counts = (NamedList)srsp.getSolrResponse().getResponse().get("facet_counts");
|
||||
NamedList facet_fields = (NamedList)facet_counts.get("facet_fields");
|
||||
|
||||
if (facet_fields == null) continue; // this can happen when there's an exception
|
||||
|
||||
for (int i=0; i<facet_fields.size(); i++) {
|
||||
String key = facet_fields.getName(i);
|
||||
DistribFieldFacet dff = fi.facets.get(key);
|
||||
if (dff == null) continue;
|
||||
|
||||
NamedList shardCounts = (NamedList)facet_fields.getVal(i);
|
||||
|
||||
for (int j=0; j<shardCounts.size(); j++) {
|
||||
String name = shardCounts.getName(j);
|
||||
long count = ((Number)shardCounts.getVal(j)).longValue();
|
||||
ShardFacetCount sfc = dff.counts.get(name);
|
||||
sfc.count += count;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void finishStage(ResponseBuilder rb) {
|
||||
if (!rb.doFacets || rb.stage != ResponseBuilder.STAGE_GET_FIELDS) return;
|
||||
// wait until STAGE_GET_FIELDS
|
||||
// so that "result" is already stored in the response (for aesthetics)
|
||||
|
||||
|
||||
FacetInfo fi = rb._facetInfo;
|
||||
|
||||
NamedList<Object> facet_counts = new SimpleOrderedMap<Object>();
|
||||
|
||||
NamedList facet_queries = new SimpleOrderedMap();
|
||||
facet_counts.add("facet_queries",facet_queries);
|
||||
for (QueryFacet qf : fi.queryFacets.values()) {
|
||||
facet_queries.add(qf.getKey(), num(qf.count));
|
||||
}
|
||||
|
||||
NamedList<Object> facet_fields = new SimpleOrderedMap<Object>();
|
||||
facet_counts.add("facet_fields", facet_fields);
|
||||
|
||||
for (DistribFieldFacet dff : fi.facets.values()) {
|
||||
NamedList<Object> fieldCounts = new NamedList<Object>(); // order is more important for facets
|
||||
facet_fields.add(dff.getKey(), fieldCounts);
|
||||
|
||||
ShardFacetCount[] counts;
|
||||
boolean countSorted = dff.sort.equals(FacetParams.FACET_SORT_COUNT);
|
||||
if (countSorted) {
|
||||
counts = dff.countSorted;
|
||||
if (counts == null || dff.needRefinements) {
|
||||
counts = dff.getCountSorted();
|
||||
}
|
||||
} else if (dff.sort.equals(FacetParams.FACET_SORT_INDEX)) {
|
||||
counts = dff.getLexSorted();
|
||||
} else { // TODO: log error or throw exception?
|
||||
counts = dff.getLexSorted();
|
||||
}
|
||||
|
||||
if (countSorted) {
|
||||
int end = dff.limit < 0 ? counts.length : Math.min(dff.offset + dff.limit, counts.length);
|
||||
for (int i=dff.offset; i<end; i++) {
|
||||
if (counts[i].count < dff.minCount) {
|
||||
break;
|
||||
}
|
||||
fieldCounts.add(counts[i].name, num(counts[i].count));
|
||||
}
|
||||
} else {
|
||||
int off = dff.offset;
|
||||
int lim = dff.limit >= 0 ? dff.limit : Integer.MAX_VALUE;
|
||||
|
||||
// index order...
|
||||
for (int i=0; i<counts.length; i++) {
|
||||
long count = counts[i].count;
|
||||
if (count < dff.minCount) continue;
|
||||
if (off > 0) {
|
||||
off--;
|
||||
continue;
|
||||
}
|
||||
if (lim <= 0) {
|
||||
break;
|
||||
}
|
||||
lim--;
|
||||
fieldCounts.add(counts[i].name, num(count));
|
||||
}
|
||||
}
|
||||
|
||||
if (dff.missing) {
|
||||
fieldCounts.add(null, num(dff.missingCount));
|
||||
}
|
||||
}
|
||||
|
||||
facet_counts.add("facet_dates", fi.dateFacets);
|
||||
facet_counts.add("facet_ranges", fi.rangeFacets);
|
||||
|
||||
rb.rsp.add("facet_counts", facet_counts);
|
||||
|
||||
rb._facetInfo = null; // could be big, so release asap
|
||||
}
|
||||
|
||||
|
||||
// use <int> tags for smaller facet counts (better back compatibility)
|
||||
private Number num(long val) {
|
||||
if (val < Integer.MAX_VALUE) return (int)val;
|
||||
else return val;
|
||||
}
|
||||
private Number num(Long val) {
|
||||
if (val.longValue() < Integer.MAX_VALUE) return val.intValue();
|
||||
else return val;
|
||||
}
|
||||
|
||||
|
||||
/////////////////////////////////////////////
|
||||
/// SolrInfoMBean
|
||||
////////////////////////////////////////////
|
||||
|
||||
@Override
|
||||
public String getDescription() {
|
||||
return "Handle Faceting";
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getVersion() {
|
||||
return "$Revision: 1152531 $";
|
||||
}
|
||||
|
||||
|
||||
public String getSourceId() {
|
||||
return "$Id: FacetComponent.java 1152531 2011-07-31 00:43:33Z koji $";
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getSource() {
|
||||
return "$URL: http://svn.apache.org/repos/asf/lucene/dev/tags/lucene_solr_3_5_0/solr/core/src/java/org/apache/solr/handler/component/FacetComponent.java $";
|
||||
}
|
||||
|
||||
@Override
|
||||
public URL[] getDocs() {
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* <b>This API is experimental and subject to change</b>
|
||||
*/
|
||||
public static class FacetInfo {
|
||||
public LinkedHashMap<String,QueryFacet> queryFacets;
|
||||
public LinkedHashMap<String,DistribFieldFacet> facets;
|
||||
public SimpleOrderedMap<SimpleOrderedMap<Object>> dateFacets
|
||||
= new SimpleOrderedMap<SimpleOrderedMap<Object>>();
|
||||
public SimpleOrderedMap<SimpleOrderedMap<Object>> rangeFacets
|
||||
= new SimpleOrderedMap<SimpleOrderedMap<Object>>();
|
||||
|
||||
public List exceptionList;
|
||||
|
||||
void parse(SolrParams params, ResponseBuilder rb) {
|
||||
queryFacets = new LinkedHashMap<String,QueryFacet>();
|
||||
facets = new LinkedHashMap<String,DistribFieldFacet>();
|
||||
|
||||
String[] facetQs = params.getParams(FacetParams.FACET_QUERY);
|
||||
if (facetQs != null) {
|
||||
for (String query : facetQs) {
|
||||
QueryFacet queryFacet = new QueryFacet(rb, query);
|
||||
queryFacets.put(queryFacet.getKey(), queryFacet);
|
||||
}
|
||||
}
|
||||
|
||||
String[] facetFs = params.getParams(FacetParams.FACET_FIELD);
|
||||
if (facetFs != null) {
|
||||
|
||||
for (String field : facetFs) {
|
||||
DistribFieldFacet ff = new DistribFieldFacet(rb, field);
|
||||
facets.put(ff.getKey(), ff);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* <b>This API is experimental and subject to change</b>
|
||||
*/
|
||||
public static class FacetBase {
|
||||
String facetType; // facet.field, facet.query, etc (make enum?)
|
||||
String facetStr; // original parameter value of facetStr
|
||||
String facetOn; // the field or query, absent localParams if appropriate
|
||||
private String key; // label in the response for the result... "foo" for {!key=foo}myfield
|
||||
SolrParams localParams; // any local params for the facet
|
||||
|
||||
public FacetBase(ResponseBuilder rb, String facetType, String facetStr) {
|
||||
this.facetType = facetType;
|
||||
this.facetStr = facetStr;
|
||||
try
|
||||
{
|
||||
this.localParams = QueryParsing.getLocalParams(facetStr,
|
||||
rb.req.getParams());
|
||||
}
|
||||
catch (SyntaxError e)
|
||||
{
|
||||
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e);
|
||||
}
|
||||
this.facetOn = facetStr;
|
||||
this.key = facetStr;
|
||||
|
||||
if (localParams != null) {
|
||||
// remove local params unless it's a query
|
||||
if (!facetType.equals(FacetParams.FACET_QUERY)) {
|
||||
facetOn = localParams.get(CommonParams.VALUE);
|
||||
key = facetOn;
|
||||
}
|
||||
|
||||
key = localParams.get(CommonParams.OUTPUT_KEY, key);
|
||||
}
|
||||
}
|
||||
|
||||
/** returns the key in the response that this facet will be under */
|
||||
public String getKey() { return key; }
|
||||
public String getType() { return facetType; }
|
||||
}
|
||||
|
||||
/**
|
||||
* <b>This API is experimental and subject to change</b>
|
||||
*/
|
||||
public static class QueryFacet extends FacetBase {
|
||||
public long count;
|
||||
|
||||
public QueryFacet(ResponseBuilder rb, String facetStr) {
|
||||
super(rb, FacetParams.FACET_QUERY, facetStr);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* <b>This API is experimental and subject to change</b>
|
||||
*/
|
||||
public static class FieldFacet extends FacetBase {
|
||||
public String field; // the field to facet on... "myfield" for {!key=foo}myfield
|
||||
public FieldType ftype;
|
||||
public int offset;
|
||||
public int limit;
|
||||
public int minCount;
|
||||
public String sort;
|
||||
public boolean missing;
|
||||
public String prefix;
|
||||
public long missingCount;
|
||||
|
||||
public FieldFacet(ResponseBuilder rb, String facetStr) {
|
||||
super(rb, FacetParams.FACET_FIELD, facetStr);
|
||||
fillParams(rb, rb.req.getParams(), facetOn);
|
||||
}
|
||||
|
||||
private void fillParams(ResponseBuilder rb, SolrParams params, String field) {
|
||||
this.field = field;
|
||||
this.ftype = rb.req.getSchema().getFieldTypeNoEx(this.field);
|
||||
this.offset = params.getFieldInt(field, FacetParams.FACET_OFFSET, 0);
|
||||
this.limit = params.getFieldInt(field, FacetParams.FACET_LIMIT, 100);
|
||||
Integer mincount = params.getFieldInt(field, FacetParams.FACET_MINCOUNT);
|
||||
if (mincount==null) {
|
||||
Boolean zeros = params.getFieldBool(field, FacetParams.FACET_ZEROS);
|
||||
// mincount = (zeros!=null && zeros) ? 0 : 1;
|
||||
mincount = (zeros!=null && !zeros) ? 1 : 0;
|
||||
// current default is to include zeros.
|
||||
}
|
||||
this.minCount = mincount;
|
||||
this.missing = params.getFieldBool(field, FacetParams.FACET_MISSING, false);
|
||||
// default to sorting by count if there is a limit.
|
||||
this.sort = params.getFieldParam(field, FacetParams.FACET_SORT, limit>0 ? FacetParams.FACET_SORT_COUNT : FacetParams.FACET_SORT_INDEX);
|
||||
if (this.sort.equals(FacetParams.FACET_SORT_COUNT_LEGACY)) {
|
||||
this.sort = FacetParams.FACET_SORT_COUNT;
|
||||
} else if (this.sort.equals(FacetParams.FACET_SORT_INDEX_LEGACY)) {
|
||||
this.sort = FacetParams.FACET_SORT_INDEX;
|
||||
}
|
||||
this.prefix = params.getFieldParam(field,FacetParams.FACET_PREFIX);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* <b>This API is experimental and subject to change</b>
|
||||
*/
|
||||
public static class DistribFieldFacet extends FieldFacet {
|
||||
public List<String>[] _toRefine; // a List<String> of refinements needed, one for each shard.
|
||||
|
||||
// SchemaField sf; // currently unneeded
|
||||
|
||||
// the max possible count for a term appearing on no list
|
||||
public long missingMaxPossible;
|
||||
// the max possible count for a missing term for each shard (indexed by shardNum)
|
||||
public long[] missingMax;
|
||||
public OpenBitSet[] counted; // a bitset for each shard, keeping track of which terms seen
|
||||
public HashMap<String,ShardFacetCount> counts = new HashMap<String,ShardFacetCount>(128);
|
||||
public int termNum;
|
||||
|
||||
public int initialLimit; // how many terms requested in first phase
|
||||
public int initialMincount; // mincount param sent to each shard
|
||||
public boolean needRefinements;
|
||||
public ShardFacetCount[] countSorted;
|
||||
|
||||
DistribFieldFacet(ResponseBuilder rb, String facetStr) {
|
||||
super(rb, facetStr);
|
||||
// sf = rb.req.getSchema().getField(field);
|
||||
missingMax = new long[rb.shards.length];
|
||||
counted = new OpenBitSet[rb.shards.length];
|
||||
}
|
||||
|
||||
void add(int shardNum, NamedList shardCounts, int numRequested) {
|
||||
// shardCounts could be null if there was an exception
|
||||
int sz = shardCounts == null ? 0 : shardCounts.size();
|
||||
int numReceived = sz;
|
||||
|
||||
OpenBitSet terms = new OpenBitSet(termNum+sz);
|
||||
|
||||
long last = 0;
|
||||
for (int i=0; i<sz; i++) {
|
||||
String name = shardCounts.getName(i);
|
||||
long count = ((Number)shardCounts.getVal(i)).longValue();
|
||||
if (name == null) {
|
||||
missingCount += count;
|
||||
numReceived--;
|
||||
} else {
|
||||
ShardFacetCount sfc = counts.get(name);
|
||||
if (sfc == null) {
|
||||
sfc = new ShardFacetCount();
|
||||
sfc.name = name;
|
||||
sfc.indexed = ftype == null ? sfc.name : ftype.toInternal(sfc.name);
|
||||
sfc.termNum = termNum++;
|
||||
counts.put(name, sfc);
|
||||
}
|
||||
sfc.count += count;
|
||||
terms.fastSet(sfc.termNum);
|
||||
last = count;
|
||||
}
|
||||
}
|
||||
|
||||
// the largest possible missing term is initialMincount if we received less
|
||||
// than the number requested.
|
||||
if (numRequested<0 || numRequested != 0 && numReceived < numRequested) {
|
||||
last = initialMincount;
|
||||
}
|
||||
|
||||
missingMaxPossible += last;
|
||||
missingMax[shardNum] = last;
|
||||
counted[shardNum] = terms;
|
||||
}
|
||||
|
||||
public ShardFacetCount[] getLexSorted() {
|
||||
ShardFacetCount[] arr = counts.values().toArray(new ShardFacetCount[counts.size()]);
|
||||
Arrays.sort(arr, new Comparator<ShardFacetCount>() {
|
||||
public int compare(ShardFacetCount o1, ShardFacetCount o2) {
|
||||
return o1.indexed.compareTo(o2.indexed);
|
||||
}
|
||||
});
|
||||
countSorted = arr;
|
||||
return arr;
|
||||
}
|
||||
|
||||
public ShardFacetCount[] getCountSorted() {
|
||||
ShardFacetCount[] arr = counts.values().toArray(new ShardFacetCount[counts.size()]);
|
||||
Arrays.sort(arr, new Comparator<ShardFacetCount>() {
|
||||
public int compare(ShardFacetCount o1, ShardFacetCount o2) {
|
||||
if (o2.count < o1.count) return -1;
|
||||
else if (o1.count < o2.count) return 1;
|
||||
return o1.indexed.compareTo(o2.indexed);
|
||||
}
|
||||
});
|
||||
countSorted = arr;
|
||||
return arr;
|
||||
}
|
||||
|
||||
// returns the max possible value this ShardFacetCount could have for this shard
|
||||
// (assumes the shard did not report a count for this value)
|
||||
long maxPossible(ShardFacetCount sfc, int shardNum) {
|
||||
return missingMax[shardNum];
|
||||
// TODO: could store the last term in the shard to tell if this term
|
||||
// comes before or after it. If it comes before, we could subtract 1
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* <b>This API is experimental and subject to change</b>
|
||||
*/
|
||||
public static class ShardFacetCount {
|
||||
public String name;
|
||||
public String indexed; // the indexed form of the name... used for comparisons.
|
||||
public long count;
|
||||
public int termNum; // term number starting at 0 (used in bit arrays)
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "{term="+name+",termNum="+termNum+",count="+count+"}";
|
||||
}
|
||||
}
|
||||
}
|
@@ -94,17 +94,7 @@
|
||||
<groupId>org.slf4j</groupId>
|
||||
<artifactId>slf4j-log4j12</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.solr</groupId>
|
||||
<artifactId>solr-core</artifactId>
|
||||
<version>${solr.version}</version>
|
||||
<exclusions>
|
||||
<exclusion>
|
||||
<groupId>jdk.tools</groupId>
|
||||
<artifactId>jdk.tools</artifactId>
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.slf4j</groupId>
|
||||
<artifactId>slf4j-jdk14</artifactId>
|
||||
|
@@ -39,7 +39,7 @@
|
||||
need to take precedence over the solr-core, the solr-core will still be loaded in the solr-core.jar
|
||||
-->
|
||||
<excludes>
|
||||
<exclude>WEB-INF/lib/apache-solr-core-4.4.0.jar</exclude>
|
||||
<exclude>WEB-INF/lib/apache-solr-core-4.10.2.jar</exclude>
|
||||
<!--Also ensure we use the DSpace solr web.xml file else our localhost filter will not work !-->
|
||||
<exclude>WEB-INF/web.xml</exclude>
|
||||
</excludes>
|
||||
@@ -73,7 +73,57 @@
|
||||
<dependency>
|
||||
<groupId>org.apache.solr</groupId>
|
||||
<artifactId>solr-analysis-extras</artifactId>
|
||||
<version>${lucene.version}</version>
|
||||
<version>${solr.version}</version>
|
||||
<exclusions>
|
||||
<exclusion>
|
||||
<artifactId>javax.servlet</artifactId>
|
||||
<groupId>org.eclipse.jetty.orbit</groupId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<artifactId>jetty-continuation</artifactId>
|
||||
<groupId>org.eclipse.jetty</groupId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<artifactId>jetty-deploy</artifactId>
|
||||
<groupId>org.eclipse.jetty</groupId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<artifactId>jetty-http</artifactId>
|
||||
<groupId>org.eclipse.jetty</groupId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<artifactId>jetty-io</artifactId>
|
||||
<groupId>org.eclipse.jetty</groupId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<artifactId>jetty-jmx</artifactId>
|
||||
<groupId>org.eclipse.jetty</groupId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<artifactId>jetty-security</artifactId>
|
||||
<groupId>org.eclipse.jetty</groupId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<artifactId>jetty-server</artifactId>
|
||||
<groupId>org.eclipse.jetty</groupId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<artifactId>jetty-servlet</artifactId>
|
||||
<groupId>org.eclipse.jetty</groupId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<artifactId>jetty-util</artifactId>
|
||||
<groupId>org.eclipse.jetty</groupId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<artifactId>jetty-webapp</artifactId>
|
||||
<groupId>org.eclipse.jetty</groupId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<artifactId>jetty-xml</artifactId>
|
||||
<groupId>org.eclipse.jetty</groupId>
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
@@ -84,19 +134,19 @@
|
||||
<dependency>
|
||||
<groupId>org.apache.lucene</groupId>
|
||||
<artifactId>lucene-analyzers-icu</artifactId>
|
||||
<version>${lucene.version}</version>
|
||||
<version>${solr.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.lucene</groupId>
|
||||
<artifactId>lucene-analyzers-smartcn</artifactId>
|
||||
<version>${lucene.version}</version>
|
||||
<version>${solr.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.lucene</groupId>
|
||||
<artifactId>lucene-analyzers-stempel</artifactId>
|
||||
<version>${lucene.version}</version>
|
||||
<version>${solr.version}</version>
|
||||
</dependency>
|
||||
|
||||
<!-- support odd cases where JAXP cannot be found in JVM -->
|
||||
|
@@ -39,7 +39,7 @@
|
||||
that you fully re-index after changing this setting as it can
|
||||
affect both how text is indexed and queried.
|
||||
-->
|
||||
<luceneMatchVersion>4.4</luceneMatchVersion>
|
||||
<luceneMatchVersion>4.10</luceneMatchVersion>
|
||||
|
||||
<!-- lib directives can be used to instruct Solr to load an Jars identified
|
||||
and use them to resolve any "plugins" specified in your solrconfig.xml or
|
||||
|
@@ -35,7 +35,7 @@
|
||||
that you fully re-index after changing this setting as it can
|
||||
affect both how text is indexed and queried.
|
||||
-->
|
||||
<luceneMatchVersion>4.4</luceneMatchVersion>
|
||||
<luceneMatchVersion>4.10</luceneMatchVersion>
|
||||
|
||||
<!-- <lib/> directives can be used to instruct Solr to load an Jars
|
||||
identified and use them to resolve any "plugins" specified in
|
||||
|
@@ -526,16 +526,16 @@
|
||||
<field name="search.resourcetype" type="sint" indexed="true" stored="true" required="true" omitNorms="true" />
|
||||
|
||||
<!-- All object placed in Discovery must have an unique id (for standard DSpaceObject it is resourceID-resourceTypeID) -->
|
||||
<field name="search.uniqueid" type="string" indexed="true" stored="true" required="true" omitNorms="true" />
|
||||
<field name="search.uniqueid" type="string" indexed="true" stored="true" required="true" omitNorms="true" docValues="true"/>
|
||||
|
||||
<!-- All Items/Communities/Collections placed in Discovery should have an handle -->
|
||||
<field name="handle" type="string" indexed="true" stored="true" omitNorms="true" />
|
||||
<field name="handle" type="string" indexed="true" stored="true" omitNorms="true" docValues="true" />
|
||||
|
||||
<field name="withdrawn" type="string" indexed="true" stored="true" omitNorms="true" />
|
||||
<field name="withdrawn" type="string" indexed="true" stored="true" omitNorms="true" docValues="true" />
|
||||
|
||||
<field name="discoverable" type="string" indexed="true" stored="true" omitNorms="true" />
|
||||
<field name="discoverable" type="string" indexed="true" stored="true" omitNorms="true" docValues="true" />
|
||||
|
||||
<field name="read" type="string" indexed="true" stored="true" omitNorms="true" multiValued="true" />
|
||||
<field name="read" type="string" indexed="true" stored="true" omitNorms="true" multiValued="true" docValues="true" />
|
||||
|
||||
<!-- Community and collection hierarchy of the Item of interest (candidate for hierarchical facetting ) -->
|
||||
<field name="location" type="lowerCaseSort" indexed="true" stored="true" multiValued="true" required="false" omitNorms="true" />
|
||||
@@ -581,7 +581,7 @@
|
||||
|
||||
<!--Date matching-->
|
||||
<dynamicField name="*.year" type="sint" indexed="true" stored="true" multiValued="true" omitNorms="true" />
|
||||
<dynamicField name="*_dt" type="date" indexed="true" stored="true" multiValued="false" omitNorms="true" />
|
||||
<dynamicField name="*_dt" type="date" indexed="true" stored="true" multiValued="false" omitNorms="true" docValues="true"/>
|
||||
|
||||
|
||||
<!--Used for matching on all other fields -->
|
||||
@@ -598,21 +598,21 @@
|
||||
EXAMPLE: name="*_i" will match any field ending in _i (like myid_i, z_i)
|
||||
Longer patterns will be matched first. if equal size patterns
|
||||
both match, the first appearing in the schema will be used. -->
|
||||
<dynamicField name="*_i" type="int" indexed="true" stored="true"/>
|
||||
<dynamicField name="*_s" type="string" indexed="true" stored="true"/>
|
||||
<dynamicField name="*_l" type="long" indexed="true" stored="true"/>
|
||||
<dynamicField name="*_t" type="text" indexed="true" stored="true"/>
|
||||
<dynamicField name="*_b" type="boolean" indexed="true" stored="true"/>
|
||||
<dynamicField name="*_f" type="float" indexed="true" stored="true"/>
|
||||
<dynamicField name="*_d" type="double" indexed="true" stored="true"/>
|
||||
<dynamicField name="*_i" type="int" indexed="true" stored="true" docValues="true"/>
|
||||
<dynamicField name="*_s" type="string" indexed="true" stored="true" docValues="true"/>
|
||||
<dynamicField name="*_l" type="long" indexed="true" stored="true" docValues="true"/>
|
||||
<dynamicField name="*_t" type="text" indexed="true" stored="true" />
|
||||
<dynamicField name="*_b" type="boolean" indexed="true" stored="true" />
|
||||
<dynamicField name="*_f" type="float" indexed="true" stored="true" docValues="true"/>
|
||||
<dynamicField name="*_d" type="double" indexed="true" stored="true" docValues="true"/>
|
||||
<!--<dynamicField name="*_dt" type="date" indexed="true" stored="true"/>-->
|
||||
|
||||
<!-- some trie-coded dynamic fields for faster range queries -->
|
||||
<dynamicField name="*_ti" type="tint" indexed="true" stored="true"/>
|
||||
<dynamicField name="*_tl" type="tlong" indexed="true" stored="true"/>
|
||||
<dynamicField name="*_tf" type="tfloat" indexed="true" stored="true"/>
|
||||
<dynamicField name="*_td" type="tdouble" indexed="true" stored="true"/>
|
||||
<dynamicField name="*_tdt" type="tdate" indexed="true" stored="true"/>
|
||||
<dynamicField name="*_ti" type="tint" indexed="true" stored="true" docValues="true"/>
|
||||
<dynamicField name="*_tl" type="tlong" indexed="true" stored="true" docValues="true"/>
|
||||
<dynamicField name="*_tf" type="tfloat" indexed="true" stored="true" docValues="true"/>
|
||||
<dynamicField name="*_td" type="tdouble" indexed="true" stored="true" docValues="true"/>
|
||||
<dynamicField name="*_tdt" type="tdate" indexed="true" stored="true" docValues="true"/>
|
||||
|
||||
<dynamicField name="*_pi" type="pint" indexed="true" stored="true"/>
|
||||
|
||||
|
@@ -35,7 +35,7 @@
|
||||
that you fully re-index after changing this setting as it can
|
||||
affect both how text is indexed and queried.
|
||||
-->
|
||||
<luceneMatchVersion>4.4</luceneMatchVersion>
|
||||
<luceneMatchVersion>4.10</luceneMatchVersion>
|
||||
|
||||
<!-- <lib/> directives can be used to instruct Solr to load an Jars
|
||||
identified and use them to resolve any "plugins" specified in
|
||||
|
@@ -287,42 +287,42 @@
|
||||
|
||||
<field name="type" type="integer" indexed="true" stored="true" required="false" />
|
||||
<field name="id" type="integer" indexed="true" stored="true" required="false" />
|
||||
<field name="ip" type="string" indexed="true" stored="true" required="false" />
|
||||
<field name="ip" type="string" indexed="true" stored="true" required="false" docValues="true"/>
|
||||
<field name="time" type="date" indexed="true" stored="true" required="true" />
|
||||
<field name="epersonid" type="integer" indexed="true" stored="true" required="false" />
|
||||
<field name="continent" type="string" indexed="true" stored="true" required="false"/>
|
||||
<field name="country" type="string" indexed="true" stored="true" required="false" />
|
||||
<field name="countryCode" type="string" indexed="true" stored="true" required="false"/>
|
||||
<field name="city" type="string" indexed="true" stored="true" required="false"/>
|
||||
<field name="continent" type="string" indexed="true" stored="true" required="false" docValues="true"/>
|
||||
<field name="country" type="string" indexed="true" stored="true" required="false" docValues="true"/>
|
||||
<field name="countryCode" type="string" indexed="true" stored="true" required="false" docValues="true"/>
|
||||
<field name="city" type="string" indexed="true" stored="true" required="false" docValues="true"/>
|
||||
<field name="longitude" type="float" indexed="true" stored="true" required="false" />
|
||||
<field name="latitude" type="float" indexed="true" stored="true" required="false" />
|
||||
<field name="owningComm" type="integer" indexed="true" stored="true" required="false" multiValued="true" />
|
||||
<field name="owningColl" type="integer" indexed="true" stored="true" required="false" multiValued="true" />
|
||||
<field name="owningItem" type="integer" indexed="true" stored="true" required="false" multiValued="true" />
|
||||
<field name="dns" type="string" indexed="true" stored="true" required="false"/>
|
||||
<field name="userAgent" type="string" indexed="true" stored="true" required="false"/>
|
||||
<field name="isBot" type="boolean" indexed="true" stored="true" required="false"/>
|
||||
<field name="bundleName" type="string" indexed="true" stored="true" required="false" multiValued="true" />
|
||||
<field name="referrer" type="string" indexed="true" stored="true" required="false"/>
|
||||
<field name="owningItem" type="integer" indexed="true" stored="true" required="false" multiValued="true" />
|
||||
<field name="dns" type="string" indexed="true" stored="true" required="false" docValues="true"/>
|
||||
<field name="userAgent" type="string" indexed="true" stored="true" required="false" docValues="true"/>
|
||||
<field name="isBot" type="boolean" indexed="true" stored="true" required="false" />
|
||||
<field name="bundleName" type="string" indexed="true" stored="true" required="false" multiValued="true" docValues="true"/>
|
||||
<field name="referrer" type="string" indexed="true" stored="true" required="false" docValues="true"/>
|
||||
<!-- use uuid as uniqueKey update processor chain, see solrconfig.xml-->
|
||||
<field name="uid" type="string" indexed="true" stored="true" required="true" />
|
||||
<field name="uid" type="string" indexed="true" stored="true" required="true" docValues="true"/>
|
||||
|
||||
<!--Can either be view/search/search_result/workflow-->
|
||||
<field name="statistics_type" type="string" indexed="true" stored="true" required="true" default="view" />
|
||||
<field name="statistics_type" type="string" indexed="true" stored="true" required="true" default="view" docValues="true"/>
|
||||
|
||||
<!-- Search specific -->
|
||||
<field name="query" type="string" indexed="true" stored="true" required="false" multiValued="true"/>
|
||||
<field name="query" type="string" indexed="true" stored="true" required="false" multiValued="true" docValues="true"/>
|
||||
<field name="scopeType" type="integer" indexed="true" stored="true" required="false" />
|
||||
<field name="scopeId" type="integer" indexed="true" stored="true" required="false" />
|
||||
<field name="rpp" type="integer" indexed="true" stored="true" required="false" />
|
||||
<field name="sortBy" type="string" indexed="true" stored="true" required="false" />
|
||||
<field name="sortOrder" type="string" indexed="true" stored="true" required="false" />
|
||||
<field name="sortBy" type="string" indexed="true" stored="true" required="false" docValues="true"/>
|
||||
<field name="sortOrder" type="string" indexed="true" stored="true" required="false" docValues="true"/>
|
||||
<field name="page" type="integer" indexed="true" stored="true" required="false" />
|
||||
|
||||
<!--Workflow spefic-->
|
||||
<field name="workflowStep" type="string" indexed="true" stored="true" required="false" multiValued="true"/>
|
||||
<field name="previousWorkflowStep" type="string" indexed="true" stored="true" required="false" multiValued="true"/>
|
||||
<field name="owner" type="string" indexed="true" stored="true" required="false" multiValued="true"/>
|
||||
<field name="workflowStep" type="string" indexed="true" stored="true" required="false" multiValued="true" docValues="true"/>
|
||||
<field name="previousWorkflowStep" type="string" indexed="true" stored="true" required="false" multiValued="true" docValues="true"/>
|
||||
<field name="owner" type="string" indexed="true" stored="true" required="false" multiValued="true" docValues="true"/>
|
||||
<field name="submitter" type="integer" indexed="true" stored="true" required="false" />
|
||||
<field name="actor" type="integer" indexed="true" stored="true" required="false" />
|
||||
<field name="workflowItemId" type="integer" indexed="true" stored="true" required="false" />
|
||||
|
@@ -35,7 +35,7 @@
|
||||
that you fully re-index after changing this setting as it can
|
||||
affect both how text is indexed and queried.
|
||||
-->
|
||||
<luceneMatchVersion>4.4</luceneMatchVersion>
|
||||
<luceneMatchVersion>4.10</luceneMatchVersion>
|
||||
|
||||
<!-- <lib/> directives can be used to instruct Solr to load an Jars
|
||||
identified and use them to resolve any "plugins" specified in
|
||||
|
4
pom.xml
4
pom.xml
@@ -31,8 +31,8 @@
|
||||
<properties>
|
||||
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
||||
<java.version>1.7</java.version>
|
||||
<lucene.version>4.4.0</lucene.version>
|
||||
<solr.version>4.4.0</solr.version>
|
||||
<lucene.version>4.9.1</lucene.version>
|
||||
<solr.version>4.10.2</solr.version>
|
||||
<jena.version>2.12.0</jena.version>
|
||||
<slf4j.version>1.6.1</slf4j.version>
|
||||
<!-- 'root.basedir' is the path to the root [dspace-src] dir. It must be redefined by each child POM,
|
||||
|
Reference in New Issue
Block a user