[DS-980] Upgraded solr & lucene to version 3.3.0

git-svn-id: http://scm.dspace.org/svn/repo/dspace/trunk@6545 9c30dcfa-912a-0410-8fc2-9e0234be79fd
Tim Donohue
2011-08-12 19:49:34 +00:00
parent cda73317df
commit e40a2c9441
9 changed files with 1016 additions and 1028 deletions

View File

@@ -16,6 +16,7 @@ import org.apache.lucene.analysis.PorterStemFilter;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.util.Version;
import org.dspace.core.ConfigurationManager;
/**
@@ -47,7 +48,7 @@ public class DSAnalyzer extends Analyzer
/*
* Stop table
*/
protected static final Set stopSet = StopFilter.makeStopSet(STOP_WORDS);
protected static final Set stopSet = StopFilter.makeStopSet(Version.LUCENE_33,STOP_WORDS);
/*
* Create a token stream for this analyzer.
@@ -59,7 +60,7 @@ public class DSAnalyzer extends Analyzer
result = new StandardFilter(result);
result = new LowerCaseFilter(result);
result = new StopFilter(result, stopSet);
result = new StopFilter(Version.LUCENE_33, result, stopSet);
result = new PorterStemFilter(result);
return result;
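
The change above is the standard Lucene 3.x migration for analyzers: `StopFilter.makeStopSet` and the `StopFilter` constructor both gained a leading `Version` argument so filters can preserve or drop legacy behavior. A minimal self-contained sketch of the pattern (class name and placeholder stop-word list are illustrative, not DSpace's):

```java
import java.io.Reader;
import java.util.Set;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.PorterStemFilter;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.util.Version;

public class VersionedStemmingAnalyzer extends Analyzer {
    // Placeholder list; DSAnalyzer uses its own STOP_WORDS constant
    private static final String[] STOP_WORDS = { "a", "and", "of", "the", "to" };

    // Lucene 3.x: makeStopSet takes the match Version as its first argument
    private static final Set<?> STOP_SET =
            StopFilter.makeStopSet(Version.LUCENE_33, STOP_WORDS);

    @Override
    public TokenStream tokenStream(String fieldName, Reader reader) {
        TokenStream result = new StandardTokenizer(Version.LUCENE_33, reader);
        result = new StandardFilter(result);
        result = new LowerCaseFilter(result);
        // StopFilter likewise gained a Version parameter in 3.x
        result = new StopFilter(Version.LUCENE_33, result, STOP_SET);
        result = new PorterStemFilter(result);
        return result;
    }
}
```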

View File

@@ -39,8 +39,12 @@ import org.apache.lucene.document.Field;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.dspace.content.Bitstream;
import org.dspace.content.Bundle;
import org.dspace.content.Collection;
@@ -208,22 +212,23 @@ public class DSIndexer
/*
* Create the index directory if it doesn't already exist.
*/
if (!IndexReader.indexExists(indexDirectory))
{
try
{
if (!IndexReader.indexExists(FSDirectory.open(new File(indexDirectory))))
{
if (!new File(indexDirectory).mkdirs())
{
log.error("Unable to create index directory: " + indexDirectory);
}
openIndex(true).close();
}
}
catch (IOException e)
{
throw new IllegalStateException("Could not create search index: " + e.getMessage(),e);
}
}
}
public static void setBatchProcessingMode(boolean mode)
{
@@ -902,8 +907,15 @@ public class DSIndexer
private static IndexWriter openIndex(boolean wipeExisting)
throws IOException
{
Directory dir = FSDirectory.open(new File(indexDirectory));
IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_33, getAnalyzer());
if(wipeExisting){
iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
}else{
iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
}
IndexWriter writer = new IndexWriter(indexDirectory, getAnalyzer(), wipeExisting);
IndexWriter writer = new IndexWriter(dir, iwc);
/* Set maximum number of terms to index if present in dspace.cfg */
if (maxfieldlength == -1)
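
Both hunks above follow the same Lucene 3.x pattern: index paths become `Directory` instances via `FSDirectory.open`, and the removed `IndexWriter(String, Analyzer, boolean)` constructor is replaced by a `Directory` plus an `IndexWriterConfig`, with the old `create` boolean expressed as an `OpenMode`. A minimal sketch (the analyzer is a stand-in for `DSIndexer.getAnalyzer()`):

```java
import java.io.File;
import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

public class OpenIndexSketch {
    static IndexWriter openIndex(String indexPath, boolean wipeExisting) throws IOException {
        // Paths are now wrapped in a Directory instead of being passed as Strings
        Directory dir = FSDirectory.open(new File(indexPath));

        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_33); // stand-in analyzer
        IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_33, analyzer);

        // OpenMode.CREATE wipes the index (old "create = true");
        // CREATE_OR_APPEND keeps existing content (old "create = false")
        iwc.setOpenMode(wipeExisting
                ? IndexWriterConfig.OpenMode.CREATE
                : IndexWriterConfig.OpenMode.CREATE_OR_APPEND);

        return new IndexWriter(dir, iwc);
    }
}
```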
@@ -982,8 +994,8 @@ public class DSIndexer
if (name != null)
{
doc.add(new Field("name", name, Field.Store.NO, Field.Index.TOKENIZED));
doc.add(new Field("default", name, Field.Store.NO, Field.Index.TOKENIZED));
doc.add(new Field("name", name, Field.Store.NO, Field.Index.ANALYZED));
doc.add(new Field("default", name, Field.Store.NO, Field.Index.ANALYZED));
}
return doc;
@@ -1008,8 +1020,8 @@ public class DSIndexer
if (name != null)
{
doc.add(new Field("name", name, Field.Store.NO, Field.Index.TOKENIZED));
doc.add(new Field("default", name, Field.Store.NO, Field.Index.TOKENIZED));
doc.add(new Field("name", name, Field.Store.NO, Field.Index.ANALYZED));
doc.add(new Field("default", name, Field.Store.NO, Field.Index.ANALYZED));
}
return doc;
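
This rename recurs through the rest of the file: Lucene 3.x replaced `Field.Index.TOKENIZED` with `Field.Index.ANALYZED` and `Field.Index.UN_TOKENIZED` with `Field.Index.NOT_ANALYZED`; the indexing behavior is unchanged, only the constants were renamed. A short sketch of both variants (field names are illustrative):

```java
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;

public class FieldRenameSketch {
    static Document build(String title, String handle) {
        Document doc = new Document();
        // ANALYZED (was TOKENIZED): run through the analyzer, searchable term by term
        doc.add(new Field("title", title, Field.Store.NO, Field.Index.ANALYZED));
        // NOT_ANALYZED (was UN_TOKENIZED): indexed as one exact token, e.g. identifiers
        doc.add(new Field("handle", handle, Field.Store.YES, Field.Index.NOT_ANALYZED));
        return doc;
    }
}
```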
@@ -1062,12 +1074,12 @@ public class DSIndexer
doc.add( new Field(indexConfigArr[i].indexName,
DateTools.dateToString(d, DateTools.Resolution.SECOND),
Field.Store.NO,
Field.Index.UN_TOKENIZED));
Field.Index.NOT_ANALYZED));
doc.add( new Field(indexConfigArr[i].indexName + ".year",
DateTools.dateToString(d, DateTools.Resolution.YEAR),
Field.Store.NO,
Field.Index.UN_TOKENIZED));
Field.Index.NOT_ANALYZED));
}
}
else if ("date".equalsIgnoreCase(indexConfigArr[i].type))
@@ -1078,12 +1090,12 @@ public class DSIndexer
doc.add( new Field(indexConfigArr[i].indexName,
DateTools.dateToString(d, DateTools.Resolution.DAY),
Field.Store.NO,
Field.Index.UN_TOKENIZED));
Field.Index.NOT_ANALYZED));
doc.add( new Field(indexConfigArr[i].indexName + ".year",
DateTools.dateToString(d, DateTools.Resolution.YEAR),
Field.Store.NO,
Field.Index.UN_TOKENIZED));
Field.Index.NOT_ANALYZED));
}
}
else
@@ -1099,7 +1111,7 @@ public class DSIndexer
doc.add( new Field(indexConfigArr[i].indexName+"_authority",
mydc[j].authority,
Field.Store.NO,
Field.Index.UN_TOKENIZED));
Field.Index.NOT_ANALYZED));
boolean valueAlreadyIndexed = false;
if (variants != null)
@@ -1110,7 +1122,7 @@ public class DSIndexer
doc.add( new Field(indexConfigArr[i].indexName,
var,
Field.Store.NO,
Field.Index.TOKENIZED));
Field.Index.ANALYZED));
if (var.equals(mydc[j].value))
{
valueAlreadyIndexed = true;
@@ -1121,7 +1133,7 @@ public class DSIndexer
doc.add( new Field("default",
var,
Field.Store.NO,
Field.Index.TOKENIZED));
Field.Index.ANALYZED));
}
}
}
@@ -1132,7 +1144,7 @@ public class DSIndexer
doc.add( new Field(indexConfigArr[i].indexName,
mydc[j].value,
Field.Store.NO,
Field.Index.TOKENIZED));
Field.Index.ANALYZED));
}
}
else
@@ -1141,11 +1153,11 @@ public class DSIndexer
doc.add( new Field(indexConfigArr[i].indexName,
mydc[j].value,
Field.Store.NO,
Field.Index.TOKENIZED));
Field.Index.ANALYZED));
}
}
doc.add( new Field("default", mydc[j].value, Field.Store.NO, Field.Index.TOKENIZED));
doc.add( new Field("default", mydc[j].value, Field.Store.NO, Field.Index.ANALYZED));
}
}
}
@@ -1164,7 +1176,7 @@ public class DSIndexer
if (dcv.length > 0)
{
String value = OrderFormat.makeSortString(dcv[0].value, dcv[0].language, so.getType());
doc.add( new Field("sort_" + so.getName(), value, Field.Store.NO, Field.Index.UN_TOKENIZED) );
doc.add( new Field("sort_" + so.getName(), value, Field.Store.NO, Field.Index.NOT_ANALYZED) );
}
}
}
@@ -1230,15 +1242,15 @@ public class DSIndexer
// want to be able to check when last updated
// (not tokenized, but it is indexed)
doc.add(new Field(LAST_INDEXED_FIELD, Long.toString(System.currentTimeMillis()), Field.Store.YES, Field.Index.UN_TOKENIZED));
doc.add(new Field(DOCUMENT_STATUS_FIELD, "archived", Field.Store.YES, Field.Index.UN_TOKENIZED));
doc.add(new Field(LAST_INDEXED_FIELD, Long.toString(System.currentTimeMillis()), Field.Store.YES, Field.Index.NOT_ANALYZED));
doc.add(new Field(DOCUMENT_STATUS_FIELD, "archived", Field.Store.YES, Field.Index.NOT_ANALYZED));
// KEPT FOR BACKWARDS COMPATIBILITY
// do location, type, handle first
doc.add(new Field("type", Integer.toString(type), Field.Store.YES, Field.Index.NO));
// New fields to weaken the dependence on handles, and allow for faster list display
doc.add(new Field("search.resourcetype", Integer.toString(type), Field.Store.YES, Field.Index.UN_TOKENIZED));
doc.add(new Field("search.resourcetype", Integer.toString(type), Field.Store.YES, Field.Index.NOT_ANALYZED));
doc.add(new Field("search.resourceid", Integer.toString(id), Field.Store.YES, Field.Index.NO));
// want to be able to search for handle, so use keyword
@@ -1246,20 +1258,20 @@ public class DSIndexer
if (handle != null)
{
// ??? not sure what the "handletext" field is but it was there in writeItemIndex ???
doc.add(new Field("handletext", handle, Field.Store.YES, Field.Index.TOKENIZED));
doc.add(new Field("handletext", handle, Field.Store.YES, Field.Index.ANALYZED));
// want to be able to search for handle, so use keyword
// (not tokenized, but it is indexed)
doc.add(new Field("handle", handle, Field.Store.YES, Field.Index.UN_TOKENIZED));
doc.add(new Field("handle", handle, Field.Store.YES, Field.Index.NOT_ANALYZED));
// add to full text index
doc.add(new Field("default", handle, Field.Store.NO, Field.Index.TOKENIZED));
doc.add(new Field("default", handle, Field.Store.NO, Field.Index.ANALYZED));
}
if(location != null)
{
doc.add(new Field("location", location, Field.Store.NO, Field.Index.TOKENIZED));
doc.add(new Field("default", location, Field.Store.NO, Field.Index.TOKENIZED));
doc.add(new Field("location", location, Field.Store.NO, Field.Index.ANALYZED));
doc.add(new Field("default", location, Field.Store.NO, Field.Index.ANALYZED));
}
return doc;
@@ -1271,8 +1283,8 @@ public class DSIndexer
// want to be able to check when last updated
// (not tokenized, but it is indexed)
doc.add(new Field(LAST_INDEXED_FIELD, Long.toString(System.currentTimeMillis()), Field.Store.YES, Field.Index.UN_TOKENIZED));
doc.add(new Field(DOCUMENT_STATUS_FIELD, "deleted", Field.Store.YES, Field.Index.UN_TOKENIZED));
doc.add(new Field(LAST_INDEXED_FIELD, Long.toString(System.currentTimeMillis()), Field.Store.YES, Field.Index.NOT_ANALYZED));
doc.add(new Field(DOCUMENT_STATUS_FIELD, "deleted", Field.Store.YES, Field.Index.NOT_ANALYZED));
// Do not add any other fields, as we don't want to be able to find it - just check the last indexed time
@@ -1285,8 +1297,8 @@ public class DSIndexer
// want to be able to check when last updated
// (not tokenized, but it is indexed)
doc.add(new Field(LAST_INDEXED_FIELD, Long.toString(System.currentTimeMillis()), Field.Store.YES, Field.Index.UN_TOKENIZED));
doc.add(new Field(DOCUMENT_STATUS_FIELD, "withdrawn", Field.Store.YES, Field.Index.UN_TOKENIZED));
doc.add(new Field(LAST_INDEXED_FIELD, Long.toString(System.currentTimeMillis()), Field.Store.YES, Field.Index.NOT_ANALYZED));
doc.add(new Field(DOCUMENT_STATUS_FIELD, "withdrawn", Field.Store.YES, Field.Index.NOT_ANALYZED));
// Do not add any other fields, as we don't want to be able to find it - just check the last indexed time

View File

@@ -13,6 +13,7 @@ import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.util.Version;
/**
* Custom Lucene Analyzer that combines the standard filter, lowercase filter
@@ -32,7 +33,7 @@ public class DSNonStemmingAnalyzer extends DSAnalyzer
result = new StandardFilter(result);
result = new LowerCaseFilter(result);
result = new StopFilter(result, stopSet);
result = new StopFilter(Version.LUCENE_33, result, stopSet);
return result;
}

View File

@@ -7,6 +7,7 @@
*/
package org.dspace.search;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
@@ -19,12 +20,14 @@ import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.queryParser.TokenMgrError;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.dspace.content.Collection;
import org.dspace.content.Community;
import org.dspace.core.ConfigurationManager;
@@ -113,9 +116,9 @@ public class DSQuery
try
{
// grab a searcher, and do the search
Searcher searcher = getSearcher(c);
IndexSearcher searcher = getSearcher(c);
QueryParser qp = new QueryParser("default", DSIndexer.getAnalyzer());
QueryParser qp = new QueryParser(Version.LUCENE_33, "default", DSIndexer.getAnalyzer());
log.debug("Final query string: " + querystring);
if (operator == null || operator.equals("OR"))
@@ -128,55 +131,28 @@ public class DSQuery
}
Query myquery = qp.parse(querystring);
Hits hits = null;
try
{
if (args.getSortOption() == null)
{
SortField[] sortFields = new SortField[] {
new SortField("search.resourcetype", true),
new SortField(null, SortField.SCORE, SortOption.ASCENDING.equals(args.getSortOrder()))
};
hits = searcher.search(myquery, new Sort(sortFields));
}
else
{
SortField[] sortFields = new SortField[] {
new SortField("search.resourcetype", true),
new SortField("sort_" + args.getSortOption().getName(), SortOption.DESCENDING.equals(args.getSortOrder())),
SortField.FIELD_SCORE
};
hits = searcher.search(myquery, new Sort(sortFields));
}
}
catch (Exception e)
{
// Lucene can throw an exception if it is unable to determine a sort type from the specified field
// Provide a fallback that just works on relevancy.
log.error("Unable to use specified sort option: " + (args.getSortOption() == null ? "type/relevance" : args.getSortOption().getName()));
hits = searcher.search(myquery, new Sort(SortField.FIELD_SCORE));
}
// Retrieve enough docs to get all the results we need!
TopDocs hits = performQuery(args, searcher, myquery, args.getPageSize() * (args.getStart() + 1));
// set total number of hits
qr.setHitCount(hits.length());
qr.setHitCount(hits.totalHits);
// We now have a bunch of hits - snip out a 'window'
// defined in start, count and return the handles
// from that window
// first, are there enough hits?
if (args.getStart() < hits.length())
if (args.getStart() < hits.totalHits)
{
// get as many as we can, up to the window size
// how many are available after snipping off at offset 'start'?
int hitsRemaining = hits.length() - args.getStart();
int hitsRemaining = hits.totalHits - args.getStart();
int hitsToProcess = (hitsRemaining < args.getPageSize()) ? hitsRemaining
: args.getPageSize();
for (int i = args.getStart(); i < (args.getStart() + hitsToProcess); i++)
{
Document d = hits.doc(i);
Document d = searcher.doc(hits.scoreDocs[i].doc);
String resourceId = d.get("search.resourceid");
String resourceType = d.get("search.resourcetype");
@@ -187,15 +163,15 @@ public class DSQuery
switch (Integer.parseInt( resourceType != null ? resourceType : handleType))
{
case Constants.ITEM:
hitTypes.add(Integer.valueOf(Constants.ITEM));
hitTypes.add(Constants.ITEM);
break;
case Constants.COLLECTION:
hitTypes.add(Integer.valueOf(Constants.COLLECTION));
hitTypes.add(Constants.COLLECTION);
break;
case Constants.COMMUNITY:
hitTypes.add(Integer.valueOf(Constants.COMMUNITY));
hitTypes.add(Constants.COMMUNITY);
break;
}
@@ -230,6 +206,38 @@ public class DSQuery
return qr;
}
private static TopDocs performQuery(QueryArgs args, IndexSearcher searcher, Query myquery, int max) throws IOException {
TopDocs hits;
try
{
if (args.getSortOption() == null)
{
SortField[] sortFields = new SortField[] {
new SortField("search.resourcetype", SortField.INT, true),
new SortField(null, SortField.SCORE, SortOption.ASCENDING.equals(args.getSortOrder()))
};
hits = searcher.search(myquery, max, new Sort(sortFields));
}
else
{
SortField[] sortFields = new SortField[] {
new SortField("search.resourcetype", SortField.INT, true),
new SortField("sort_" + args.getSortOption().getName(), SortField.STRING, SortOption.DESCENDING.equals(args.getSortOrder())),
SortField.FIELD_SCORE
};
hits = searcher.search(myquery, max, new Sort(sortFields));
}
}
catch (Exception e)
{
// Lucene can throw an exception if it is unable to determine a sort type from the specified field
// Provide a fallback that just works on relevancy.
log.error("Unable to use specified sort option: " + (args.getSortOption() == null ? "type/relevance" : args.getSortOption().getName()));
hits = searcher.search(myquery, max, new Sort(SortField.FIELD_SCORE));
}
return hits;
}
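
The extracted `performQuery` covers the other big 3.x break: the `Hits` class was removed, so searches now request an explicit maximum and get back a `TopDocs`, and stored documents are fetched through the searcher. Note also that 3.x `SortField` constructors want an explicit type (`SortField.INT`, `SortField.STRING`), which is why the sort fields above gained one. A minimal sketch of the parse-search-page flow (analyzer and field names are placeholders):

```java
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.util.Version;

public class TopDocsSketch {
    static void search(IndexSearcher searcher, String querystring,
                       int start, int pageSize) throws Exception {
        // QueryParser also takes a match Version in 3.x
        QueryParser qp = new QueryParser(Version.LUCENE_33, "default",
                new StandardAnalyzer(Version.LUCENE_33));
        Query query = qp.parse(querystring);

        // Hits paged lazily; TopDocs needs the maximum up front
        TopDocs hits = searcher.search(query, start + pageSize);

        // hits.totalHits replaces hits.length()
        int end = Math.min(start + pageSize, hits.scoreDocs.length);
        for (int i = start; i < end; i++) {
            // hits.doc(i) becomes searcher.doc(scoreDocs[i].doc)
            Document d = searcher.doc(hits.scoreDocs[i].doc);
            System.out.println(d.get("search.resourceid"));
        }
    }
}
```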
static String checkEmptyQuery(String myquery)
{
if (myquery == null || myquery.equals("()") || myquery.equals(""))
@@ -359,7 +367,7 @@ public class DSQuery
{
String thisHandle = (String) i.next();
Integer thisType = (Integer) j.next();
String type = Constants.typeText[thisType.intValue()];
String type = Constants.typeText[thisType];
// also look up type
System.out.println(type + "\t" + thisHandle);
@@ -421,7 +429,10 @@ public class DSQuery
// If we have already opened a searcher, check to see if the index has been updated
// If it has, we need to close the existing searcher - we will open a new one later
if (searcher != null && lastModified != IndexReader.getCurrentVersion(indexDir))
Directory searchDir = FSDirectory.open(new File(indexDir));
if (searcher != null && lastModified != IndexReader.getCurrentVersion(searchDir))
{
try
{
@@ -445,17 +456,18 @@ public class DSQuery
if (searcher == null)
{
// So, open a new searcher
lastModified = IndexReader.getCurrentVersion(indexDir);
lastModified = IndexReader.getCurrentVersion(searchDir);
String osName = System.getProperty("os.name");
if (osName != null && osName.toLowerCase().contains("windows"))
{
searcher = new IndexSearcher(indexDir){
searcher = new IndexSearcher(searchDir){
/*
* TODO: Has Lucene fixed this bug yet?
* Lucene doesn't release read locks in
* windows properly on finalize. Our hack
* extend IndexSearcher to force close().
*/
@Override
protected void finalize() throws Throwable {
this.close();
super.finalize();
@@ -464,7 +476,7 @@ public class DSQuery
}
else
{
searcher = new IndexSearcher(indexDir);
searcher = new IndexSearcher(searchDir);
}
}
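
Similarly, `IndexSearcher` no longer accepts a path `String`; both the searcher and `IndexReader.getCurrentVersion` now operate on a `Directory`. A rough sketch of the open-or-refresh logic above, minus DSpace's Windows finalizer workaround:

```java
import java.io.File;
import java.io.IOException;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class SearcherCacheSketch {
    private IndexSearcher searcher;
    private long lastModified = -1;

    synchronized IndexSearcher getSearcher(String indexDir) throws IOException {
        Directory searchDir = FSDirectory.open(new File(indexDir));

        // getCurrentVersion now takes a Directory; close stale searchers on change
        if (searcher != null && lastModified != IndexReader.getCurrentVersion(searchDir)) {
            searcher.close();
            searcher = null;
        }
        if (searcher == null) {
            lastModified = IndexReader.getCurrentVersion(searchDir);
            searcher = new IndexSearcher(searchDir);
        }
        return searcher;
    }
}
```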

View File

@@ -28,7 +28,7 @@
<dependency>
<groupId>org.apache.solr</groupId>
<artifactId>solr-solrj</artifactId>
<version>1.4.1</version>
<version>3.3.0</version>
<exclusions>
<exclusion>
<groupId>org.slf4j</groupId>

View File

@@ -81,7 +81,7 @@
<dependency>
<groupId>org.apache.solr</groupId>
<artifactId>solr-solrj</artifactId>
<version>1.4.1</version>
<version>3.3.0</version>
</dependency>
<dependency>
<groupId>org.dspace.dependencies</groupId>

View File

@@ -46,7 +46,7 @@
<dependency>
<groupId>org.dspace</groupId>
<artifactId>dspace-solr</artifactId>
<version>1.4.1.0</version>
<version>3.3.0.0</version>
<classifier>skinny</classifier>
<type>war</type>
</dependency>
@@ -54,7 +54,7 @@
<dependency>
<groupId>org.dspace</groupId>
<artifactId>dspace-solr</artifactId>
<version>1.4.1.0</version>
<version>3.3.0.0</version>
<classifier>classes</classifier>
<type>jar</type>
</dependency>

View File

@@ -45,14 +45,16 @@
that avoids logging every request
-->
<schema name="example" version="1.2">
<schema name="example" version="1.4">
<!-- attribute "name" is the name of this schema and is only used for display purposes.
Applications should change this to reflect the nature of the search collection.
version="1.2" is Solr's version number for the schema syntax and semantics. It should
version="1.4" is Solr's version number for the schema syntax and semantics. It should
not normally be changed by applications.
1.0: multiValued attribute did not exist, all fields are multiValued by nature
1.1: multiValued attribute introduced, false by default
1.2: omitTermFreqAndPositions attribute introduced, true by default except for text fields.
1.3: removed optional field compress feature
1.4: default auto-phrase (QueryParser feature) to off
-->
<types>
@@ -271,16 +273,7 @@
</analyzer>
</fieldType>
<!--
Setup simple analysis for spell checking
-->
<fieldType name="textSpell" class="solr.StrField" positionIncrementGap="100" >
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
</analyzer>
</fieldType>
<!-- A general unstemmed text field - good if one does not know the language of the field -->
<fieldType name="textgen" class="solr.TextField" positionIncrementGap="100">
@@ -410,36 +403,6 @@
any data added to them will be ignored outright. -->
<fieldtype name="ignored" stored="false" indexed="false" multiValued="true" class="solr.StrField" />
<!-- This is an example of using the KeywordTokenizer along
with various TokenFilterFactories to produce a sortable field
that does not include some properties of the source text
-->
<fieldType name="handleIdentifier" class="solr.StrField" sortMissingLast="true" omitNorms="true">
<analyzer>
<!-- KeywordTokenizer does no actual tokenizing, so the entire
input string is preserved as a single token
-->
<tokenizer class="solr.KeywordTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory" />
<!-- The TrimFilter removes any leading or trailing whitespace -->
<filter class="solr.TrimFilterFactory" />
<!-- The PatternReplaceFilter gives you the flexibility to use
Java Regular expression to replace any sequence of characters
matching a pattern with an arbitrary replacement string,
which may include back refrences to portions of the orriginal
string matched by the pattern.
See the Java Regular Expression documentation for more
infomation on pattern and replacement string syntax.
http://java.sun.com/j2se/1.5.0/docs/api/java/util/regex/package-summary.html
-->
<filter class="solr.PatternReplaceFilterFactory" pattern="http://hdl.handle.net/" replacement="" replace="all"/>
</analyzer>
</fieldType>
<!-- This is an example of using the KeywordTokenizer along
with various TokenFilterFactories to produce a sortable field
@@ -580,7 +543,6 @@
results by manufacturer. copied from "manu" via copyField -->
<!--<field name="manu_exact" type="string" indexed="true" stored="false"/>-->
<!--<field name="spell" type="textSpell" indexed="true" stored="true" multiValued="true"/>-->
<!-- Dynamic field definitions. If a field name is not found, dynamicFields
will be used if the name matches any of the patterns.
RESTRICTION: the glob-like pattern in the name attribute must have

View File

@@ -347,12 +347,12 @@
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-core</artifactId>
<version>2.9.3</version>
<version>3.3.0</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-analyzers</artifactId>
<version>2.9.3</version>
<version>3.3.0</version>
</dependency>
<dependency>
<groupId>org.dspace</groupId>