diff --git a/dspace-api/src/main/java/org/dspace/search/DSQuery.java b/dspace-api/src/main/java/org/dspace/search/DSQuery.java index 08dfa33e03..060a9de99e 100644 --- a/dspace-api/src/main/java/org/dspace/search/DSQuery.java +++ b/dspace-api/src/main/java/org/dspace/search/DSQuery.java @@ -57,7 +57,6 @@ import org.apache.lucene.search.Query; import org.apache.lucene.search.Searcher; import org.apache.lucene.search.Sort; import org.apache.lucene.search.SortField; -import org.apache.oro.text.perl.Perl5Util; import org.dspace.content.Collection; import org.dspace.content.Community; import org.dspace.core.ConfigurationManager; @@ -279,40 +278,26 @@ public class DSQuery // Here we substitute the boolean operators -- which // have to be uppercase -- before tranforming the // query string to lowercase. - Perl5Util util = new Perl5Util(); - - myquery = util.substitute("s/ AND / && /g", myquery); - myquery = util.substitute("s/ OR / || /g", myquery); - myquery = util.substitute("s/ NOT / ! /g", myquery); - - myquery = myquery.toLowerCase(); - - return myquery; + return myquery.replaceAll(" AND ", " && ") + .replaceAll(" OR ", " || ") + .replaceAll(" NOT ", " ! 
") + .toLowerCase(); } static String stripHandles(String myquery) { // Drop beginning pieces of full handle strings - Perl5Util util = new Perl5Util(); - - myquery = util.substitute("s|^(\\s+)?http://hdl\\.handle\\.net/||", - myquery); - myquery = util.substitute("s|^(\\s+)?hdl:||", myquery); - - return myquery; + return myquery.replaceAll("^\\s*http://hdl\\.handle\\.net/", "") + .replaceAll("^\\s*hdl:", ""); } static String stripAsterisk(String myquery) { // query strings (or words) begining with "*" cause a null pointer error - Perl5Util util = new Perl5Util(); - - myquery = util.substitute("s/^\\*//", myquery); - myquery = util.substitute("s| \\*| |", myquery); - myquery = util.substitute("s|\\(\\*|\\(|", myquery); - myquery = util.substitute("s|:\\*|:|", myquery); - - return myquery; + return myquery.replaceAll("^\\*", "") + .replaceAll("\\s\\*", " ") + .replaceAll("\\(\\*", "(") + .replaceAll(":\\*", ":"); } /** diff --git a/dspace-api/src/main/java/org/dspace/search/QueryArgs.java b/dspace-api/src/main/java/org/dspace/search/QueryArgs.java index c274d73e87..6e1268ece0 100644 --- a/dspace-api/src/main/java/org/dspace/search/QueryArgs.java +++ b/dspace-api/src/main/java/org/dspace/search/QueryArgs.java @@ -44,12 +44,13 @@ import java.net.URLEncoder; import java.util.ArrayList; import java.util.HashMap; import java.util.Iterator; +import java.util.StringTokenizer; import javax.servlet.http.HttpServletRequest; import org.dspace.core.Constants; import org.dspace.sort.SortOption; -import org.apache.oro.text.perl.Perl5Util; +import org.apache.commons.lang.StringUtils; /** * Contains the arguments for a query. 
Fill it out and pass to the query engine @@ -234,23 +235,80 @@ public class QueryArgs */ private String buildQueryPart(String myquery, String myfield) { - Perl5Util util = new Perl5Util(); - String newquery = "("; + StringBuilder newQuery = new StringBuilder(); + newQuery.append("("); - if (!myfield.equals("ANY")) + boolean newTerm = true; + boolean inPhrase = false; + char phraseChar = '\"'; + + StringTokenizer qtok = new StringTokenizer(myquery, " \t\n\r\f\"\'", true); + + while (qtok.hasMoreTokens()) { - newquery = newquery + myfield + ":"; - myquery = util.substitute("s/\'(.*)\'/\"$1\"/g", myquery); - - if (!util.match("/\".*\"/", myquery)) + String token = qtok.nextToken(); + if (StringUtils.isWhitespace(token)) { - myquery = util.substitute("s/ / " + myfield + ":/g", myquery); + if (!inPhrase) + { + newTerm = true; + } + + newQuery.append(token); + } + else + { + // Matched the end of the phrase + if (inPhrase && token.charAt(0) == phraseChar) + { + newQuery.append("\""); + inPhrase = false; + } + else + { + // If we aren't dealing with a new term, and have a single quote + // don't touch it. (for example, the apostrophe in it's). + if (!newTerm && token.charAt(0) == '\'') + { + newQuery.append(token); + } + else + { + // Treat - my"phrased query" - as - my "phrased query" + if (!newTerm && token.charAt(0) == '\"') + { + newQuery.append(" "); + newTerm = true; + } + + // This is a new term in the query (ie. preceded by nothing or whitespace) + // so apply a field restriction if specified + if (newTerm && !myfield.equals("ANY")) + { + newQuery.append(myfield).append(":"); + } + + // Open a new phrase, and closing at the corresponding character + // ie. 
'my phrase' or "my phrase" + if (token.charAt(0) == '\"' || token.charAt(0) == '\'') + { + newQuery.append("\""); + inPhrase = true; + newTerm = false; + phraseChar = token.charAt(0); + } + else + { + newQuery.append(token); + newTerm = false; + } + } + } } } - newquery = newquery + myquery + ")"; - - return (newquery); + newQuery.append(")"); + return newQuery.toString(); } /**