Merge pull request #55 from EKT/DS-1231

[DS-1231] Indexing both stored and display-value from input-forms for search

Squashed commit of the following:

commit 426db183fc
Merge: b05c6a3 8334fce
Author: kstamatis <kstamatis@ekt.gr>
Date:   Sat Sep 15 08:52:12 2012 -0700

    Merge pull request #2 from abollini/DS-1231

    Ds 1231

commit 8334fceed0
Author: Andrea Bollini <bollini@cilea.it>
Date:   Sat Sep 15 17:31:08 2012 +0200

    Use the default input set if the item is not yet archived

commit c6be362592
Author: Andrea Bollini <bollini@cilea.it>
Date:   Sat Sep 15 17:23:48 2012 +0200

    Minor changes

    Code formatted using the dspace style eclipse formatter
    No store values in the index as the lucene projection is not currently used
    use generics to avoid raw type warning

commit 6e6933d174
Merge: 3b1cc06 b05c6a3
Author: Andrea Bollini <bollini@cilea.it>
Date:   Sat Sep 15 16:19:20 2012 +0200

    Merge remote-tracking branch 'EKT/DS-1231' into DS-1231

commit b05c6a3288
Author: Rania Stathopoulou <iostath@ekt.gr>
Date:   Fri Sep 14 19:49:27 2012 +0300

    Search indexing all values (now getControlledVocabulariesDisplayValueLocalized returns a list of all the values and the DSIndexer manages these values)
    A return statement is added if the item has no collection

commit 33da186e6c
Author: Rania Stathopoulou <iostath@ekt.gr>
Date:   Thu Sep 13 11:19:56 2012 +0300

    Code Indentation

commit c31651bdd8
Author: Rania Stathopoulou <iostath@ekt.gr>
Date:   Tue Sep 11 11:54:43 2012 +0300

    Code changed in order to index both stored and display-value for all localized input-forms if the user specifies 'inputform' in Search Index Configuration of dspace.cfg, e.g: search.index.12 = language:dc.language:inputform

commit d278958c22
Author: Rania Stathopoulou <iostath@ekt.gr>
Date:   Fri Sep 7 12:19:25 2012 +0300

    Package name 'gr.ekt.repositories.dspace.utils' removed.
    All functions in gr.ekt.repositories.dspace.utils.Utilities.java were transferred to org.dspace.app.util.Util.java
    Documentation added for the above functions

commit 8135fd4eb7
Author: EKT <iostath@ekt.gr>
Date:   Wed Aug 8 18:40:40 2012 +0300

    Unthrown exceptions added

commit fa8796bfeb
Author: EKT <iostath@ekt.gr>
Date:   Wed Aug 8 17:06:20 2012 +0300

    Indexing both stored and display-value from input-forms for search
This commit is contained in:
Rania Stathopoulou
2012-09-17 11:13:30 +02:00
committed by Andrea Bollini
parent d0fa70843e
commit b23cb050d1
3 changed files with 171 additions and 7 deletions

View File

@@ -9,16 +9,23 @@ package org.dspace.app.util;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.sql.SQLException;
import java.text.DecimalFormat; import java.text.DecimalFormat;
import java.text.NumberFormat; import java.text.NumberFormat;
import java.util.ArrayList;
import java.util.Enumeration; import java.util.Enumeration;
import java.util.List;
import java.util.Locale; import java.util.Locale;
import java.util.Properties; import java.util.Properties;
import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletRequest;
import org.apache.log4j.Logger; import org.apache.log4j.Logger;
import org.dspace.content.Collection;
import org.dspace.content.DCValue;
import org.dspace.content.Item;
import org.dspace.core.Constants; import org.dspace.core.Constants;
import org.dspace.core.I18nUtil;
/** /**
@@ -350,4 +357,113 @@ public class Util {
} }
return sourceVersion; return sourceVersion;
} }
/**
 * Get a list of all the respective "displayed-value(s)" from the given
 * "stored-value(s)" for a specific metadata field of a DSpace Item, by
 * reading the input-forms file that belongs to the given locale.
 * <p>
 * The input set is looked up by the handle of the item's owning collection;
 * when the item has no owning collection an empty handle is used so that the
 * default input set is requested. Stored values without a non-empty display
 * value are skipped, so the returned list may be shorter than
 * <code>values</code>.
 *
 * @param item
 *            The DSpace Item
 * @param values
 *            A DCValue[] array of the specific "stored-value(s)"; may be
 *            null or empty, in which case an empty list is returned
 *            without loading the input forms
 * @param schema
 *            A String with the schema name of the metadata field
 * @param element
 *            A String with the element name of the metadata field
 * @param qualifier
 *            A String with the qualifier name of the metadata field, or
 *            null if the field is unqualified
 * @param locale
 *            The Locale used to select the input-forms file to read
 * @return A list of the respective "displayed-values" (never null)
 * @throws SQLException
 *             if a database error occurs while accessing the item's
 *             owning collection
 * @throws DCInputsReaderException
 *             if the input forms cannot be read
 */
public static List<String> getControlledVocabulariesDisplayValueLocalized(
        Item item, DCValue[] values, String schema, String element,
        String qualifier, Locale locale) throws SQLException,
        DCInputsReaderException
{
    List<String> toReturn = new ArrayList<String>();

    // Nothing to translate: return before loading the input forms, since
    // the indexer calls this method once per supported locale.
    if (values == null || values.length == 0)
    {
        return toReturn;
    }

    String formFileName = I18nUtil.getInputFormsFileName(locale);

    // An empty handle is used when there is no owning collection, so as to
    // get the default input set
    Collection collection = item.getOwningCollection();
    String colHandle = (collection == null) ? "" : collection.getHandle();

    // Read the input form file for the specific collection
    DCInputsReader inputsReader = new DCInputsReader(formFileName);
    DCInputSet inputSet = inputsReader.getInputs(colHandle);
    if (inputSet == null)
    {
        return toReturn;
    }

    String currentField = schema + "." + element
            + (qualifier == null ? "" : "." + qualifier);

    // Scan the pages of the input set for the first input that describes
    // the requested metadata field
    DCInput matchingInput = null;
    int pageNums = inputSet.getNumberPages();
    for (int p = 0; p < pageNums && matchingInput == null; p++)
    {
        DCInput[] inputs = inputSet.getPageRows(p, false, false);
        if (inputs == null)
        {
            continue;
        }
        for (DCInput input : inputs)
        {
            String inputField = input.getSchema()
                    + "."
                    + input.getElement()
                    + (input.getQualifier() == null ? "" : "."
                            + input.getQualifier());
            if (currentField.equals(inputField))
            {
                matchingInput = input;
                break;
            }
        }
    }
    if (matchingInput == null)
    {
        return toReturn;
    }

    // The pairs type does not change between values, so it is read once
    String pairsName = matchingInput.getPairsType();
    for (DCValue value : values)
    {
        if (value == null)
        {
            continue;
        }
        String displayVal = matchingInput.getDisplayString(pairsName,
                value.value);
        if (displayVal != null && !"".equals(displayVal))
        {
            toReturn.add(displayVal);
        }
    }
    return toReturn;
}
} }

View File

@@ -66,6 +66,9 @@ import org.dspace.handle.HandleManager;
import org.dspace.sort.SortOption; import org.dspace.sort.SortOption;
import org.dspace.sort.OrderFormat; import org.dspace.sort.OrderFormat;
import org.dspace.app.util.DCInputsReaderException;
import org.dspace.app.util.Util;
/** /**
* DSIndexer contains the methods that index Items and their metadata, * DSIndexer contains the methods that index Items and their metadata,
* collections, communities, etc. It is meant to either be invoked from the * collections, communities, etc. It is meant to either be invoked from the
@@ -251,7 +254,7 @@ public class DSIndexer
* @throws SQLException * @throws SQLException
* @throws IOException * @throws IOException
*/ */
public static void indexContent(Context context, DSpaceObject dso) throws SQLException public static void indexContent(Context context, DSpaceObject dso) throws SQLException, DCInputsReaderException
{ {
indexContent(context, dso, false); indexContent(context, dso, false);
} }
@@ -267,7 +270,7 @@ public class DSIndexer
* @throws SQLException * @throws SQLException
* @throws IOException * @throws IOException
*/ */
public static void indexContent(Context context, DSpaceObject dso, boolean force) throws SQLException public static void indexContent(Context context, DSpaceObject dso, boolean force) throws SQLException, DCInputsReaderException
{ {
try try
{ {
@@ -649,7 +652,7 @@ public class DSIndexer
} }
static IndexingTask prepareIndexingTask(DSpaceObject dso, boolean force) throws SQLException, IOException static IndexingTask prepareIndexingTask(DSpaceObject dso, boolean force) throws SQLException, IOException, DCInputsReaderException
{ {
String handle = dso.getHandle(); String handle = dso.getHandle();
Term term = new Term("handle", handle); Term term = new Term("handle", handle);
@@ -1037,7 +1040,7 @@ public class DSIndexer
* @throws SQLException * @throws SQLException
* @throws IOException * @throws IOException
*/ */
private static Document buildDocumentForItem(Item item) throws SQLException, IOException private static Document buildDocumentForItem(Item item) throws SQLException, IOException, DCInputsReaderException
{ {
String handle = item.getHandle(); String handle = item.getHandle();
@@ -1065,7 +1068,49 @@ public class DSIndexer
mydc = item.getMetadata(indexConfigArr[i].schema, indexConfigArr[i].element, indexConfigArr[i].qualifier, Item.ANY); mydc = item.getMetadata(indexConfigArr[i].schema, indexConfigArr[i].element, indexConfigArr[i].qualifier, Item.ANY);
} }
for (j = 0; j < mydc.length; j++)
//Index the controlled vocabularies localized display values for all localized input-forms.xml (e.g. input-forms_el.xml)
if ("inputform".equalsIgnoreCase(indexConfigArr[i].type)){
List<String> newValues = new ArrayList<String>();
Locale[] supportedLocales=I18nUtil.getSupportedLocales();
// Get the display value of the respective stored value
for (int k = 0; k < supportedLocales.length; k++)
{
List<String> displayValues = Util
.getControlledVocabulariesDisplayValueLocalized(
item, mydc, indexConfigArr[i].schema,
indexConfigArr[i].element,
indexConfigArr[i].qualifier,
supportedLocales[k]);
if (displayValues != null && !displayValues.isEmpty())
{
for (int d = 0; d < displayValues.size(); d++)
{
newValues.add(displayValues.get(d));
}
}
}
if (newValues!=null){
for (int m=0;m<newValues.size();m++){
if (!"".equals(newValues.get(m))){
String toAdd=(String) newValues.get(m);
doc.add( new Field(indexConfigArr[i].indexName,
toAdd,
Field.Store.NO,
Field.Index.ANALYZED));
}
}
}
}
for (j = 0; j < mydc.length; j++)
{ {
if (!StringUtils.isEmpty(mydc[j].value)) if (!StringUtils.isEmpty(mydc[j].value))
{ {

View File

@@ -263,8 +263,11 @@ search.maxfieldlength = 10000
# DC metadata elements.qualifiers to be indexed for search # DC metadata elements.qualifiers to be indexed for search
# format: - search.index.[number] = [search field]:element.qualifier # format: - search.index.[number] = [search field]:element.qualifier
# - * used as wildcard # - * used as wildcard
# - inputform -> Use this type when there are different input-forms for the different locales supported by the repository (e.g. input-forms_el.xml, input-forms_pt.xml etc.). In this case, both the
# stored and the displayed value from all input-forms are indexed. If the stored value is not found in input-forms, it is indexed anyway.
# e.g.: search.index.12 = language:dc.language:inputform
#
### changing these will change your search results, ### ### changing these will change your search results, ###
### but will NOT automatically change your search displays ### ### but will NOT automatically change your search displays ###