mirror of
https://github.com/DSpace/DSpace.git
synced 2025-10-07 18:14:26 +00:00
More stable metadata import (SFP 1670093). Also:
- fix database_schema.sql errors (Postgres). - fix browse index ant task git-svn-id: http://scm.dspace.org/svn/repo/trunk@2123 9c30dcfa-912a-0410-8fc2-9e0234be79fd
This commit is contained in:
@@ -0,0 +1,206 @@
|
|||||||
|
/*
|
||||||
|
* MetadataImporter.java
|
||||||
|
*
|
||||||
|
* Copyright (c) 2006, Imperial College London. All rights reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are
|
||||||
|
* met:
|
||||||
|
*
|
||||||
|
* - Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
*
|
||||||
|
* - Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* - Neither the name of Imperial College nor the names of their
|
||||||
|
* contributors may be used to endorse or promote products derived from
|
||||||
|
* this software without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
* HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||||
|
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||||
|
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||||
|
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||||
|
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
|
||||||
|
* TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
|
* USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
|
||||||
|
* DAMAGE.
|
||||||
|
*/
|
||||||
|
package org.dspace.administer;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.sql.SQLException;
|
||||||
|
|
||||||
|
import javax.xml.parsers.DocumentBuilder;
|
||||||
|
import javax.xml.parsers.DocumentBuilderFactory;
|
||||||
|
import javax.xml.parsers.ParserConfigurationException;
|
||||||
|
import javax.xml.transform.TransformerException;
|
||||||
|
|
||||||
|
import org.apache.commons.cli.CommandLine;
|
||||||
|
import org.apache.commons.cli.CommandLineParser;
|
||||||
|
import org.apache.commons.cli.Options;
|
||||||
|
import org.apache.commons.cli.ParseException;
|
||||||
|
import org.apache.commons.cli.PosixParser;
|
||||||
|
|
||||||
|
import org.apache.xpath.XPathAPI;
|
||||||
|
|
||||||
|
import org.dspace.administer.DCType;
|
||||||
|
import org.dspace.authorize.AuthorizeException;
|
||||||
|
import org.dspace.content.MetadataField;
|
||||||
|
import org.dspace.content.MetadataSchema;
|
||||||
|
import org.dspace.content.NonUniqueMetadataException;
|
||||||
|
import org.dspace.core.Context;
|
||||||
|
import org.dspace.core.LogManager;
|
||||||
|
|
||||||
|
import org.w3c.dom.Document;
|
||||||
|
import org.w3c.dom.Node;
|
||||||
|
import org.w3c.dom.NodeList;
|
||||||
|
import org.w3c.dom.NamedNodeMap;
|
||||||
|
|
||||||
|
import org.xml.sax.SAXException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author Richard Jones
|
||||||
|
*
|
||||||
|
* This class takes an xml document as passed in the arguments and
|
||||||
|
* uses it to create metadata elements in the Metadata Registry if
|
||||||
|
* they do not already exist
|
||||||
|
*
|
||||||
|
* The format of the XML file is as follows:
|
||||||
|
*
|
||||||
|
* <dspace-dc-types>
|
||||||
|
* <dc-type>
|
||||||
|
* <schema>icadmin</schema>
|
||||||
|
* <element>status</element>
|
||||||
|
* <qualifier>dateset</qualifier>
|
||||||
|
* <scope_note>the workflow status of an item</scope_note>
|
||||||
|
* </dc-type>
|
||||||
|
*
|
||||||
|
* [....]
|
||||||
|
*
|
||||||
|
* </dspace-dc-types>
|
||||||
|
*/
|
||||||
|
public class MetadataImporter
|
||||||
|
{
|
||||||
|
/**
|
||||||
|
* main method for reading user input from the command line
|
||||||
|
*/
|
||||||
|
public static void main(String[] args)
|
||||||
|
throws ParseException, SQLException, IOException, TransformerException,
|
||||||
|
ParserConfigurationException, AuthorizeException, SAXException,
|
||||||
|
NonUniqueMetadataException
|
||||||
|
{
|
||||||
|
// create an options object and populate it
|
||||||
|
CommandLineParser parser = new PosixParser();
|
||||||
|
Options options = new Options();
|
||||||
|
options.addOption("f", "file", true, "source xml file for DC fields");
|
||||||
|
CommandLine line = parser.parse(options, args);
|
||||||
|
|
||||||
|
String file = null;
|
||||||
|
if (line.hasOption('f'))
|
||||||
|
{
|
||||||
|
file = line.getOptionValue('f');
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
usage();
|
||||||
|
System.exit(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
loadRegistry(file);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Load the data from the specified file path into the database
|
||||||
|
*
|
||||||
|
* @param file the file path containing the source data
|
||||||
|
*/
|
||||||
|
public static void loadRegistry(String file)
|
||||||
|
throws SQLException, IOException, TransformerException, ParserConfigurationException,
|
||||||
|
AuthorizeException, SAXException, NonUniqueMetadataException
|
||||||
|
{
|
||||||
|
// create a context
|
||||||
|
Context context = new Context();
|
||||||
|
context.setIgnoreAuthorization(true);
|
||||||
|
|
||||||
|
// read the XML
|
||||||
|
Document document = RegistryImporter.loadXML(file);
|
||||||
|
|
||||||
|
// Get the nodes corresponding to types
|
||||||
|
NodeList typeNodes = XPathAPI.selectNodeList(document, "/dspace-dc-types/dc-type");
|
||||||
|
|
||||||
|
// Add each one as a new format to the registry
|
||||||
|
for (int i = 0; i < typeNodes.getLength(); i++)
|
||||||
|
{
|
||||||
|
Node n = typeNodes.item(i);
|
||||||
|
loadType(context, n);
|
||||||
|
}
|
||||||
|
|
||||||
|
context.complete();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Process a node in the metadata registry XML file. The node must
|
||||||
|
* be a "dc-type" node. If the type already exists, then it
|
||||||
|
* will not be reimported
|
||||||
|
*
|
||||||
|
* @param context
|
||||||
|
* DSpace context object
|
||||||
|
* @param node
|
||||||
|
* the node in the DOM tree
|
||||||
|
* @throws NonUniqueMetadataException
|
||||||
|
*/
|
||||||
|
private static void loadType(Context context, Node node)
|
||||||
|
throws SQLException, IOException, TransformerException,
|
||||||
|
AuthorizeException, NonUniqueMetadataException
|
||||||
|
{
|
||||||
|
// Get the values
|
||||||
|
String schema = RegistryImporter.getElementData(node, "schema");
|
||||||
|
String element = RegistryImporter.getElementData(node, "element");
|
||||||
|
String qualifier = RegistryImporter.getElementData(node, "qualifier");
|
||||||
|
String scopeNote = RegistryImporter.getElementData(node, "scope_note");
|
||||||
|
|
||||||
|
// If the schema is not provided default to DC
|
||||||
|
if (schema == null)
|
||||||
|
{
|
||||||
|
schema = MetadataSchema.DC_SCHEMA;
|
||||||
|
}
|
||||||
|
|
||||||
|
System.out.print("Registering Metadata: " + schema + "." + element + "." + qualifier + " ... ");
|
||||||
|
|
||||||
|
// Find the matching schema object
|
||||||
|
MetadataSchema schemaObj = MetadataSchema.find(context, schema);
|
||||||
|
|
||||||
|
MetadataField mf = MetadataField.findByElement(context, schemaObj.getSchemaID(), element, qualifier);
|
||||||
|
if (mf != null)
|
||||||
|
{
|
||||||
|
System.out.println("already exists, skipping");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
MetadataField field = new MetadataField();
|
||||||
|
field.setSchemaID(schemaObj.getSchemaID());
|
||||||
|
field.setElement(element);
|
||||||
|
field.setQualifier(qualifier);
|
||||||
|
field.setScopeNote(scopeNote);
|
||||||
|
field.create(context);
|
||||||
|
System.out.println("created");
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Print the usage message to stdout
|
||||||
|
*/
|
||||||
|
public static void usage()
|
||||||
|
{
|
||||||
|
String usage = "Use this class with the following option:\n" +
|
||||||
|
" -f <xml source file> : specify which xml source file " +
|
||||||
|
"contains the DC fields to import.\n";
|
||||||
|
System.out.println(usage);
|
||||||
|
}
|
||||||
|
}
|
@@ -0,0 +1,84 @@
|
|||||||
|
/*
|
||||||
|
* RegistryImportException.java
|
||||||
|
*
|
||||||
|
* Copyright (c) 2006, Imperial College London. All rights reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are
|
||||||
|
* met:
|
||||||
|
*
|
||||||
|
* - Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
*
|
||||||
|
* - Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* - Neither the name of Imperial College nor the names of their
|
||||||
|
* contributors may be used to endorse or promote products derived from
|
||||||
|
* this software without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
* HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||||
|
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||||
|
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||||
|
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||||
|
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
|
||||||
|
* TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
|
* USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
|
||||||
|
* DAMAGE.
|
||||||
|
*/
|
||||||
|
package org.dspace.administer;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author Richard Jones
|
||||||
|
*
|
||||||
|
* An exception to report any problems with registry imports
|
||||||
|
*/
|
||||||
|
public class RegistryImportException extends Exception
{
    /**
     * Create an empty registry import exception
     */
    public RegistryImportException()
    {
        super();
    }

    /**
     * create an exception with only a message
     *
     * @param message
     *            description of the problem
     */
    public RegistryImportException(String message)
    {
        super(message);
    }

    /**
     * create an exception with an inner exception and a message
     *
     * @param message
     *            description of the problem
     * @param e
     *            the underlying cause, available via <code>getCause()</code>
     */
    public RegistryImportException(String message, Throwable e)
    {
        super(message, e);
    }

    /**
     * create an exception with an inner exception
     *
     * @param e
     *            the underlying cause, available via <code>getCause()</code>
     */
    public RegistryImportException(Throwable e)
    {
        super(e);
    }
}
|
@@ -0,0 +1,183 @@
|
|||||||
|
/*
|
||||||
|
* RegistryImporter.java
|
||||||
|
*
|
||||||
|
* Copyright (c) 2006, Imperial College London. All rights reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are
|
||||||
|
* met:
|
||||||
|
*
|
||||||
|
* - Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
*
|
||||||
|
* - Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* - Neither the name of Imperial College nor the names of their
|
||||||
|
* contributors may be used to endorse or promote products derived from
|
||||||
|
* this software without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
* HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||||
|
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||||
|
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||||
|
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||||
|
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
|
||||||
|
* TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
|
* USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
|
||||||
|
* DAMAGE.
|
||||||
|
*/
|
||||||
|
package org.dspace.administer;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.sql.SQLException;
|
||||||
|
|
||||||
|
import javax.xml.parsers.DocumentBuilder;
|
||||||
|
import javax.xml.parsers.DocumentBuilderFactory;
|
||||||
|
import javax.xml.parsers.ParserConfigurationException;
|
||||||
|
import javax.xml.transform.TransformerException;
|
||||||
|
|
||||||
|
import org.apache.commons.cli.CommandLine;
|
||||||
|
import org.apache.commons.cli.CommandLineParser;
|
||||||
|
import org.apache.commons.cli.Options;
|
||||||
|
import org.apache.commons.cli.ParseException;
|
||||||
|
import org.apache.commons.cli.PosixParser;
|
||||||
|
|
||||||
|
import org.apache.xpath.XPathAPI;
|
||||||
|
|
||||||
|
import org.dspace.administer.DCType;
|
||||||
|
import org.dspace.authorize.AuthorizeException;
|
||||||
|
import org.dspace.content.MetadataField;
|
||||||
|
import org.dspace.content.MetadataSchema;
|
||||||
|
import org.dspace.content.NonUniqueMetadataException;
|
||||||
|
import org.dspace.core.Context;
|
||||||
|
import org.dspace.core.LogManager;
|
||||||
|
|
||||||
|
import org.w3c.dom.Document;
|
||||||
|
import org.w3c.dom.Node;
|
||||||
|
import org.w3c.dom.NodeList;
|
||||||
|
import org.w3c.dom.NamedNodeMap;
|
||||||
|
|
||||||
|
import org.xml.sax.SAXException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author Richard Jones
|
||||||
|
*
|
||||||
|
* This class provides the tools that registry importers might need to
|
||||||
|
* use. Basically some utility methods. And actually, although it says
|
||||||
|
* I am the author, really I ripped these methods off from other
|
||||||
|
* classes
|
||||||
|
*/
|
||||||
|
public class RegistryImporter
|
||||||
|
{
|
||||||
|
/**
|
||||||
|
* Load in the XML from file.
|
||||||
|
*
|
||||||
|
* @param filename
|
||||||
|
* the filename to load from
|
||||||
|
*
|
||||||
|
* @return the DOM representation of the XML file
|
||||||
|
*/
|
||||||
|
public static Document loadXML(String filename)
|
||||||
|
throws IOException, ParserConfigurationException, SAXException
|
||||||
|
{
|
||||||
|
DocumentBuilder builder = DocumentBuilderFactory.newInstance()
|
||||||
|
.newDocumentBuilder();
|
||||||
|
|
||||||
|
Document document = builder.parse(new File(filename));
|
||||||
|
|
||||||
|
return document;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the CDATA of a particular element. For example, if the XML document
|
||||||
|
* contains:
|
||||||
|
* <P>
|
||||||
|
* <code>
|
||||||
|
* <foo><mimetype>application/pdf</mimetype></foo>
|
||||||
|
* </code>
|
||||||
|
* passing this the <code>foo</code> node and <code>mimetype</code> will
|
||||||
|
* return <code>application/pdf</code>.
|
||||||
|
* </P>
|
||||||
|
* Why this isn't a core part of the XML API I do not know...
|
||||||
|
*
|
||||||
|
* @param parentElement
|
||||||
|
* the element, whose child element you want the CDATA from
|
||||||
|
* @param childName
|
||||||
|
* the name of the element you want the CDATA from
|
||||||
|
*
|
||||||
|
* @return the CDATA as a <code>String</code>
|
||||||
|
*/
|
||||||
|
public static String getElementData(Node parentElement, String childName)
|
||||||
|
throws TransformerException
|
||||||
|
{
|
||||||
|
// Grab the child node
|
||||||
|
Node childNode = XPathAPI.selectSingleNode(parentElement, childName);
|
||||||
|
|
||||||
|
if (childNode == null)
|
||||||
|
{
|
||||||
|
// No child node, so no values
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get the #text
|
||||||
|
Node dataNode = childNode.getFirstChild();
|
||||||
|
|
||||||
|
if (dataNode == null)
|
||||||
|
{
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get the data
|
||||||
|
String value = dataNode.getNodeValue().trim();
|
||||||
|
|
||||||
|
return value;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get repeated CDATA for a particular element. For example, if the XML
|
||||||
|
* document contains:
|
||||||
|
* <P>
|
||||||
|
* <code>
|
||||||
|
* <foo>
|
||||||
|
* <bar>val1</bar>
|
||||||
|
* <bar>val2</bar>
|
||||||
|
* </foo>
|
||||||
|
* </code>
|
||||||
|
* passing this the <code>foo</code> node and <code>bar</code> will
|
||||||
|
* return <code>val1</code> and <code>val2</code>.
|
||||||
|
* </P>
|
||||||
|
* Why this also isn't a core part of the XML API I do not know...
|
||||||
|
*
|
||||||
|
* @param parentElement
|
||||||
|
* the element, whose child element you want the CDATA from
|
||||||
|
* @param childName
|
||||||
|
* the name of the element you want the CDATA from
|
||||||
|
*
|
||||||
|
* @return the CDATA as a <code>String</code>
|
||||||
|
*/
|
||||||
|
public static String[] getRepeatedElementData(Node parentElement,
|
||||||
|
String childName) throws TransformerException
|
||||||
|
{
|
||||||
|
// Grab the child node
|
||||||
|
NodeList childNodes = XPathAPI.selectNodeList(parentElement, childName);
|
||||||
|
|
||||||
|
String[] data = new String[childNodes.getLength()];
|
||||||
|
|
||||||
|
for (int i = 0; i < childNodes.getLength(); i++)
|
||||||
|
{
|
||||||
|
// Get the #text node
|
||||||
|
Node dataNode = childNodes.item(i).getFirstChild();
|
||||||
|
|
||||||
|
// Get the data
|
||||||
|
data[i] = dataNode.getNodeValue().trim();
|
||||||
|
}
|
||||||
|
|
||||||
|
return data;
|
||||||
|
}
|
||||||
|
}
|
@@ -0,0 +1,193 @@
|
|||||||
|
/*
|
||||||
|
* SchemaImporter.java
|
||||||
|
*
|
||||||
|
* Copyright (c) 2006, Imperial College London. All rights reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are
|
||||||
|
* met:
|
||||||
|
*
|
||||||
|
* - Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
*
|
||||||
|
* - Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* - Neither the name of Imperial College nor the names of their
|
||||||
|
* contributors may be used to endorse or promote products derived from
|
||||||
|
* this software without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
* HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||||
|
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||||
|
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||||
|
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||||
|
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
|
||||||
|
* TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
|
* USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
|
||||||
|
* DAMAGE.
|
||||||
|
*/
|
||||||
|
package org.dspace.administer;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.sql.SQLException;
|
||||||
|
|
||||||
|
import javax.xml.parsers.DocumentBuilder;
|
||||||
|
import javax.xml.parsers.DocumentBuilderFactory;
|
||||||
|
import javax.xml.parsers.ParserConfigurationException;
|
||||||
|
import javax.xml.transform.TransformerException;
|
||||||
|
|
||||||
|
import org.apache.commons.cli.CommandLine;
|
||||||
|
import org.apache.commons.cli.CommandLineParser;
|
||||||
|
import org.apache.commons.cli.Options;
|
||||||
|
import org.apache.commons.cli.ParseException;
|
||||||
|
import org.apache.commons.cli.PosixParser;
|
||||||
|
|
||||||
|
import org.apache.xpath.XPathAPI;
|
||||||
|
|
||||||
|
import org.dspace.administer.DCType;
|
||||||
|
import org.dspace.authorize.AuthorizeException;
|
||||||
|
import org.dspace.content.MetadataField;
|
||||||
|
import org.dspace.content.MetadataSchema;
|
||||||
|
import org.dspace.content.NonUniqueMetadataException;
|
||||||
|
import org.dspace.core.Context;
|
||||||
|
import org.dspace.core.LogManager;
|
||||||
|
|
||||||
|
import org.w3c.dom.Document;
|
||||||
|
import org.w3c.dom.Node;
|
||||||
|
import org.w3c.dom.NodeList;
|
||||||
|
import org.w3c.dom.NamedNodeMap;
|
||||||
|
|
||||||
|
import org.xml.sax.SAXException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author Richard Jones
|
||||||
|
*
|
||||||
|
* This class takes an xml document as passed in the arguments and
|
||||||
|
* uses it to create the required metadata schemas for the repository.
|
||||||
|
* This needs to be run before the MetadataImporter if there are
|
||||||
|
* metadata elements in that document that rely on schemas imported here
|
||||||
|
*
|
||||||
|
* The form of the XML is as follows
|
||||||
|
*
|
||||||
|
* <metadata-schemas>
|
||||||
|
* <schema>
|
||||||
|
* <name>dc</name>
|
||||||
|
* <namespace>http://dublincore.org/documents/dcmi-terms/</namespace>
|
||||||
|
* </schema>
|
||||||
|
* </metadata-schemas>
|
||||||
|
*/
|
||||||
|
public class SchemaImporter
|
||||||
|
{
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Main method for collecting arguments from the command line
|
||||||
|
*/
|
||||||
|
public static void main(String[] args)
|
||||||
|
throws RegistryImportException, ParseException, ParserConfigurationException
|
||||||
|
{
|
||||||
|
// create an options object and populate it
|
||||||
|
CommandLineParser parser = new PosixParser();
|
||||||
|
Options options = new Options();
|
||||||
|
options.addOption("f", "file", true, "source xml file for registry");
|
||||||
|
CommandLine line = parser.parse(options, args);
|
||||||
|
|
||||||
|
String file = null;
|
||||||
|
if (line.hasOption('f'))
|
||||||
|
{
|
||||||
|
file = line.getOptionValue('f');
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
usage();
|
||||||
|
System.exit(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
loadRegistry(file);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Load the data from the specified file path into the database
|
||||||
|
*
|
||||||
|
* @param file the file path containing the source data
|
||||||
|
*/
|
||||||
|
public static void loadRegistry(String file)
|
||||||
|
throws RegistryImportException
|
||||||
|
{
|
||||||
|
try
|
||||||
|
{
|
||||||
|
// create a context
|
||||||
|
Context context = new Context();
|
||||||
|
context.setIgnoreAuthorization(true);
|
||||||
|
|
||||||
|
// read the XML
|
||||||
|
Document document = RegistryImporter.loadXML(file);
|
||||||
|
|
||||||
|
// Get the nodes corresponding to types
|
||||||
|
NodeList typeNodes = XPathAPI.selectNodeList(document, "/metadata-schemas/schema");
|
||||||
|
|
||||||
|
// Add each one as a new format to the registry
|
||||||
|
for (int i = 0; i < typeNodes.getLength(); i++)
|
||||||
|
{
|
||||||
|
Node n = typeNodes.item(i);
|
||||||
|
loadSchema(context, n);
|
||||||
|
}
|
||||||
|
|
||||||
|
context.complete();
|
||||||
|
}
|
||||||
|
catch (Exception e)
|
||||||
|
{
|
||||||
|
throw new RegistryImportException("there was a problem loading the schema registry", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Process a node in the metadata registry XML file. If the
|
||||||
|
* schema already exists, it will not be recreated
|
||||||
|
*
|
||||||
|
* @param context
|
||||||
|
* DSpace context object
|
||||||
|
* @param node
|
||||||
|
* the node in the DOM tree
|
||||||
|
* @throws NonUniqueMetadataException
|
||||||
|
*/
|
||||||
|
private static void loadSchema(Context context, Node node)
|
||||||
|
throws SQLException, IOException, TransformerException,
|
||||||
|
AuthorizeException, NonUniqueMetadataException
|
||||||
|
{
|
||||||
|
// Get the values
|
||||||
|
String name = RegistryImporter.getElementData(node, "name");
|
||||||
|
String namespace = RegistryImporter.getElementData(node, "namespace");
|
||||||
|
|
||||||
|
System.out.print("Registering Schema: " + name + " - " + namespace + " ... ");
|
||||||
|
|
||||||
|
// check to see if the schema already exists
|
||||||
|
MetadataSchema s = MetadataSchema.find(context, name);
|
||||||
|
|
||||||
|
if (s != null)
|
||||||
|
{
|
||||||
|
System.out.println("already exists, skipping");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
MetadataSchema schema = new MetadataSchema(namespace, name);
|
||||||
|
schema.create(context);
|
||||||
|
System.out.println("created");
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Print the usage message to stdout
|
||||||
|
*/
|
||||||
|
public static void usage()
|
||||||
|
{
|
||||||
|
String usage = "Use this class with the following option:\n" +
|
||||||
|
" -f <xml source file> : specify which xml source file " +
|
||||||
|
"contains the schemas to import.\n";
|
||||||
|
System.out.println(usage);
|
||||||
|
}
|
||||||
|
}
|
@@ -1,3 +1,6 @@
|
|||||||
|
(Richard Jones)
|
||||||
|
- S.F. Patch 1670093 More stable metadata and schema registry import
|
||||||
|
|
||||||
(Richard Jones / Graham Triggs)
|
(Richard Jones / Graham Triggs)
|
||||||
- New Browse code that allows customisation of the available indexes via dspace.cfg,
|
- New Browse code that allows customisation of the available indexes via dspace.cfg,
|
||||||
and pluggable normalisation of the sort strings.
|
and pluggable normalisation of the sort strings.
|
||||||
|
17
dspace/config/registries/schemas.xml
Normal file
17
dspace/config/registries/schemas.xml
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
<?xml version="1.0"?>
|
||||||
|
|
||||||
|
<!--
|
||||||
|
- schemas.xml
|
||||||
|
-
|
||||||
|
- Registry of metadata schemas required by the default DSpace instance
|
||||||
|
-
|
||||||
|
-->
|
||||||
|
|
||||||
|
<!-- start of XML -->
|
||||||
|
|
||||||
|
<metadata-schemas>
|
||||||
|
<schema>
|
||||||
|
<name>dc</name>
|
||||||
|
<namespace>http://dublincore.org/documents/dcmi-terms/</namespace>
|
||||||
|
</schema>
|
||||||
|
</metadata-schemas>
|
@@ -207,8 +207,9 @@ property2.name = ${dspace.dir}/rest/of/path
|
|||||||
<h2><a name="registries" id="registries">The Metadata and Bitstream Format Registries</a></h2>
|
<h2><a name="registries" id="registries">The Metadata and Bitstream Format Registries</a></h2>
|
||||||
|
|
||||||
<p>
|
<p>
|
||||||
The <code><i>[dspace]</i>/config/registries</code> directory contains two XML files.
|
The <code><i>[dspace]</i>/config/registries</code> directory contains three XML files.
|
||||||
These are used to load the <em>initial</em> contents of the <a href="appendix.html#dublincoreregistry">Dublin Core Metadata registry</a>
|
These are used to load the <em>initial</em> contents of the Metadata Schema Registry,
|
||||||
|
<a href="appendix.html#dublincoreregistry">Dublin Core Metadata registry</a>
|
||||||
and <a href="appendix.html#bitstreamformatregistry">Bitstream Format registry</a>.
|
and <a href="appendix.html#bitstreamformatregistry">Bitstream Format registry</a>.
|
||||||
After the initial loading (performed by <code>ant fresh_install</code> above),
|
After the initial loading (performed by <code>ant fresh_install</code> above),
|
||||||
the registries reside in the database; the XML files are not updated.
|
the registries reside in the database; the XML files are not updated.
|
||||||
@@ -216,23 +217,67 @@ property2.name = ${dspace.dir}/rest/of/path
|
|||||||
|
|
||||||
<p>
|
<p>
|
||||||
In order to change the registries, you may adjust the XML files before the first installation of DSpace.
|
In order to change the registries, you may adjust the XML files before the first installation of DSpace.
|
||||||
On an allready running instance it is recommended to change the registries via DSpace admin UI.
|
On an already running instance it is recommended to change bitstream registries via DSpace admin UI, but
|
||||||
|
the metadata registries can be loaded again at any time from the XML files without difficulty.
|
||||||
The changes made via admin UI are not reflected in the XML files.
|
The changes made via admin UI are not reflected in the XML files.
|
||||||
</p>
|
</p>
|
||||||
|
|
||||||
|
<h3>Metadata Schema Registry</h3>
|
||||||
|
|
||||||
|
<p>The default metadata schema in DSpace is Dublin Core, so it is distributed with a single entry in
|
||||||
|
the source XML file for that namespace. If you wish to add more schemas you can do this in one of two
|
||||||
|
ways. Via the DSpace admin UI you may define new Metadata Schemas, edit existing schemas and move
|
||||||
|
elements between schemas. But you may also modify the XML file (or provide an additional one), and
|
||||||
|
re-import the data as follows:</p>
|
||||||
|
|
||||||
|
<pre>
|
||||||
|
[dspace]/bin/dsrun org.dspace.administer.SchemaImporter -f [xml file]
|
||||||
|
</pre>
|
||||||
|
|
||||||
|
The XML file should be structured as follows:
|
||||||
|
|
||||||
|
<pre>
|
||||||
|
<metadata-schemas>
|
||||||
|
<schema>
|
||||||
|
<name>[schema name]</name>
|
||||||
|
<namespace>http://myu.edu/some/namespace</namespace>
|
||||||
|
</schema>
|
||||||
|
</metadata-schemas>
|
||||||
|
</pre>
|
||||||
|
|
||||||
<h3>Metadata Format Registries</h3>
|
<h3>Metadata Format Registries</h3>
|
||||||
|
|
||||||
<p>
|
<p>
|
||||||
The default metadata schema is Dublin Core, so DSpace is distributed with a default Dublin Core Metadata Registry.
|
The default metadata schema is Dublin Core, so DSpace is distributed with a default Dublin Core Metadata Registry.
|
||||||
Currently, the system requires that every item have a Dublin Core record.<br/>
|
Currently, the system requires that every item have a Dublin Core record.</p>
|
||||||
Via the DSpace admin UI you may define new Metadata Schemas, edit existing schemas and move elements between schemas.<br/>
|
<p>There is a set of Dublin Core Elements, which is used by the system and should not be removed or moved to another schema,
|
||||||
There is a set of Dublin Core Elements, which is used by the system and should not be removed or moved to another schema,
|
see <a href="appendix.html#dublincoreregistry">Appendix: Default Dublin Core Metadata registry</a>.</p>
|
||||||
see <a href="appendix.html#dublincoreregistry">Appendix: Default Dublin Core Metadata registry</a>.<br/>
|
<p><strong>Note</strong>: altering a Metadata Registry has no effect on corresponding parts, e.g. item submission interface, item display,
|
||||||
<strong>Note</strong>: altering a Metadata Registry has no effect on corresponding parts, e.g. item submission interface, item display,
|
item import and vice versa. Every metadata element used in submission interface or item import must be registered before using it.</p>
|
||||||
item import and vice versa. Every metadata element used in submission interface or item import must be registered before using it.<br />
|
<p><strong>Note</strong> also that deleting a metadata element will delete all its corresponding values.
|
||||||
<strong>Note</strong> also that deleting a metadata element will delete all its corresponding values.
|
|
||||||
</p>
|
</p>
|
||||||
|
|
||||||
|
<p>If you wish to add more metadata elements, you can do this in one of two ways. Via the DSpace admin UI you may define
|
||||||
|
new metadata elements in the different available schemas. But you may also modify the XML file (or provide an additional one),
|
||||||
|
and re-import the data as follows:
|
||||||
|
|
||||||
|
<pre>
|
||||||
|
[dspace]/bin/dsrun org.dspace.administer.MetadataImporter -f [xml file]
|
||||||
|
</pre>
|
||||||
|
|
||||||
|
The XML file should be structured as follows:
|
||||||
|
|
||||||
|
<pre>
|
||||||
|
<dspace-dc-types>
|
||||||
|
<dc-type>
|
||||||
|
<schema>dc</schema>
|
||||||
|
<element>contributor</element>
|
||||||
|
<qualifier>advisor</qualifier>
|
||||||
|
<scope_note>Use primarily for thesis advisor.</scope_note>
|
||||||
|
</dc-type>
|
||||||
|
</dspace-dc-types>
|
||||||
|
</pre>
|
||||||
|
|
||||||
<h3>Bitstream Format Registry</h3>
|
<h3>Bitstream Format Registry</h3>
|
||||||
<p>
|
<p>
|
||||||
The bitstream formats recognized by the system and levels of support are similarly stored in the bitstream format registry.
|
The bitstream formats recognized by the system and levels of support are similarly stored in the bitstream format registry.
|
||||||
|
@@ -300,9 +300,6 @@ CREATE TABLE MetadataValue
|
|||||||
place INTEGER
|
place INTEGER
|
||||||
);
|
);
|
||||||
|
|
||||||
-- Create the DC schema
|
|
||||||
INSERT INTO MetadataSchemaRegistry VALUES (getnextid('metadataschemaregistry'),'http://dublincore.org/documents/dcmi-terms/','dc');
|
|
||||||
|
|
||||||
-- Create a dcvalue view for backwards compatibility
|
-- Create a dcvalue view for backwards compatibility
|
||||||
CREATE VIEW dcvalue AS
|
CREATE VIEW dcvalue AS
|
||||||
SELECT MetadataValue.metadata_value_id AS "dc_value_id", MetadataValue.item_id,
|
SELECT MetadataValue.metadata_value_id AS "dc_value_id", MetadataValue.item_id,
|
||||||
@@ -457,8 +454,8 @@ CREATE TABLE WorkspaceItem
|
|||||||
published_before BOOL,
|
published_before BOOL,
|
||||||
multiple_files BOOL,
|
multiple_files BOOL,
|
||||||
-- How far the user has got in the submit process
|
-- How far the user has got in the submit process
|
||||||
stage_reached INTEGER
|
stage_reached INTEGER,
|
||||||
page_reached INTEGER;
|
page_reached INTEGER
|
||||||
);
|
);
|
||||||
|
|
||||||
-------------------------------------------------------
|
-------------------------------------------------------
|
||||||
|
@@ -286,12 +286,19 @@ Common usage:
|
|||||||
- properties -->
|
- properties -->
|
||||||
<target name="setup_database" description="Create database tables">
|
<target name="setup_database" description="Create database tables">
|
||||||
|
|
||||||
|
<!-- Load the Schema -->
|
||||||
<java classname="org.dspace.storage.rdbms.InitializeDatabase" classpathref="class.path" fork="yes" failonerror="yes">
|
<java classname="org.dspace.storage.rdbms.InitializeDatabase" classpathref="class.path" fork="yes" failonerror="yes">
|
||||||
<sysproperty key="log4j.configuration" value="file:config/log4j-console.properties" />
|
<sysproperty key="log4j.configuration" value="file:config/log4j-console.properties" />
|
||||||
<sysproperty key="dspace.configuration" value="${config}" />
|
<sysproperty key="dspace.configuration" value="${config}" />
|
||||||
<arg value="database_schema.sql" />
|
<arg value="database_schema.sql" />
|
||||||
</java>
|
</java>
|
||||||
|
|
||||||
|
<!-- Add the browse tables -->
|
||||||
|
<java classname="org.dspace.browse.IndexBrowse" classpathref="class.path" fork="yes" failonerror="yes">
|
||||||
|
<sysproperty key="log4j.configuration" value="file:config/log4j-console.properties" />
|
||||||
|
<sysproperty key="dspace.configuration" value="${config}" />
|
||||||
|
<arg line="-t -x" />
|
||||||
|
</java>
|
||||||
</target>
|
</target>
|
||||||
|
|
||||||
|
|
||||||
@@ -320,6 +327,7 @@ Common usage:
|
|||||||
<!-- Loads bitstream format and Dublin Core type registries -->
|
<!-- Loads bitstream format and Dublin Core type registries -->
|
||||||
<target name="load_registries" description="Load initial contents of registries">
|
<target name="load_registries" description="Load initial contents of registries">
|
||||||
|
|
||||||
|
<!-- first import the bitstream registry -->
|
||||||
<java classname="org.dspace.administer.RegistryLoader" classpathref="class.path" fork="yes" failonerror="yes">
|
<java classname="org.dspace.administer.RegistryLoader" classpathref="class.path" fork="yes" failonerror="yes">
|
||||||
<sysproperty key="log4j.configuration" value="file:config/log4j-console.properties" />
|
<sysproperty key="log4j.configuration" value="file:config/log4j-console.properties" />
|
||||||
<sysproperty key="dspace.configuration" value="${config}" />
|
<sysproperty key="dspace.configuration" value="${config}" />
|
||||||
@@ -327,11 +335,18 @@ Common usage:
|
|||||||
<arg value="${dspace.dir}/config/registries/bitstream-formats.xml" />
|
<arg value="${dspace.dir}/config/registries/bitstream-formats.xml" />
|
||||||
</java>
|
</java>
|
||||||
|
|
||||||
<java classname="org.dspace.administer.RegistryLoader" classpathref="class.path" fork="yes" failonerror="yes">
|
<!-- next import the metadata schema records -->
|
||||||
<sysproperty key="log4j.configuration" value="file:config/log4j-console.properties" />
|
<java classname="org.dspace.administer.SchemaImporter" classpathref="class.path" fork="yes" failonerror="yes">
|
||||||
<sysproperty key="dspace.configuration" value="${config}" />
|
<sysproperty key="log4j.configuration" value="file:config/log4j-console.properties"/>
|
||||||
<arg value="-dc" />
|
<sysproperty key="dspace.configuration" value="${config}"/>
|
||||||
<arg value="${dspace.dir}/config/registries/dublin-core-types.xml" />
|
<arg line="-f ${dspace.dir}/config/registries/schemas.xml"/>
|
||||||
|
</java>
|
||||||
|
|
||||||
|
<!-- finally import the metadata elements -->
|
||||||
|
<java classname="org.dspace.administer.MetadataImporter" classpathref="class.path" fork="yes" failonerror="yes">
|
||||||
|
<sysproperty key="log4j.configuration" value="file:config/log4j-console.properties"/>
|
||||||
|
<sysproperty key="dspace.configuration" value="${config}"/>
|
||||||
|
<arg line="-f ${dspace.dir}/config/registries/dublin-core-types.xml"/>
|
||||||
</java>
|
</java>
|
||||||
|
|
||||||
</target>
|
</target>
|
||||||
|
Reference in New Issue
Block a user