#!/bin/bash
############################################################################
#
# Version: $Revision$
#
# Date: $Date$
#
# Copyright (c) 2002-2009, The DSpace Foundation. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# - Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#
# - Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# - Neither the name of the DSpace Foundation nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
# OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
# TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
# USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
# DAMAGE.
#
# Author      : Cody Green
#               Digital Initiatives Research
#               Texas A&M University
# Email       : codygreen@tamu.edu
# Description : Shell script that helps migrate data from a development
#               server to a production server. It will remove the handle
#               and any entries in the dublin_core.xml file that will be
#               duplicated upon importing the data into DSpace.
#
# Usage       : dspace_migrate.sh [DIRECTORY]
#               DIRECTORY holds one sub-directory per item; each
#               sub-directory contains a dublin_core.xml (rewritten in
#               place) and optionally a handle file (deleted).
#
# Exit status : 0   success, or usage/help text was printed
#               1   any processing error
#               192 the script is not being run by bash
#
############################################################################

# Make sure user is in bash. This must stay first and use only POSIX
# syntax so that a non-bash shell exits here before reaching bashisms.
if [ -z "$BASH" ]; then
  printf 'ERROR: please run this script with the BASH shell.\n' >&2
  exit 192
fi

# Print the usage/help text to stdout.
usage() {
  printf 'Usage: dspace_migrate.sh [DIRECTORY]\n'
  printf 'Example: /dspace/bin/dspace_migrate.sh /home/user/ETDs/ \n\n'
  printf 'This script will remove the handle and any \nentries in the dublin_core.xml file that will \nbe duplicated upon importing the data into DSpace.\n\n'
}

# Print "ERROR: <message>" to stderr and exit with status 1.
# Arguments: $1 - message text (passed as data, never as a printf format)
die() {
  printf 'ERROR: %s\n' "$1" >&2
  exit 1
}

# Locate sed: prefer whatever is on PATH, otherwise fall back to the
# conventional location for the operating systems this script knows about.
SED=$(command -v sed)
if [[ -z "$SED" ]]; then
  case "$(uname -s)" in
    SunOS) SED="/usr/bin/sed" ;;
    Linux) SED="/bin/sed" ;;
  esac
  # Sanity check for programs (also covers an unrecognised OS, where SED
  # is still empty at this point).
  if [[ -z "$SED" || ! -x "$SED" ]]; then
    printf 'ERROR: Can not find the program sed.\n please check that the program is installed or in your path\n' >&2
    exit 1
  fi
fi
export SED
readonly SED

# Check parameter count, show usage if incorrect number passed
if [[ $# -eq 0 ]]; then
  usage
  exit 0
fi

# Check if user needs help
case "$1" in
  --help | --h | -h)
    usage
    exit 0
    ;;
esac

# Check to see if the directory exists
if [[ ! -d "$1" ]]; then
  die "$1 is not a valid directory."
fi

printf 'Fixing Bad Data in ETDs\n'

# Collect every <DIRECTORY>/<item>/dublin_core.xml. nullglob makes an
# unmatched pattern expand to nothing instead of the literal pattern; it is
# switched off again so the later handle cleanup keeps its default globbing.
shopt -s nullglob
dc_files=("$1"/*/dublin_core.xml)
shopt -u nullglob

if [[ ${#dc_files[@]} -eq 0 ]]; then
  die "no dublin_core.xml files found under $1"
fi

# Loop through the directory
for dc_file in "${dc_files[@]}"; do
  backup="${dc_file}.orig"

  printf 'Checking %s...\n' "$dc_file"

  # Check if file exists and is not empty
  if [[ ! -s "$dc_file" ]]; then
    die "$dc_file does not exist or is empty"
  fi

  # Keep a pristine copy; it is also the input for the rewrite below.
  if ! cp -- "$dc_file" "$backup"; then
    die "Could not copy $dc_file to $backup "
  fi

  printf ' --removing null and duplicate values for %s\n' "$dc_file"

  # Text between the first '>' and the following '<' on the matching lines.
  # NOTE(review): the second value is read from the "accessioned" element
  # (the original script stored it in a variable called "available");
  # confirm that "accessioned" is the intended comparison.
  issued=$(grep 'element="date" qualifier="issued"' < "$dc_file" \
    | cut -f2 -d '>' | cut -f1 -d '<')
  accessioned=$(grep 'element="date" qualifier="accessioned"' < "$dc_file" \
    | cut -f2 -d '>' | cut -f1 -d '<')

  # Deletions applied to every file, in this order: empty dcvalue elements,
  # then the accessioned/available dates.
  sed_args=(
    -e '/><\/dcvalue>/d'
    -e '/element="date" qualifier="accessioned"/d'
    -e '/element="date" qualifier="available"/d'
  )

  # The issued date is dropped only when it equals the accessioned date;
  # otherwise it is kept and reported.
  if [[ "$issued" == "$accessioned" ]]; then
    sed_args+=(-e '/element="date" qualifier="issued"/d')
  else
    printf ' -- date issued %s \n' "$issued"
  fi

  # Handle URIs, provenance blocks (possibly spanning several lines),
  # format extent/mimetype entries and checksum lines.
  sed_args+=(
    -e '/element="identifier" qualifier="uri">http:\/\/hdl/d'
    -e '/element="description" qualifier="provenance"/,/<\/dcvalue>/d'
    -e '/element="format" qualifier="extent"/d'
    -e '/element="format" qualifier="mimetype"/d'
    -e '/bytes, checksum/d'
  )

  # Read from the backup, never from the file being written: the output
  # redirection truncates "$dc_file" before sed starts reading. On failure
  # the backup is deliberately left in place for manual recovery.
  if ! "$SED" "${sed_args[@]}" < "$backup" > "$dc_file"; then
    die "Could not fix $dc_file"
  fi

  # Check to see if filesize is 0
  printf ' --checking filesize for %s\n' "$dc_file"
  if [[ ! -s "$dc_file" ]]; then
    die "filesize for $dc_file is 0"
  fi

  # remove copy of file
  if ! rm -f -- "$backup"; then
    die "Could not remove $backup"
  fi
done

# Remove the handle file of every item; rm -f ignores items without one.
if ! rm -f -- "$1"/*/handle; then
  die "Could not delete handles"
fi

printf 'Data Has Been Fixed\n'