Merge pull request #1266 from Georgetown-University-Libraries/ds-3026

[DS-3026] REST Reports - Rely on Apache Commons to parse comma separated properties
This commit is contained in:
Tim Donohue
2016-02-18 08:23:13 -06:00
4 changed files with 35 additions and 28 deletions

View File

@@ -20,6 +20,11 @@ public class ItemFilterDefs implements ItemFilterList {
public static final String CAT_ITEM = "Item Property Filters";
public static final String CAT_BASIC = "Basic Bitstream Filters";
public static final String CAT_MIME = "Bitstream Filters by MIME Type";
public static final String[] MIMES_PDF = {"application/pdf"};
public static final String[] MIMES_JPG = {"image/jpeg"};
private enum EnumItemFilterDefs implements ItemFilterTest {
is_item("Is Item - always true", null, CAT_ITEM) {
public boolean testItem(Context context, Item item) {
@@ -99,12 +104,12 @@ public class ItemFilterDefs implements ItemFilterList {
},
has_pdf_original("Item has a PDF Original Bitstream", null, CAT_MIME) {
public boolean testItem(Context context, Item item) {
return ItemFilterUtil.countOriginalBitstreamMime(context, item, "application/pdf") > 0;
return ItemFilterUtil.countOriginalBitstreamMime(context, item, MIMES_PDF) > 0;
}
},
has_jpg_original("Item has JPG Original Bitstream", null, CAT_MIME) {
public boolean testItem(Context context, Item item) {
return ItemFilterUtil.countOriginalBitstreamMime(context, item, "image/jpeg") > 0;
return ItemFilterUtil.countOriginalBitstreamMime(context, item, MIMES_JPG) > 0;
}
},
;

View File

@@ -66,23 +66,23 @@ public class ItemFilterDefsMisc implements ItemFilterList {
},
has_small_pdf("Has unusually small PDF", null, ItemFilterDefs.CAT_MIME) {
public boolean testItem(Context context, Item item) {
return ItemFilterUtil.countBitstreamSmallerThanMinSize(context, BundleName.ORIGINAL, item, "application/pdf", "rest.report-pdf-min-size") > 0;
return ItemFilterUtil.countBitstreamSmallerThanMinSize(context, BundleName.ORIGINAL, item, ItemFilterDefs.MIMES_PDF, "rest.report-pdf-min-size") > 0;
}
},
has_large_pdf("Has unusually large PDF", null, ItemFilterDefs.CAT_MIME) {
public boolean testItem(Context context, Item item) {
return ItemFilterUtil.countBitstreamLargerThanMaxSize(context, BundleName.ORIGINAL, item, "application/pdf", "rest.report-pdf-max-size") > 0;
return ItemFilterUtil.countBitstreamLargerThanMaxSize(context, BundleName.ORIGINAL, item, ItemFilterDefs.MIMES_PDF, "rest.report-pdf-max-size") > 0;
}
},
has_unsupported_bundle("Has bitstream in an unsuppored bundle", null, CAT_MISC) {
public boolean testItem(Context context, Item item) {
String bundleList = DSpaceServicesFactory.getInstance().getConfigurationService().getProperty("rest.report-supp-bundles");
String[] bundleList = DSpaceServicesFactory.getInstance().getConfigurationService().getArrayProperty("rest.report-supp-bundles");
return ItemFilterUtil.hasUnsupportedBundle(item, bundleList);
}
},
has_small_thumbnail("Has unusually small thumbnail", null, CAT_MISC) {
public boolean testItem(Context context, Item item) {
return ItemFilterUtil.countBitstreamSmallerThanMinSize(context, BundleName.THUMBNAIL, item, "image/jpeg", "rest.report-thumbnail-min-size") > 0;
return ItemFilterUtil.countBitstreamSmallerThanMinSize(context, BundleName.THUMBNAIL, item, ItemFilterDefs.MIMES_JPG, "rest.report-thumbnail-min-size") > 0;
}
},
has_doc_without_text("Has document bitstream without TEXT item", null, ItemFilterDefs.CAT_MIME) {
@@ -122,7 +122,7 @@ public class ItemFilterDefsMisc implements ItemFilterList {
},
has_non_generated_thumb("Has non generated thumbnail", null, CAT_MISC) {
public boolean testItem(Context context, Item item) {
String generatedThumbDesc = DSpaceServicesFactory.getInstance().getConfigurationService().getProperty("rest.report-gen-thumbnail-desc");
String[] generatedThumbDesc = DSpaceServicesFactory.getInstance().getConfigurationService().getArrayProperty("rest.report-gen-thumbnail-desc");
int countThumb = ItemFilterUtil.countBitstream(BundleName.THUMBNAIL, item);
if (countThumb == 0) {
return false;

View File

@@ -29,16 +29,16 @@ public class ItemFilterUtil {
static Logger log = Logger.getLogger(ItemFilterUtil.class);
public enum BundleName{ORIGINAL,TEXT,LICENSE,THUMBNAIL;}
static String getDocumentMimeTypes() {
return DSpaceServicesFactory.getInstance().getConfigurationService().getProperty("rest.report-mime-document");
static String[] getDocumentMimeTypes() {
return DSpaceServicesFactory.getInstance().getConfigurationService().getArrayProperty("rest.report-mime-document");
}
static String getSupportedDocumentMimeTypes() {
return DSpaceServicesFactory.getInstance().getConfigurationService().getProperty("rest.report-mime-document-supported");
static String[] getSupportedDocumentMimeTypes() {
return DSpaceServicesFactory.getInstance().getConfigurationService().getArrayProperty("rest.report-mime-document-supported");
}
static String getSupportedImageMimeTypes() {
return DSpaceServicesFactory.getInstance().getConfigurationService().getProperty("rest.report-mime-document-image");
static String[] getSupportedImageMimeTypes() {
return DSpaceServicesFactory.getInstance().getConfigurationService().getArrayProperty("rest.report-mime-document-image");
}
static int countOriginalBitstream(Item item) {
@@ -70,17 +70,17 @@ public class ItemFilterUtil {
}
static int countOriginalBitstreamMime(Context context, Item item, String mimeList) {
static int countOriginalBitstreamMime(Context context, Item item, String[] mimeList) {
return countBitstreamMime(context, BundleName.ORIGINAL, item, mimeList);
}
static int countBitstreamMime(Context context, BundleName bundleName, Item item, String mimeList) {
static int countBitstreamMime(Context context, BundleName bundleName, Item item, String[] mimeList) {
int count = 0;
for(Bundle bundle: item.getBundles()){
if (!bundle.getName().equals(bundleName.name())){
continue;
}
for(Bitstream bit: bundle.getBitstreams()) {
for(String mime: mimeList.split(",")) {
for(String mime: mimeList) {
try {
if (bit.getFormat(context).getMIMEType().equals(mime.trim())) {
count++;
@@ -94,14 +94,14 @@ public class ItemFilterUtil {
return count;
}
static int countBitstreamByDesc(BundleName bundleName, Item item, String descList) {
static int countBitstreamByDesc(BundleName bundleName, Item item, String[] descList) {
int count = 0;
for(Bundle bundle: item.getBundles()){
if (!bundle.getName().equals(bundleName.name())){
continue;
}
for(Bitstream bit: bundle.getBitstreams()) {
for(String desc: descList.split(",")) {
for(String desc: descList) {
String bitDesc = bit.getDescription();
if (bitDesc == null) {
continue;
@@ -115,7 +115,7 @@ public class ItemFilterUtil {
return count;
}
static int countBitstreamSmallerThanMinSize(Context context, BundleName bundleName, Item item, String mimeList, String prop) {
static int countBitstreamSmallerThanMinSize(Context context, BundleName bundleName, Item item, String[] mimeList, String prop) {
long size = DSpaceServicesFactory.getInstance().getConfigurationService().getLongProperty(prop);
int count = 0;
try {
@@ -124,7 +124,7 @@ public class ItemFilterUtil {
continue;
}
for(Bitstream bit: bundle.getBitstreams()) {
for(String mime: mimeList.split(",")) {
for(String mime: mimeList) {
if (bit.getFormat(context).getMIMEType().equals(mime.trim())) {
if (bit.getSize() < size) {
count++;
@@ -138,7 +138,7 @@ public class ItemFilterUtil {
return count;
}
static int countBitstreamLargerThanMaxSize(Context context, BundleName bundleName, Item item, String mimeList, String prop) {
static int countBitstreamLargerThanMaxSize(Context context, BundleName bundleName, Item item, String[] mimeList, String prop) {
long size = DSpaceServicesFactory.getInstance().getConfigurationService().getLongProperty(prop);
int count = 0;
try {
@@ -147,7 +147,7 @@ public class ItemFilterUtil {
continue;
}
for(Bitstream bit: bundle.getBitstreams()) {
for(String mime: mimeList.split(",")) {
for(String mime: mimeList) {
if (bit.getFormat(context).getMIMEType().equals(mime.trim())) {
if (bit.getSize() > size) {
count++;
@@ -182,12 +182,12 @@ public class ItemFilterUtil {
return count;
}
static boolean hasUnsupportedBundle(Item item, String bundleList) {
static boolean hasUnsupportedBundle(Item item, String[] bundleList) {
if (bundleList == null) {
return false;
}
ArrayList<String> bundles = new ArrayList<String>();
for(String bundleName: bundleList.split(",")) {
for(String bundleName: bundleList) {
bundles.add(bundleName.trim());
}
for(Bundle bundle: item.getBundles()) {
@@ -197,10 +197,10 @@ public class ItemFilterUtil {
}
return false;
}
static boolean hasOriginalBitstreamMime(Context context, Item item, String mimeList) {
static boolean hasOriginalBitstreamMime(Context context, Item item, String[] mimeList) {
return hasBitstreamMime(context, BundleName.ORIGINAL, item, mimeList);
}
static boolean hasBitstreamMime(Context context, BundleName bundleName, Item item, String mimeList) {
static boolean hasBitstreamMime(Context context, BundleName bundleName, Item item, String[] mimeList) {
return countBitstreamMime(context, bundleName, item, mimeList) > 0;
}

View File

@@ -127,7 +127,8 @@ rest.report-regex-compound-subject = .*;.*
rest.report-regex-compound-author = .* and .*
# regex to detect unbreaking metadata - detect long unbreaking text that may not render properly on a page
rest.report-regex-unbreaking = ^.*[^ ]{50,50}.*$
# Escape each comma as \, so that Apache Commons Configuration does not split the regex into multiple array values
rest.report-regex-unbreaking = ^.*[^ ]{50\,50}.*$
# regex to detect URLs in description - detect description fields that contain URLs
rest.report-regex-url = ^.*(http://|https://|mailto:).*$
@@ -137,7 +138,8 @@ rest.report-regex-url = ^.*(http://|https://|mailto:).*$
rest.report-regex-fulltext = ^.*No\\. of bitstreams(.|\\r|\\n|\\r\\n)*\\.(PDF|pdf|DOC|doc|PPT|ppt|DOCX|docx|PPTX|pptx).*$
# regex to identify very long metadata fields that may be slow to render
rest.report-regex-long = ^[\\s\\S]{6000,}$
# Escape each comma as \, so that Apache Commons Configuration does not split the regex into multiple array values
rest.report-regex-long = ^[\\s\\S]{6000\,}$
# regex to identify partial XML entities within a description field (a frequent problem found in ProQuest ETDs)
rest.report-regex-xml-entity = ^.*&#.*$