129944: Introduce a custom abstract XPath contributor for PubMed that respects its labelled abstract structure, and update the IT accordingly

(cherry picked from commit 28bc4970b7)
This commit is contained in:
Jens Vannerum
2025-04-09 17:06:43 +02:00
committed by github-actions[bot]
parent 144d9a25d2
commit 1f174f4657
4 changed files with 100 additions and 37 deletions

View File

@@ -0,0 +1,67 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.importer.external.pubmed.metadatamapping.contributor;
import java.util.ArrayList;
import java.util.Collection;
import java.util.LinkedList;
import java.util.List;
import java.util.regex.Pattern;

import org.dspace.importer.external.metadatamapping.MetadatumDTO;
import org.dspace.importer.external.metadatamapping.contributor.SimpleXpathMetadatumContributor;
import org.jdom2.Element;
import org.jdom2.Namespace;
import org.jdom2.filter.Filters;
import org.jdom2.xpath.XPathExpression;
import org.jdom2.xpath.XPathFactory;
/**
 * Metadata contributor that collapses the abstract of a PubMed XML record into a single metadata value.
 * <p>
 * Every element matched by the configured XPath query is treated as one section of the abstract. Sections are
 * emitted in document order, separated by a blank line, and each section is prefixed with the value of its
 * {@code Label} attribute (for example {@code "METHODS: ..."}) unless the label is missing, blank or the
 * placeholder {@code UNLABELLED}.
 */
public class PubmedAbstractMetadatumContributor extends SimpleXpathMetadatumContributor {

    /** Name of the attribute that carries the section label. */
    private static final String LABEL_ATTRIBUTE = "Label";

    /** Placeholder label for which no prefix is written (compared case-insensitively). */
    private static final String UNLABELLED = "UNLABELLED";

    /** Separator written between a label and the text of its section. */
    private static final String LABEL_SEPARATOR = ": ";

    /** Separator written between two consecutive sections. */
    private static final String SECTION_SEPARATOR = "\n\n";

    /** Matches any run of whitespace; used to collapse line breaks and indentation inside a section. */
    private static final Pattern WHITESPACE = Pattern.compile("\\s+");

    /**
     * Builds at most one metadata value containing all abstract sections found below the given element.
     *
     * @param t the element the configured XPath query is evaluated against
     * @return a collection holding the single combined abstract value, or an empty collection when the query
     *         matches nothing or every matched section is empty
     */
    @Override
    public Collection<MetadatumDTO> contributeMetadata(Element t) {
        List<MetadatumDTO> values = new LinkedList<>();

        // The mapping is keyed by namespace URI with the prefix as value, hence the (value, key) argument order.
        List<Namespace> namespaces = new ArrayList<>();
        for (String ns : prefixToNamespaceMapping.keySet()) {
            namespaces.add(Namespace.getNamespace(prefixToNamespaceMapping.get(ns), ns));
        }

        XPathExpression<Element> xpath =
            XPathFactory.instance().compile(query, Filters.element(), null, namespaces);

        StringBuilder sb = new StringBuilder();
        for (Element el : xpath.evaluate(t)) {
            String text = sectionText(el);
            if (text.isEmpty()) {
                continue;
            }
            if (sb.length() > 0) {
                sb.append(SECTION_SEPARATOR);
            }
            String label = sectionLabel(el);
            if (label != null) {
                sb.append(label).append(LABEL_SEPARATOR);
            }
            sb.append(text);
        }

        if (sb.length() > 0) {
            values.add(metadataFieldMapping.toDCValue(field, sb.toString()));
        }
        return values;
    }

    /**
     * Returns the whitespace-normalised text of a section, including text nested inside child elements.
     * {@link Element#getValue()} is used rather than {@code getTextNormalize()} because the latter only looks at
     * text held directly under the element and would drop anything wrapped in inline markup.
     */
    private static String sectionText(Element section) {
        String raw = section.getValue();
        if (raw == null) {
            return "";
        }
        return WHITESPACE.matcher(raw).replaceAll(" ").trim();
    }

    /**
     * Returns the label to print in front of a section, or {@code null} when no prefix should be written
     * (attribute absent, blank, or equal to the {@code UNLABELLED} placeholder).
     */
    private static String sectionLabel(Element section) {
        String label = section.getAttributeValue(LABEL_ATTRIBUTE);
        if (label == null) {
            return null;
        }
        String trimmed = label.trim();
        if (trimmed.isEmpty() || trimmed.equalsIgnoreCase(UNLABELLED)) {
            return null;
        }
        return trimmed;
    }
}

View File

@@ -101,35 +101,34 @@ public class PubmedImportMetadataSourceServiceIT extends AbstractLiveImportInteg
//define first record //define first record
MetadatumDTO title = createMetadatumDTO("dc","title", null, MetadatumDTO title = createMetadatumDTO("dc","title", null,
"Teaching strategies of clinical reasoning in advanced nursing clinical practice: A scoping review."); "Teaching strategies of clinical reasoning in advanced nursing clinical practice: A scoping review.");
MetadatumDTO description1 = createMetadatumDTO("dc", "description", "abstract", "To report and synthesize" MetadatumDTO description1 = createMetadatumDTO("dc", "description", "abstract",
+ " the main strategies for teaching clinical reasoning described in the literature in the context of" "AIM/OBJECTIVE: To report and synthesize the main strategies for teaching clinical reasoning " +
+ " advanced clinical practice and promote new areas of research to improve the pedagogical approach" "described in the literature in the context of advanced clinical practice and promote new " +
+ " to clinical reasoning in Advanced Practice Nursing."); "areas of research to improve the pedagogical approach to clinical reasoning in Advanced " +
MetadatumDTO description2 = createMetadatumDTO("dc", "description", "abstract", "Clinical reasoning and" "Practice Nursing.\n\nBACKGROUND: Clinical reasoning and clinical thinking are essential " +
+ " clinical thinking are essential elements in the advanced nursing clinical practice decision-making" "elements in the advanced nursing clinical practice decision-making process. The quality " +
+ " process. The quality improvement of care is related to the development of those skills." "improvement of care is related to the development of those skills. Therefore, it is crucial " +
+ " Therefore, it is crucial to optimize teaching strategies that can enhance the role of clinical" "to optimize teaching strategies that can enhance the role of clinical reasoning in advanced " +
+ " reasoning in advanced clinical practice."); "clinical practice.\n\nDESIGN: A scoping review was conducted using the framework developed " +
MetadatumDTO description3 = createMetadatumDTO("dc", "description", "abstract", "A scoping review was" "by Arksey and O'Malley as a research strategy. Consistent with the nature of scoping reviews" +
+ " conducted using the framework developed by Arksey and O'Malley as a research strategy." ", a study protocol has been established.\n\nMETHODS: The studies included and analyzed in " +
+ " Consistent with the nature of scoping reviews, a study protocol has been established."); "this scoping review cover from January 2016 to June 2022. Primary studies and secondary " +
MetadatumDTO description4 = createMetadatumDTO("dc", "description", "abstract", "The studies included and" "revision studies, published in biomedical databases, were selected, including qualitative " +
+ " analyzed in this scoping review cover from January 2016 to June 2022. Primary studies and secondary" "ones. Electronic databases used were: CINAHL, PubMed, Cochrane Library, Scopus, and OVID. " +
+ " revision studies, published in biomedical databases, were selected, including qualitative ones." "Three authors independently evaluated the articles for titles, abstracts, and full text.\n\n" +
+ " Electronic databases used were: CINAHL, PubMed, Cochrane Library, Scopus, and OVID." "RESULTS: 1433 articles were examined, applying the eligibility and exclusion criteria 73 " +
+ " Three authors independently evaluated the articles for titles, abstracts, and full text."); "studies were assessed for eligibility, and 27 were included in the scoping review. The " +
MetadatumDTO description5 = createMetadatumDTO("dc", "description", "abstract", "1433 articles were examined," "results that emerged from the review were interpreted and grouped into three macro " +
+ " applying the eligibility and exclusion criteria 73 studies were assessed for eligibility," "strategies (simulations-based education, art and visual thinking, and other learning " +
+ " and 27 were included in the scoping review. The results that emerged from the review were" "approaches) and nineteen educational interventions.\n\nCONCLUSIONS: Among the different " +
+ " interpreted and grouped into three macro strategies (simulations-based education, art and visual" "strategies, the simulations are the most used. Despite this, our scoping review reveals " +
+ " thinking, and other learning approaches) and nineteen educational interventions."); "that is necessary to use different teaching strategies to stimulate critical thinking, " +
MetadatumDTO description6 = createMetadatumDTO("dc", "description", "abstract", "Among the different" "improve diagnostic reasoning, refine clinical judgment, and strengthen decision-making. " +
+ " strategies, the simulations are the most used. Despite this, our scoping review reveals that is" "However, it is not possible to demonstrate which methodology is more effective in obtaining " +
+ " necessary to use different teaching strategies to stimulate critical thinking, improve diagnostic" "the learning outcomes necessary to acquire an adequate level of judgment and critical " +
+ " reasoning, refine clinical judgment, and strengthen decision-making. However, it is not possible to" "thinking. Therefore, it will be necessary to relate teaching methodologies with the skills " +
+ " demonstrate which methodology is more effective in obtaining the learning outcomes necessary to" "developed.\n\nAn unlabeled section of an abstract.\n\nAn abstract section with no attributes" +
+ " acquire an adequate level of judgment and critical thinking. Therefore, it will be" " at all, concerning.");
+ " necessary to relate teaching methodologies with the skills developed.");
MetadatumDTO identifierOther = createMetadatumDTO("dc", "identifier", "other", "36708638"); MetadatumDTO identifierOther = createMetadatumDTO("dc", "identifier", "other", "36708638");
MetadatumDTO author1 = createMetadatumDTO("dc", "contributor", "author", "Giuffrida, Silvia"); MetadatumDTO author1 = createMetadatumDTO("dc", "contributor", "author", "Giuffrida, Silvia");
MetadatumDTO author2 = createMetadatumDTO("dc", "contributor", "author", "Silano, Verdiana"); MetadatumDTO author2 = createMetadatumDTO("dc", "contributor", "author", "Silano, Verdiana");
@@ -148,11 +147,6 @@ public class PubmedImportMetadataSourceServiceIT extends AbstractLiveImportInteg
metadatums.add(title); metadatums.add(title);
metadatums.add(description1); metadatums.add(description1);
metadatums.add(description2);
metadatums.add(description3);
metadatums.add(description4);
metadatums.add(description5);
metadatums.add(description6);
metadatums.add(identifierOther); metadatums.add(identifierOther);
metadatums.add(author1); metadatums.add(author1);
metadatums.add(author2); metadatums.add(author2);
@@ -210,4 +204,4 @@ public class PubmedImportMetadataSourceServiceIT extends AbstractLiveImportInteg
return records; return records;
} }
} }

View File

@@ -41,6 +41,8 @@
<AbstractText Label="METHODS" NlmCategory="METHODS">The studies included and analyzed in this scoping review cover from January 2016 to June 2022. Primary studies and secondary revision studies, published in biomedical databases, were selected, including qualitative ones. Electronic databases used were: CINAHL, PubMed, Cochrane Library, Scopus, and OVID. Three authors independently evaluated the articles for titles, abstracts, and full text.</AbstractText> <AbstractText Label="METHODS" NlmCategory="METHODS">The studies included and analyzed in this scoping review cover from January 2016 to June 2022. Primary studies and secondary revision studies, published in biomedical databases, were selected, including qualitative ones. Electronic databases used were: CINAHL, PubMed, Cochrane Library, Scopus, and OVID. Three authors independently evaluated the articles for titles, abstracts, and full text.</AbstractText>
<AbstractText Label="RESULTS" NlmCategory="RESULTS">1433 articles were examined, applying the eligibility and exclusion criteria 73 studies were assessed for eligibility, and 27 were included in the scoping review. The results that emerged from the review were interpreted and grouped into three macro strategies (simulations-based education, art and visual thinking, and other learning approaches) and nineteen educational interventions.</AbstractText> <AbstractText Label="RESULTS" NlmCategory="RESULTS">1433 articles were examined, applying the eligibility and exclusion criteria 73 studies were assessed for eligibility, and 27 were included in the scoping review. The results that emerged from the review were interpreted and grouped into three macro strategies (simulations-based education, art and visual thinking, and other learning approaches) and nineteen educational interventions.</AbstractText>
<AbstractText Label="CONCLUSIONS" NlmCategory="CONCLUSIONS">Among the different strategies, the simulations are the most used. Despite this, our scoping review reveals that is necessary to use different teaching strategies to stimulate critical thinking, improve diagnostic reasoning, refine clinical judgment, and strengthen decision-making. However, it is not possible to demonstrate which methodology is more effective in obtaining the learning outcomes necessary to acquire an adequate level of judgment and critical thinking. Therefore, it will be necessary to relate teaching methodologies with the skills developed.</AbstractText> <AbstractText Label="CONCLUSIONS" NlmCategory="CONCLUSIONS">Among the different strategies, the simulations are the most used. Despite this, our scoping review reveals that is necessary to use different teaching strategies to stimulate critical thinking, improve diagnostic reasoning, refine clinical judgment, and strengthen decision-making. However, it is not possible to demonstrate which methodology is more effective in obtaining the learning outcomes necessary to acquire an adequate level of judgment and critical thinking. Therefore, it will be necessary to relate teaching methodologies with the skills developed.</AbstractText>
<AbstractText Label="UNLABELLED">An unlabeled section of an abstract.</AbstractText>
<AbstractText>An abstract section with no attributes at all, concerning.</AbstractText>
<CopyrightInformation>Copyright &#xa9; 2023 Elsevier Ltd. All rights reserved.</CopyrightInformation> <CopyrightInformation>Copyright &#xa9; 2023 Elsevier Ltd. All rights reserved.</CopyrightInformation>
</Abstract> </Abstract>
<AuthorList CompleteYN="Y"> <AuthorList CompleteYN="Y">
@@ -191,4 +193,4 @@
</ArticleIdList> </ArticleIdList>
</PubmedData> </PubmedData>
</PubmedArticle> </PubmedArticle>
</PubmedArticleSet> </PubmedArticleSet>

View File

@@ -57,7 +57,7 @@
<property name="prefixToNamespaceMapping" ref="prefixToNamespaceMapping"/> <property name="prefixToNamespaceMapping" ref="prefixToNamespaceMapping"/>
</bean> </bean>
<bean id="abstractContrib" class="org.dspace.importer.external.metadatamapping.contributor.SimpleXpathMetadatumContributor"> <bean id="abstractContrib" class="org.dspace.importer.external.pubmed.metadatamapping.contributor.PubmedAbstractMetadatumContributor">
<property name="field" ref="dc.description.abstract"/> <property name="field" ref="dc.description.abstract"/>
<property name="query" value="descendant::AbstractText"/> <property name="query" value="descendant::AbstractText"/>
<property name="prefixToNamespaceMapping" ref="prefixToNamespaceMapping"/> <property name="prefixToNamespaceMapping" ref="prefixToNamespaceMapping"/>