mirror of
https://github.com/alchemy-fr/Phraseanet.git
synced 2025-10-13 04:53:26 +00:00
Thesaurus prefixes in field structure
Also fixes candidates collected from all string fields
This commit is contained in:
@@ -36,9 +36,9 @@ class ThesaurusHydrator implements HydratorInterface
|
|||||||
// Fields with concept inference enabled
|
// Fields with concept inference enabled
|
||||||
$structure = $this->helper->getFieldsStructure();
|
$structure = $this->helper->getFieldsStructure();
|
||||||
$fields = array();
|
$fields = array();
|
||||||
foreach ($structure as $field => $options) {
|
foreach ($structure as $name => $options) {
|
||||||
if ($options['thesaurus_concept_inference']) {
|
if ($options['thesaurus_concept_inference']) {
|
||||||
$fields[$field] = $options['thesaurus_prefix'];
|
$fields[$name] = $options['thesaurus_prefixes'];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Hydrate records with concepts
|
// Hydrate records with concepts
|
||||||
@@ -54,12 +54,13 @@ class ThesaurusHydrator implements HydratorInterface
|
|||||||
}
|
}
|
||||||
|
|
||||||
$terms = array();
|
$terms = array();
|
||||||
$fieldMap = array();
|
$bulkFieldMap = array();
|
||||||
foreach ($fields as $field => $prefix) {
|
foreach ($fields as $name => $prefixes) {
|
||||||
if (isset($record['caption'][$field])) {
|
if (isset($record['caption'][$name])) {
|
||||||
foreach ($record['caption'][$field] as $value) {
|
// Loop through all values to prepare bulk query
|
||||||
|
foreach ($record['caption'][$name] as $value) {
|
||||||
$terms[] = Term::parse($value);
|
$terms[] = Term::parse($value);
|
||||||
$fieldMap[] = $field;
|
$bulkFieldMap[] = $name;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -70,12 +71,12 @@ class ThesaurusHydrator implements HydratorInterface
|
|||||||
|
|
||||||
foreach ($bulk as $offset => $item_concepts) {
|
foreach ($bulk as $offset => $item_concepts) {
|
||||||
if ($item_concepts) {
|
if ($item_concepts) {
|
||||||
$field = $fieldMap[$offset];
|
$name = $bulkFieldMap[$offset];
|
||||||
foreach ($item_concepts as $concept) {
|
foreach ($item_concepts as $concept) {
|
||||||
$record['concept_path'][$field][] = $concept->getPath();
|
$record['concept_path'][$name][] = $concept->getPath();
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
$this->candidateTerms->insert($field, $value);
|
// Look the field up by offset: $name is only refreshed inside the branch
// above, so here it would still name a previously processed field.
// NOTE(review): $value appears to be left over from the caption collection
// loop (last value seen), not the value at $offset — confirm and track values per offset.
$this->candidateTerms->insert($bulkFieldMap[$offset], $value);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -169,13 +169,14 @@ class RecordHelper
|
|||||||
$field['to_aggregate'] = (bool) $fieldStructure->isAggregable();
|
$field['to_aggregate'] = (bool) $fieldStructure->isAggregable();
|
||||||
|
|
||||||
// Thesaurus concept inference
|
// Thesaurus concept inference
|
||||||
// $xpath = "/thesaurus/te[@id='T26'] | /thesaurus/te[@id='T24']";
|
$xpath = $fieldStructure->get_tbranch();
|
||||||
$helper = new ThesaurusHelper();
|
// Enable inference only for string fields whose get_tbranch() value is non-empty.
// (The former "==!" was parsed as "== (!'')", i.e. a loose truthiness test.)
if ($field['type'] === Mapping::TYPE_STRING && !empty($xpath)) {
|
||||||
|
$field['thesaurus_concept_inference'] = true;
|
||||||
// TODO Not the real option yet
|
$field['thesaurus_prefixes'] = ThesaurusHelper::findPrefixesByXPath($databox, $xpath);
|
||||||
$field['thesaurus_concept_inference'] = $field['type'] === Mapping::TYPE_STRING;
|
} else {
|
||||||
// TODO Find thesaurus path prefixes
|
$field['thesaurus_concept_inference'] = false;
|
||||||
$field['thesaurus_prefix'] = '/categories';
|
$field['thesaurus_prefixes'] = null;
|
||||||
|
}
|
||||||
|
|
||||||
//printf("Field \"%s\" <%s> (private: %b)\n", $name, $field['type'], $field['private']);
|
//printf("Field \"%s\" <%s> (private: %b)\n", $name, $field['type'], $field['private']);
|
||||||
|
|
||||||
|
@@ -11,27 +11,53 @@
|
|||||||
|
|
||||||
namespace Alchemy\Phrasea\SearchEngine\Elastic\Thesaurus;
|
namespace Alchemy\Phrasea\SearchEngine\Elastic\Thesaurus;
|
||||||
|
|
||||||
|
use Alchemy\Phrasea\SearchEngine\Elastic\Exception\ThesaurusException;
|
||||||
|
use Alchemy\Phrasea\SearchEngine\Elastic\StringUtils;
|
||||||
use databox;
|
use databox;
|
||||||
use DOMDocument;
|
use DOMDocument;
|
||||||
use DOMElement;
|
use DOMElement;
|
||||||
use DOMNode;
|
use DOMNode;
|
||||||
|
use DOMNodeList;
|
||||||
use DOMXPath;
|
use DOMXPath;
|
||||||
use Elasticsearch\Client;
|
|
||||||
|
|
||||||
class Helper
|
class Helper
|
||||||
{
|
{
|
||||||
public function findNodesByXPath($document, $xpath)
|
const TERM_LANG_ATTR = 'lng';
|
||||||
|
const TERM_VALUE_ATTR = 'v';
|
||||||
|
const PATH_LANG = 'en';
|
||||||
|
|
||||||
|
public static function findPrefixesByXPath(databox $databox, $expression)
|
||||||
{
|
{
|
||||||
$tbranch = "/thesaurus/te[@id='T26'] | /thesaurus/te[@id='T24']";
|
$document = self::thesaurusFromDatabox($databox);
|
||||||
$xpath = new \DOMXPath($document);
|
$xpath = new DOMXPath($document);
|
||||||
$nodeList = $xpath->query($tbranch);
|
$nodes = $xpath->query($expression);
|
||||||
$conceptIds = [];
|
$prefixes = [];
|
||||||
foreach ($nodeList as $node) {
|
foreach ($nodes as $node) {
|
||||||
if ($node->hasAttribute('id')) {
|
$path_segments = [];
|
||||||
$conceptIds[] = $node->getAttribute('id');
|
$me_and_parents = [$node];
|
||||||
|
// Append every ancestor after the node itself, nearest parent first.
$me_and_parents = array_merge($me_and_parents, self::getElementAncestors($node));
|
||||||
|
foreach ($me_and_parents as $node) {
|
||||||
|
if (Navigator::isConcept($node)) {
|
||||||
|
$path_segments[] = self::conceptPathSegment($node);
|
||||||
|
} else {
|
||||||
|
// Silently skips invalid targeted nodes
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
// A targeted node that produced no concept segment is skipped, so it never
// contributes the bare "/" prefix.
if ($path_segments) {
    $prefixes[] = sprintf('/%s', implode('/', array_reverse($path_segments)));
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return $prefixes;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Lists the chain of parent nodes above an element.
 *
 * @param DOMElement $element Starting element (not included in the result)
 * @return array Parent nodes, nearest first, up to the last non-empty parentNode
 */
private static function getElementAncestors(DOMElement $element)
{
    $ancestors = [];
    // Climb until parentNode yields a falsy value.
    for ($current = $element->parentNode; $current; $current = $current->parentNode) {
        $ancestors[] = $current;
    }

    return $ancestors;
}
|
||||||
|
|
||||||
public static function thesaurusFromDatabox(databox $databox)
|
public static function thesaurusFromDatabox(databox $databox)
|
||||||
@@ -64,4 +90,49 @@ class Helper
|
|||||||
|
|
||||||
return $document;
|
return $document;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Builds the path segment that names a concept element.
 *
 * The segment comes from the first child term whose language attribute
 * equals self::PATH_LANG; when there is none, the first child term is used.
 *
 * @param DOMElement $element Concept element whose child nodes are inspected
 * @return mixed Result of StringUtils::slugify() applied to the chosen term value
 * @throws ThesaurusException When the element has no term child at all
 */
public static function conceptPathSegment(DOMElement $element)
{
    $terms = self::filter($element->childNodes, array(Navigator::class, 'isTerm'));
    // Name the class explicitly: the 'self' string form of a callable is
    // deprecated as of PHP 8.2.
    $term = self::find($terms, array(__CLASS__, 'isPathLang'));
    if (!$term) {
        if (!isset($terms[0])) {
            throw new ThesaurusException(sprintf('No term linked to concept at path "%s".', $element->getNodePath()));
        }
        // No term in the path language: fall back to the first term.
        $term = $terms[0];
    }

    return StringUtils::slugify($term->getAttribute(self::TERM_VALUE_ATTR));
}
|
||||||
|
|
||||||
|
/**
 * Tells whether an element is tagged with the path language.
 *
 * @param DOMElement $element Element whose TERM_LANG_ATTR attribute is read
 * @return bool True when that attribute is strictly equal to PATH_LANG
 */
private static function isPathLang(DOMElement $element)
{
    $lang = $element->getAttribute(self::TERM_LANG_ATTR);

    return self::PATH_LANG === $lang;
}
|
||||||
|
|
||||||
|
// DOM Helpers
|
||||||
|
|
||||||
|
/**
 * Collects the nodes of a list that a callback accepts.
 *
 * @param DOMNodeList $list     Nodes to test, visited in iteration order
 * @param callable    $callback Called with each node; a truthy result keeps it
 * @return array Accepted nodes, indexed from zero
 */
private static function filter(DOMNodeList $list, callable $callback)
{
    $kept = [];
    foreach ($list as $candidate) {
        if (!call_user_func($callback, $candidate)) {
            continue;
        }
        $kept[] = $candidate;
    }

    return $kept;
}
|
||||||
|
|
||||||
|
/**
 * Returns the first item of a list that a callback accepts.
 *
 * @param array    $list     Items to test, in array order
 * @param callable $callback Called with each item; a truthy result selects it
 * @return mixed The first accepted item, or null when none is accepted
 */
private static function find(array $list, callable $callback)
{
    foreach ($list as $candidate) {
        $accepted = call_user_func($callback, $candidate);
        if ($accepted) {
            return $candidate;
        }
    }

    return null;
}
|
||||||
}
|
}
|
||||||
|
@@ -45,12 +45,12 @@ class Navigator
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private function isConcept(DOMNode $node)
|
public static function isConcept(DOMNode $node)
|
||||||
{
|
{
|
||||||
return $node instanceof DOMElement && $node->tagName === self::CONCEPT_TAG_NAME;
|
return $node instanceof DOMElement && $node->tagName === self::CONCEPT_TAG_NAME;
|
||||||
}
|
}
|
||||||
|
|
||||||
private function isTerm(DOMNode $node)
|
public static function isTerm(DOMNode $node)
|
||||||
{
|
{
|
||||||
return $node instanceof DOMElement && $node->tagName === self::TERM_TAG_NAME;
|
return $node instanceof DOMElement && $node->tagName === self::TERM_TAG_NAME;
|
||||||
}
|
}
|
||||||
|
@@ -11,24 +11,17 @@
|
|||||||
|
|
||||||
namespace Alchemy\Phrasea\SearchEngine\Elastic\Thesaurus;
|
namespace Alchemy\Phrasea\SearchEngine\Elastic\Thesaurus;
|
||||||
|
|
||||||
use Alchemy\Phrasea\SearchEngine\Elastic\StringUtils;
|
use Alchemy\Phrasea\SearchEngine\Elastic\Thesaurus\Helper;
|
||||||
use Alchemy\Phrasea\SearchEngine\Elastic\Exception\ThesaurusException;
|
|
||||||
use Closure;
|
use Closure;
|
||||||
use Elasticsearch\Client;
|
use Elasticsearch\Client;
|
||||||
use DOMDocument;
|
|
||||||
use DOMNodeList;
|
|
||||||
use DOMElement;
|
use DOMElement;
|
||||||
use DOMNode;
|
|
||||||
|
|
||||||
class TermVisitor implements VisitorInterface
|
class TermVisitor implements VisitorInterface
|
||||||
{
|
{
|
||||||
const TERM_TAG_NAME = 'sy';
|
|
||||||
const TERM_ID_ATTR = 'id';
|
const TERM_ID_ATTR = 'id';
|
||||||
const TERM_LANG_ATTR = 'lng';
|
const TERM_LANG_ATTR = 'lng';
|
||||||
const TERM_VALUE_ATTR = 'v';
|
const TERM_VALUE_ATTR = 'v';
|
||||||
|
|
||||||
const PATH_LANG = 'en';
|
|
||||||
|
|
||||||
private $path = [];
|
private $path = [];
|
||||||
private $termCallback;
|
private $termCallback;
|
||||||
|
|
||||||
@@ -39,20 +32,20 @@ class TermVisitor implements VisitorInterface
|
|||||||
|
|
||||||
public function visitConcept(DOMElement $element)
|
public function visitConcept(DOMElement $element)
|
||||||
{
|
{
|
||||||
array_push($this->path, $this->getConceptPathSegment($element));
|
array_push($this->path, Helper::conceptPathSegment($element));
|
||||||
}
|
}
|
||||||
|
|
||||||
public function visitTerm(DOMElement $element)
|
public function visitTerm(DOMElement $element)
|
||||||
{
|
{
|
||||||
$raw_value = $this->getTermValue($element);
|
$raw_value = $element->getAttribute(self::TERM_VALUE_ATTR);
|
||||||
$object = Term::parse($raw_value);
|
$object = Term::parse($raw_value);
|
||||||
$term = [
|
$term = [
|
||||||
'raw_value' => $raw_value,
|
'raw_value' => $raw_value,
|
||||||
'value' => $object->getValue(),
|
'value' => $object->getValue(),
|
||||||
'context' => $object->getContext(),
|
'context' => $object->getContext(),
|
||||||
'path' => $this->getCurrentPathAsString(),
|
'path' => $this->getCurrentPathAsString(),
|
||||||
'lang' => $this->getTermAttribute($element, self::TERM_LANG_ATTR),
|
'lang' => $element->getAttribute(self::TERM_LANG_ATTR),
|
||||||
'id' => $this->getTermAttribute($element, self::TERM_ID_ATTR)
|
'id' => $element->getAttribute(self::TERM_ID_ATTR)
|
||||||
];
|
];
|
||||||
|
|
||||||
call_user_func($this->termCallback, $term);
|
call_user_func($this->termCallback, $term);
|
||||||
@@ -67,66 +60,4 @@ class TermVisitor implements VisitorInterface
|
|||||||
{
|
{
|
||||||
return sprintf('/%s', implode('/', $this->path));
|
return sprintf('/%s', implode('/', $this->path));
|
||||||
}
|
}
|
||||||
|
|
||||||
private function getConceptPathSegment(DOMElement $element)
|
|
||||||
{
|
|
||||||
// Path segment is named according to the first english term, and
|
|
||||||
// default to the first term.
|
|
||||||
$terms = $this->filter($element->childNodes, array($this, 'isTerm'));
|
|
||||||
$term = $this->find($terms, array($this, 'isPathLang'));
|
|
||||||
if (!$term) {
|
|
||||||
if (isset($terms[0])) {
|
|
||||||
$term = $terms[0];
|
|
||||||
} else {
|
|
||||||
throw new ThesaurusException(sprintf('No term linked to concept at path "%s".', $element->getNodePath()));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return StringUtils::slugify($this->getTermValue($term));
|
|
||||||
}
|
|
||||||
|
|
||||||
private function isTerm(DOMNode $node)
|
|
||||||
{
|
|
||||||
return $node instanceof DOMElement && $node->tagName === self::TERM_TAG_NAME;
|
|
||||||
}
|
|
||||||
|
|
||||||
private function isPathLang(DOMElement $element)
|
|
||||||
{
|
|
||||||
return $element->getAttribute(self::TERM_LANG_ATTR) === self::PATH_LANG;
|
|
||||||
}
|
|
||||||
|
|
||||||
private function getTermValue(DOMElement $term)
|
|
||||||
{
|
|
||||||
return $this->getTermAttribute($term, self::TERM_VALUE_ATTR);
|
|
||||||
}
|
|
||||||
|
|
||||||
private function getTermAttribute(DOMElement $term, $attribute)
|
|
||||||
{
|
|
||||||
if ($term->hasAttribute($attribute)) {
|
|
||||||
return $term->getAttribute($attribute);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// DOM Helpers
|
|
||||||
|
|
||||||
private function filter(DOMNodeList $list, Callable $callback)
|
|
||||||
{
|
|
||||||
$filtered = [];
|
|
||||||
foreach ($list as $node) {
|
|
||||||
if (call_user_func($callback, $node)) {
|
|
||||||
$filtered[] = $node;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return $filtered;
|
|
||||||
}
|
|
||||||
|
|
||||||
private function find(array $list, Callable $callback)
|
|
||||||
{
|
|
||||||
foreach ($list as $node) {
|
|
||||||
if (call_user_func($callback, $node)) {
|
|
||||||
return $node;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user