diff --git a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer/Record/Hydrator/ThesaurusHydrator.php b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer/Record/Hydrator/ThesaurusHydrator.php index d115fd37ab..676b8f4bb1 100644 --- a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer/Record/Hydrator/ThesaurusHydrator.php +++ b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer/Record/Hydrator/ThesaurusHydrator.php @@ -36,9 +36,9 @@ class ThesaurusHydrator implements HydratorInterface // Fields with concept inference enabled $structure = $this->helper->getFieldsStructure(); $fields = array(); - foreach ($structure as $field => $options) { + foreach ($structure as $name => $options) { if ($options['thesaurus_concept_inference']) { - $fields[$field] = $options['thesaurus_prefix']; + $fields[$name] = $options['thesaurus_prefixes']; } } // Hydrate records with concepts @@ -54,12 +54,13 @@ class ThesaurusHydrator implements HydratorInterface } $terms = array(); - $fieldMap = array(); - foreach ($fields as $field => $prefix) { - if (isset($record['caption'][$field])) { - foreach ($record['caption'][$field] as $value) { + $bulkFieldMap = array(); + foreach ($fields as $name => $prefixes) { + if (isset($record['caption'][$name])) { + // Queue every value for the bulk query, remembering its field name and raw value + foreach ($record['caption'][$name] as $value) { $terms[] = Term::parse($value); - $fieldMap[] = $field; + $bulkFieldMap[] = array($name, $value); } } } @@ -70,12 +71,12 @@ class ThesaurusHydrator implements HydratorInterface foreach ($bulk as $offset => $item_concepts) { + list($name, $value) = $bulkFieldMap[$offset]; if ($item_concepts) { - $field = $fieldMap[$offset]; foreach ($item_concepts as $concept) { - $record['concept_path'][$field][] = $concept->getPath(); + $record['concept_path'][$name][] = $concept->getPath(); } } else { - $this->candidateTerms->insert($field, $value); + $this->candidateTerms->insert($name, $value); } } } diff --git a/lib/Alchemy/Phrasea/SearchEngine/Elastic/RecordHelper.php
b/lib/Alchemy/Phrasea/SearchEngine/Elastic/RecordHelper.php index c6b944c183..18f28ef336 100644 --- a/lib/Alchemy/Phrasea/SearchEngine/Elastic/RecordHelper.php +++ b/lib/Alchemy/Phrasea/SearchEngine/Elastic/RecordHelper.php @@ -169,13 +169,14 @@ class RecordHelper $field['to_aggregate'] = (bool) $fieldStructure->isAggregable(); // Thesaurus concept inference - // $xpath = "/thesaurus/te[@id='T26'] | /thesaurus/te[@id='T24']"; - $helper = new ThesaurusHelper(); - - // TODO Not the real option yet - $field['thesaurus_concept_inference'] = $field['type'] === Mapping::TYPE_STRING; - // TODO Find thesaurus path prefixes - $field['thesaurus_prefix'] = '/categories'; + $xpath = $fieldStructure->get_tbranch(); + if ($field['type'] === Mapping::TYPE_STRING && (string) $xpath !== '') { + $field['thesaurus_concept_inference'] = true; + $field['thesaurus_prefixes'] = ThesaurusHelper::findPrefixesByXPath($databox, $xpath); + } else { + $field['thesaurus_concept_inference'] = false; + $field['thesaurus_prefixes'] = null; + } //printf("Field \"%s\" <%s> (private: %b)\n", $name, $field['type'], $field['private']); diff --git a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Thesaurus/Helper.php b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Thesaurus/Helper.php index 216a9faba7..4ab61cc1a1 100644 --- a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Thesaurus/Helper.php +++ b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Thesaurus/Helper.php @@ -11,27 +11,53 @@ namespace Alchemy\Phrasea\SearchEngine\Elastic\Thesaurus; +use Alchemy\Phrasea\SearchEngine\Elastic\Exception\ThesaurusException; +use Alchemy\Phrasea\SearchEngine\Elastic\StringUtils; use databox; use DOMDocument; use DOMElement; use DOMNode; +use DOMNodeList; use DOMXPath; -use Elasticsearch\Client; class Helper { - public function findNodesByXPath($document, $xpath) + const TERM_LANG_ATTR = 'lng'; + const TERM_VALUE_ATTR = 'v'; + const PATH_LANG = 'en'; + + public static function findPrefixesByXPath(databox $databox, $expression) { - $tbranch = 
"/thesaurus/te[@id='T26'] | /thesaurus/te[@id='T24']"; - $xpath = new \DOMXPath($document); - $nodeList = $xpath->query($tbranch); - $conceptIds = []; - foreach ($nodeList as $node) { - if ($node->hasAttribute('id')) { - $conceptIds[] = $node->getAttribute('id'); + $document = self::thesaurusFromDatabox($databox); + $xpath = new DOMXPath($document); + $nodes = $xpath->query($expression); + $prefixes = []; + foreach ($nodes as $node) { + $path_segments = []; + $me_and_parents = [$node]; + foreach (self::getElementAncestors($node) as $me_and_parents[]); + foreach ($me_and_parents as $node) { + if (Navigator::isConcept($node)) { + $path_segments[] = self::conceptPathSegment($node); + } else { + // Silently skips invalid targeted nodes + break; + } } + $prefixes[] = sprintf('/%s', implode('/', array_reverse($path_segments))); } + return $prefixes; + } + + private static function getElementAncestors(DOMElement $element) + { + $parents = []; + while ($element = $element->parentNode) { + $parents[] = $element; + } + + return $parents; } public static function thesaurusFromDatabox(databox $databox) @@ -64,4 +90,49 @@ class Helper return $document; } + + public static function conceptPathSegment(DOMElement $element) + { + // Path segment is named according to the first english term, and + // default to the first term. 
+ $terms = self::filter($element->childNodes, array(Navigator::class, 'isTerm')); + $term = self::find($terms, array('self', 'isPathLang')); + if (!$term) { + if (isset($terms[0])) { + $term = $terms[0]; + } else { + throw new ThesaurusException(sprintf('No term linked to concept at path "%s".', $element->getNodePath())); + } + } + + return StringUtils::slugify($term->getAttribute(self::TERM_VALUE_ATTR)); + } + + private static function isPathLang(DOMElement $element) + { + return $element->getAttribute(self::TERM_LANG_ATTR) === self::PATH_LANG; + } + + // DOM Helpers + + private static function filter(DOMNodeList $list, callable $callback) + { + $filtered = []; + foreach ($list as $node) { + if (call_user_func($callback, $node)) { + $filtered[] = $node; + } + } + + return $filtered; + } + + private static function find(array $list, callable $callback) + { + foreach ($list as $node) { + if (call_user_func($callback, $node)) { + return $node; + } + } + } } diff --git a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Thesaurus/Navigator.php b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Thesaurus/Navigator.php index c742755305..18b0ccf637 100644 --- a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Thesaurus/Navigator.php +++ b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Thesaurus/Navigator.php @@ -45,12 +45,12 @@ class Navigator } } - private function isConcept(DOMNode $node) + public static function isConcept(DOMNode $node) { return $node instanceof DOMElement && $node->tagName === self::CONCEPT_TAG_NAME; } - private function isTerm(DOMNode $node) + public static function isTerm(DOMNode $node) { return $node instanceof DOMElement && $node->tagName === self::TERM_TAG_NAME; } diff --git a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Thesaurus/TermVisitor.php b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Thesaurus/TermVisitor.php index 5239cd42c6..9c61133b28 100644 --- a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Thesaurus/TermVisitor.php +++ 
b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Thesaurus/TermVisitor.php @@ -11,24 +11,17 @@ namespace Alchemy\Phrasea\SearchEngine\Elastic\Thesaurus; -use Alchemy\Phrasea\SearchEngine\Elastic\StringUtils; -use Alchemy\Phrasea\SearchEngine\Elastic\Exception\ThesaurusException; +use Alchemy\Phrasea\SearchEngine\Elastic\Thesaurus\Helper; use Closure; use Elasticsearch\Client; -use DOMDocument; -use DOMNodeList; use DOMElement; -use DOMNode; class TermVisitor implements VisitorInterface { - const TERM_TAG_NAME = 'sy'; const TERM_ID_ATTR = 'id'; const TERM_LANG_ATTR = 'lng'; const TERM_VALUE_ATTR = 'v'; - const PATH_LANG = 'en'; - private $path = []; private $termCallback; @@ -39,20 +32,20 @@ class TermVisitor implements VisitorInterface public function visitConcept(DOMElement $element) { - array_push($this->path, $this->getConceptPathSegment($element)); + array_push($this->path, Helper::conceptPathSegment($element)); } public function visitTerm(DOMElement $element) { - $raw_value = $this->getTermValue($element); + $raw_value = $element->getAttribute(self::TERM_VALUE_ATTR); $object = Term::parse($raw_value); $term = [ 'raw_value' => $raw_value, 'value' => $object->getValue(), 'context' => $object->getContext(), 'path' => $this->getCurrentPathAsString(), - 'lang' => $this->getTermAttribute($element, self::TERM_LANG_ATTR), - 'id' => $this->getTermAttribute($element, self::TERM_ID_ATTR) + 'lang' => $element->getAttribute(self::TERM_LANG_ATTR), + 'id' => $element->getAttribute(self::TERM_ID_ATTR) ]; call_user_func($this->termCallback, $term); @@ -67,66 +60,4 @@ class TermVisitor implements VisitorInterface { return sprintf('/%s', implode('/', $this->path)); } - - private function getConceptPathSegment(DOMElement $element) - { - // Path segment is named according to the first english term, and - // default to the first term. 
- $terms = $this->filter($element->childNodes, array($this, 'isTerm')); - $term = $this->find($terms, array($this, 'isPathLang')); - if (!$term) { - if (isset($terms[0])) { - $term = $terms[0]; - } else { - throw new ThesaurusException(sprintf('No term linked to concept at path "%s".', $element->getNodePath())); - } - } - - return StringUtils::slugify($this->getTermValue($term)); - } - - private function isTerm(DOMNode $node) - { - return $node instanceof DOMElement && $node->tagName === self::TERM_TAG_NAME; - } - - private function isPathLang(DOMElement $element) - { - return $element->getAttribute(self::TERM_LANG_ATTR) === self::PATH_LANG; - } - - private function getTermValue(DOMElement $term) - { - return $this->getTermAttribute($term, self::TERM_VALUE_ATTR); - } - - private function getTermAttribute(DOMElement $term, $attribute) - { - if ($term->hasAttribute($attribute)) { - return $term->getAttribute($attribute); - } - } - - // DOM Helpers - - private function filter(DOMNodeList $list, Callable $callback) - { - $filtered = []; - foreach ($list as $node) { - if (call_user_func($callback, $node)) { - $filtered[] = $node; - } - } - - return $filtered; - } - - private function find(array $list, Callable $callback) - { - foreach ($list as $node) { - if (call_user_func($callback, $node)) { - return $node; - } - } - } }