mirror of
https://github.com/alchemy-fr/Phraseanet.git
synced 2025-10-12 20:43:25 +00:00
Thesaurus prefixes in field structure
Also fixes candidates collected from all string fields
This commit is contained in:
@@ -36,9 +36,9 @@ class ThesaurusHydrator implements HydratorInterface
|
||||
// Fields with concept inference enabled
|
||||
$structure = $this->helper->getFieldsStructure();
|
||||
$fields = array();
|
||||
foreach ($structure as $field => $options) {
|
||||
foreach ($structure as $name => $options) {
|
||||
if ($options['thesaurus_concept_inference']) {
|
||||
$fields[$field] = $options['thesaurus_prefix'];
|
||||
$fields[$name] = $options['thesaurus_prefixes'];
|
||||
}
|
||||
}
|
||||
// Hydrate records with concepts
|
||||
@@ -54,12 +54,13 @@ class ThesaurusHydrator implements HydratorInterface
|
||||
}
|
||||
|
||||
$terms = array();
|
||||
$fieldMap = array();
|
||||
foreach ($fields as $field => $prefix) {
|
||||
if (isset($record['caption'][$field])) {
|
||||
foreach ($record['caption'][$field] as $value) {
|
||||
$bulkFieldMap = array();
|
||||
foreach ($fields as $name => $prefixes) {
|
||||
if (isset($record['caption'][$name])) {
|
||||
// Loop through all values to prepare bulk query
|
||||
foreach ($record['caption'][$name] as $value) {
|
||||
$terms[] = Term::parse($value);
|
||||
$fieldMap[] = $field;
|
||||
$bulkFieldMap[] = $name;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -70,12 +71,12 @@ class ThesaurusHydrator implements HydratorInterface
|
||||
|
||||
foreach ($bulk as $offset => $item_concepts) {
|
||||
if ($item_concepts) {
|
||||
$field = $fieldMap[$offset];
|
||||
$name = $bulkFieldMap[$offset];
|
||||
foreach ($item_concepts as $concept) {
|
||||
$record['concept_path'][$field][] = $concept->getPath();
|
||||
$record['concept_path'][$name][] = $concept->getPath();
|
||||
}
|
||||
} else {
|
||||
$this->candidateTerms->insert($field, $value);
|
||||
$this->candidateTerms->insert($name, $value);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -169,13 +169,14 @@ class RecordHelper
|
||||
$field['to_aggregate'] = (bool) $fieldStructure->isAggregable();
|
||||
|
||||
// Thesaurus concept inference
|
||||
// $xpath = "/thesaurus/te[@id='T26'] | /thesaurus/te[@id='T24']";
|
||||
$helper = new ThesaurusHelper();
|
||||
|
||||
// TODO Not the real option yet
|
||||
$field['thesaurus_concept_inference'] = $field['type'] === Mapping::TYPE_STRING;
|
||||
// TODO Find thesaurus path prefixes
|
||||
$field['thesaurus_prefix'] = '/categories';
|
||||
// Concept inference is enabled only for string fields that declare a
// thesaurus branch; the branch is passed on as an XPath expression.
$xpath = $fieldStructure->get_tbranch();
// Explicit "non-empty string" test. The former `$xpath ==! ''` was parsed
// as `$xpath == (!'')`, i.e. a loose comparison against `true`, which only
// worked by accident.
if ($field['type'] === Mapping::TYPE_STRING && is_string($xpath) && $xpath !== '') {
    $field['thesaurus_concept_inference'] = true;
    $field['thesaurus_prefixes'] = ThesaurusHelper::findPrefixesByXPath($databox, $xpath);
} else {
    // No usable branch: inference is off and there are no prefixes.
    $field['thesaurus_concept_inference'] = false;
    $field['thesaurus_prefixes'] = null;
}
|
||||
|
||||
//printf("Field \"%s\" <%s> (private: %b)\n", $name, $field['type'], $field['private']);
|
||||
|
||||
|
@@ -11,27 +11,53 @@
|
||||
|
||||
namespace Alchemy\Phrasea\SearchEngine\Elastic\Thesaurus;
|
||||
|
||||
use Alchemy\Phrasea\SearchEngine\Elastic\Exception\ThesaurusException;
|
||||
use Alchemy\Phrasea\SearchEngine\Elastic\StringUtils;
|
||||
use databox;
|
||||
use DOMDocument;
|
||||
use DOMElement;
|
||||
use DOMNode;
|
||||
use DOMNodeList;
|
||||
use DOMXPath;
|
||||
use Elasticsearch\Client;
|
||||
|
||||
class Helper
|
||||
{
|
||||
public function findNodesByXPath($document, $xpath)
|
||||
const TERM_LANG_ATTR = 'lng';
|
||||
const TERM_VALUE_ATTR = 'v';
|
||||
const PATH_LANG = 'en';
|
||||
|
||||
/**
 * Resolves an XPath expression against the databox thesaurus and returns
 * the concept path of every concept node it selects.
 *
 * A path has the form "/outermost/.../target", one segment per concept,
 * each segment being the value returned by conceptPathSegment().
 *
 * @param databox $databox    Databox whose thesaurus document is queried
 * @param string  $expression XPath expression selecting thesaurus nodes
 *
 * @return string[] One path per selected concept node
 *
 * @throws ThesaurusException As raised by conceptPathSegment() when a
 *                            concept on the path has no term
 */
public static function findPrefixesByXPath(databox $databox, $expression)
{
    $document = self::thesaurusFromDatabox($databox);
    $xpath = new DOMXPath($document);
    $nodes = $xpath->query($expression);
    if ($nodes === false) {
        // DOMXPath::query() returns false for a malformed expression;
        // nothing is selected in that case.
        return [];
    }

    $prefixes = [];
    foreach ($nodes as $target) {
        if (!Navigator::isConcept($target)) {
            // Silently skip invalid targeted nodes. Emitting a path for
            // them would produce the catch-all prefix "/".
            continue;
        }

        // Collect segments from the target up to the first ancestor that
        // is not a concept.
        $path_segments = [self::conceptPathSegment($target)];
        foreach (self::getElementAncestors($target) as $ancestor) {
            if (!Navigator::isConcept($ancestor)) {
                break;
            }
            $path_segments[] = self::conceptPathSegment($ancestor);
        }

        // Segments were gathered innermost-first; paths read outermost-first.
        $prefixes[] = sprintf('/%s', implode('/', array_reverse($path_segments)));
    }

    return $prefixes;
}
|
||||
|
||||
/**
 * Lists every node reachable from the element by following parentNode,
 * nearest parent first.
 *
 * @param DOMElement $element Starting element (not included in the result)
 *
 * @return DOMNode[] Ancestors ordered from closest to farthest
 */
private static function getElementAncestors(DOMElement $element)
{
    $ancestors = [];
    // Climb until parentNode yields a falsy value.
    for ($current = $element->parentNode; $current; $current = $current->parentNode) {
        $ancestors[] = $current;
    }

    return $ancestors;
}
|
||||
|
||||
public static function thesaurusFromDatabox(databox $databox)
|
||||
@@ -64,4 +90,49 @@ class Helper
|
||||
|
||||
return $document;
|
||||
}
|
||||
|
||||
/**
 * Builds the path segment naming a concept element.
 *
 * The segment is the slugified value of the concept's first term whose
 * language attribute equals PATH_LANG, falling back to its first term.
 *
 * @param DOMElement $element Concept element whose child terms are read
 *
 * @return string Value returned by StringUtils::slugify() for the chosen term
 *
 * @throws ThesaurusException When the concept has no term child at all
 */
public static function conceptPathSegment(DOMElement $element)
{
    $terms = self::filter($element->childNodes, array(Navigator::class, 'isTerm'));
    // self::class instead of the 'self' string: callables written as
    // array('self', ...) are deprecated as of PHP 8.2.
    $term = self::find($terms, array(self::class, 'isPathLang'));
    if (!$term) {
        if (!isset($terms[0])) {
            throw new ThesaurusException(sprintf('No term linked to concept at path "%s".', $element->getNodePath()));
        }
        // No term in the path language: default to the first term.
        $term = $terms[0];
    }

    return StringUtils::slugify($term->getAttribute(self::TERM_VALUE_ATTR));
}
|
||||
|
||||
/**
 * Tells whether a term element is written in the language used for paths.
 *
 * @param DOMElement $element Term element to inspect
 *
 * @return bool True when its language attribute equals PATH_LANG
 */
private static function isPathLang(DOMElement $element)
{
    $lang = $element->getAttribute(self::TERM_LANG_ATTR);

    return self::PATH_LANG === $lang;
}
|
||||
|
||||
// DOM Helpers
|
||||
|
||||
/**
 * Keeps the nodes of a list for which the callback returns a truthy value.
 *
 * @param DOMNodeList $list     Nodes to examine, in document order
 * @param callable    $callback Predicate receiving one node
 *
 * @return DOMNode[] Matching nodes, re-indexed from 0
 */
private static function filter(DOMNodeList $list, callable $callback)
{
    // Snapshot the node list into a plain array before filtering.
    $nodes = iterator_to_array($list, false);

    // array_filter() preserves keys; array_values() restores a 0-based list.
    return array_values(array_filter($nodes, $callback));
}
|
||||
|
||||
/**
 * Returns the first item for which the callback returns a truthy value.
 *
 * @param array    $list     Items to examine, in order
 * @param callable $callback Predicate receiving one item
 *
 * @return mixed|null The first matching item, or null when none matches
 */
private static function find(array $list, callable $callback)
{
    foreach ($list as $candidate) {
        if (!call_user_func($callback, $candidate)) {
            continue;
        }

        return $candidate;
    }

    return null;
}
|
||||
}
|
||||
|
@@ -45,12 +45,12 @@ class Navigator
|
||||
}
|
||||
}
|
||||
|
||||
private function isConcept(DOMNode $node)
|
||||
/**
 * Tells whether a node is a concept element.
 *
 * @param DOMNode $node Node to inspect
 *
 * @return bool True for a DOMElement whose tag name is CONCEPT_TAG_NAME
 */
public static function isConcept(DOMNode $node)
{
    if (!$node instanceof DOMElement) {
        return false;
    }

    return $node->tagName === self::CONCEPT_TAG_NAME;
}
|
||||
|
||||
private function isTerm(DOMNode $node)
|
||||
/**
 * Tells whether a node is a term element.
 *
 * @param DOMNode $node Node to inspect
 *
 * @return bool True for a DOMElement whose tag name is TERM_TAG_NAME
 */
public static function isTerm(DOMNode $node)
{
    if (!$node instanceof DOMElement) {
        return false;
    }

    return $node->tagName === self::TERM_TAG_NAME;
}
|
||||
|
@@ -11,24 +11,17 @@
|
||||
|
||||
namespace Alchemy\Phrasea\SearchEngine\Elastic\Thesaurus;
|
||||
|
||||
use Alchemy\Phrasea\SearchEngine\Elastic\StringUtils;
|
||||
use Alchemy\Phrasea\SearchEngine\Elastic\Exception\ThesaurusException;
|
||||
use Alchemy\Phrasea\SearchEngine\Elastic\Thesaurus\Helper;
|
||||
use Closure;
|
||||
use Elasticsearch\Client;
|
||||
use DOMDocument;
|
||||
use DOMNodeList;
|
||||
use DOMElement;
|
||||
use DOMNode;
|
||||
|
||||
class TermVisitor implements VisitorInterface
|
||||
{
|
||||
const TERM_TAG_NAME = 'sy';
|
||||
const TERM_ID_ATTR = 'id';
|
||||
const TERM_LANG_ATTR = 'lng';
|
||||
const TERM_VALUE_ATTR = 'v';
|
||||
|
||||
const PATH_LANG = 'en';
|
||||
|
||||
private $path = [];
|
||||
private $termCallback;
|
||||
|
||||
@@ -39,20 +32,20 @@ class TermVisitor implements VisitorInterface
|
||||
|
||||
/**
 * Appends the path segment of the given concept to the current path.
 *
 * @param DOMElement $element Concept element being visited
 */
public function visitConcept(DOMElement $element)
{
    $segment = Helper::conceptPathSegment($element);
    $this->path[] = $segment;
}
|
||||
|
||||
public function visitTerm(DOMElement $element)
|
||||
{
|
||||
$raw_value = $this->getTermValue($element);
|
||||
$raw_value = $element->getAttribute(self::TERM_VALUE_ATTR);
|
||||
$object = Term::parse($raw_value);
|
||||
$term = [
|
||||
'raw_value' => $raw_value,
|
||||
'value' => $object->getValue(),
|
||||
'context' => $object->getContext(),
|
||||
'path' => $this->getCurrentPathAsString(),
|
||||
'lang' => $this->getTermAttribute($element, self::TERM_LANG_ATTR),
|
||||
'id' => $this->getTermAttribute($element, self::TERM_ID_ATTR)
|
||||
'lang' => $element->getAttribute(self::TERM_LANG_ATTR),
|
||||
'id' => $element->getAttribute(self::TERM_ID_ATTR)
|
||||
];
|
||||
|
||||
call_user_func($this->termCallback, $term);
|
||||
@@ -67,66 +60,4 @@ class TermVisitor implements VisitorInterface
|
||||
{
|
||||
return sprintf('/%s', implode('/', $this->path));
|
||||
}
|
||||
|
||||
private function getConceptPathSegment(DOMElement $element)
|
||||
{
|
||||
// Path segment is named according to the first english term, and
|
||||
// default to the first term.
|
||||
$terms = $this->filter($element->childNodes, array($this, 'isTerm'));
|
||||
$term = $this->find($terms, array($this, 'isPathLang'));
|
||||
if (!$term) {
|
||||
if (isset($terms[0])) {
|
||||
$term = $terms[0];
|
||||
} else {
|
||||
throw new ThesaurusException(sprintf('No term linked to concept at path "%s".', $element->getNodePath()));
|
||||
}
|
||||
}
|
||||
|
||||
return StringUtils::slugify($this->getTermValue($term));
|
||||
}
|
||||
|
||||
private function isTerm(DOMNode $node)
|
||||
{
|
||||
return $node instanceof DOMElement && $node->tagName === self::TERM_TAG_NAME;
|
||||
}
|
||||
|
||||
private function isPathLang(DOMElement $element)
|
||||
{
|
||||
return $element->getAttribute(self::TERM_LANG_ATTR) === self::PATH_LANG;
|
||||
}
|
||||
|
||||
private function getTermValue(DOMElement $term)
|
||||
{
|
||||
return $this->getTermAttribute($term, self::TERM_VALUE_ATTR);
|
||||
}
|
||||
|
||||
private function getTermAttribute(DOMElement $term, $attribute)
|
||||
{
|
||||
if ($term->hasAttribute($attribute)) {
|
||||
return $term->getAttribute($attribute);
|
||||
}
|
||||
}
|
||||
|
||||
// DOM Helpers
|
||||
|
||||
private function filter(DOMNodeList $list, Callable $callback)
|
||||
{
|
||||
$filtered = [];
|
||||
foreach ($list as $node) {
|
||||
if (call_user_func($callback, $node)) {
|
||||
$filtered[] = $node;
|
||||
}
|
||||
}
|
||||
|
||||
return $filtered;
|
||||
}
|
||||
|
||||
private function find(array $list, Callable $callback)
|
||||
{
|
||||
foreach ($list as $node) {
|
||||
if (call_user_func($callback, $node)) {
|
||||
return $node;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user