Thesaurus term indexing for the Elastic search engine.

  * Indexer.php: declares a custom "thesaurus_path" analyzer that uses a
    "thesaurus_path" tokenizer of type path_hierarchy.
  * TermIndexer.php: keeps a Navigator on the instance and, for every term
    reported by TermVisitor, queues a bulk index request whose id is the term
    id and whose body is the term plus its databox_id. The "path" field is
    indexed with "thesaurus_path" and searched with "keyword".
  * Mapping.php: adds analyzer($analyzer, $type = null) for string fields.
  * TermVisitor.php: passes the "id" attribute of each term to the callback.

NOTE(review): the leading whitespace of the hunk lines below was rebuilt with
conventional 4-space PHP indentation; confirm it against the target files or
apply with whitespace-insensitive matching.
NOTE(review): the post-image blob ids on the "index" lines of TermIndexer.php
and Mapping.php predate the docblock/strict-comparison edits; regenerate the
patch if they matter.

diff --git a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer.php b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer.php
index 6d10cab4bb..a9c062ce9f 100644
--- a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer.php
+++ b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer.php
@@ -202,6 +202,16 @@ class Indexer
                     'type' => 'custom',
                     'tokenizer' => 'icu_tokenizer',
                     'filter' => ['nfkc_normalizer', 'asciifolding']
+                ],
+                // Thesaurus specific
+                'thesaurus_path' => [
+                    'type' => 'custom',
+                    'tokenizer' => 'thesaurus_path'
+                ]
+            ],
+            'tokenizer' => [
+                'thesaurus_path' => [
+                    'type' => 'path_hierarchy'
                 ]
             ],
             'filter' => [
diff --git a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer/TermIndexer.php b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer/TermIndexer.php
index 9caf4c38c9..85ecd3a0f7 100644
--- a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer/TermIndexer.php
+++ b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer/TermIndexer.php
@@ -31,30 +31,36 @@ class TermIndexer
      */
     private $appbox;
 
+    /**
+     * @var Navigator
+     */
+    private $navigator;
+
     public function __construct(\appbox $appbox)
     {
         $this->appbox = $appbox;
+        $this->navigator = new Navigator();
     }
 
     public function populateIndex(BulkOperation $bulk)
     {
-        // TODO Create object to query thesaurus for term paths/synonyms
-
-        $navigator = new Navigator();
-
         foreach ($this->appbox->get_databoxes() as $databox) {
+            $databoxId = $databox->get_sbas_id();
             $document = self::thesaurusFromDatabox($databox);
-            $visitor = new TermVisitor(function ($term) use ($bulk) {
+            $visitor = new TermVisitor(function ($term) use ($bulk, $databoxId) {
                 printf("- %s (%s)\n", $term['path'], $term['value']);
-            });
-            $navigator->walk($document, $visitor);
-
-            while ($record = false) {
+                // Term structure
+                $id = $term['id'];
+                unset($term['id']);
+                $term['databox_id'] = $databoxId;
+                // Index request
                 $params = array();
-                $params['id'] = $record['id'];
-                $params['body'] = $record;
+                $params['id'] = $id;
+                $params['type'] = self::TYPE_NAME;
+                $params['body'] = $term;
                 $bulk->index($params);
-            }
+            });
+            $this->navigator->walk($document, $visitor);
         }
     }
 
@@ -72,9 +78,12 @@ class TermIndexer
     {
         $mapping = new Mapping();
         $mapping
+            ->add('raw_value', 'string')->notAnalyzed()
             ->add('value', 'string')
             ->add('context', 'string')
             ->add('path', 'string')
+                ->analyzer('thesaurus_path', 'indexing')
+                ->analyzer('keyword', 'searching')
             ->add('lang', 'string')->notAnalyzed()
             ->add('databox_id', 'integer')
         ;
diff --git a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Mapping.php b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Mapping.php
index e6337564b8..a5e3f06f51 100644
--- a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Mapping.php
+++ b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Mapping.php
@@ -90,6 +90,43 @@ class Mapping
         return $properties;
     }
 
+    /**
+     * Sets the analyzer of the field returned by currentField().
+     *
+     * Without $type the name is stored under "analyzer" and any
+     * "index_analyzer"/"search_analyzer" entry is removed; with
+     * "indexing" or "searching" only the matching entry is set.
+     *
+     * @param string      $analyzer Analyzer name
+     * @param string|null $type     null, "indexing" or "searching"
+     *
+     * @return $this
+     *
+     * @throws LogicException When the field is not a string field or $type is unknown
+     */
+    public function analyzer($analyzer, $type = null)
+    {
+        $field = &$this->currentField();
+        if ($field['type'] !== self::TYPE_STRING) {
+            throw new LogicException('Only string fields can be analyzed');
+        }
+        // Strict comparisons: a switch compares loosely, so '', 0 or false
+        // would be accepted as null instead of being rejected below.
+        if ($type === null) {
+            $field['analyzer'] = $analyzer;
+            unset($field['index_analyzer'], $field['search_analyzer']);
+        } elseif ($type === 'indexing') {
+            $field['index_analyzer'] = $analyzer;
+        } elseif ($type === 'searching') {
+            $field['search_analyzer'] = $analyzer;
+        } else {
+            throw new LogicException(sprintf('Invalid analyzer type "%s".', $type));
+        }
+        $field['index'] = 'analyzed';
+
+        return $this;
+    }
+
     public function notAnalyzed()
     {
         $field = &$this->currentField();
diff --git a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Thesaurus/TermVisitor.php b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Thesaurus/TermVisitor.php
index 489147a893..ea4a762013 100644
--- a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Thesaurus/TermVisitor.php
+++ b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Thesaurus/TermVisitor.php
@@ -24,6 +24,7 @@ use DOMNode;
 class TermVisitor implements VisitorInterface
 {
     const TERM_TAG_NAME = 'sy';
+    const TERM_ID_ATTR = 'id';
     const TERM_LANG_ATTR = 'lng';
     const TERM_VALUE_ATTR = 'v';
     // So, this is a huuuge regex to match a group of words eventually followed
@@ -54,7 +55,8 @@ class TermVisitor implements VisitorInterface
         $term = $this->parseTermValue($value);
         $term += [
             'path' => $this->getCurrentPathAsString(),
-            'lang' => $this->getTermAttribute($element, self::TERM_LANG_ATTR)
+            'lang' => $this->getTermAttribute($element, self::TERM_LANG_ATTR),
+            'id' => $this->getTermAttribute($element, self::TERM_ID_ATTR)
         ];
 
         call_user_func($this->termCallback, $term);