diff --git a/lib/Alchemy/Phrasea/Core/Provider/SearchEngineServiceProvider.php b/lib/Alchemy/Phrasea/Core/Provider/SearchEngineServiceProvider.php index 922e01bf82..c9b305c07f 100644 --- a/lib/Alchemy/Phrasea/Core/Provider/SearchEngineServiceProvider.php +++ b/lib/Alchemy/Phrasea/Core/Provider/SearchEngineServiceProvider.php @@ -18,6 +18,7 @@ use Alchemy\Phrasea\SearchEngine\Elastic\ElasticSearchEngine; use Alchemy\Phrasea\SearchEngine\Elastic\Indexer; use Alchemy\Phrasea\SearchEngine\Elastic\Indexer\RecordIndexer; use Alchemy\Phrasea\SearchEngine\Elastic\Indexer\TermIndexer; +use Alchemy\Phrasea\SearchEngine\Elastic\Thesaurus; use Alchemy\Phrasea\SearchEngine\Phrasea\PhraseaEngine; use Alchemy\Phrasea\SearchEngine\Phrasea\PhraseaEngineSubscriber; use Elasticsearch\Client; @@ -82,6 +83,7 @@ class SearchEngineServiceProvider implements ServiceProviderInterface $app['elasticsearch.indexer.record_indexer'] = $app->share(function ($app) { return new RecordIndexer( + $app['elasticsearch.thesaurus'], $app['elasticsearch.engine'], $app['phraseanet.appbox'] ); @@ -108,6 +110,13 @@ class SearchEngineServiceProvider implements ServiceProviderInterface return array_replace($defaults, $options); }); + + $app['elasticsearch.thesaurus'] = $app->share(function ($app) { + return new Thesaurus( + $app['elasticsearch.client'], + $app['elasticsearch.options']['index'] + ); + }); } public function boot(Application $app) diff --git a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer.php b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer.php index a9c062ce9f..8ed445026e 100644 --- a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer.php +++ b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer.php @@ -95,8 +95,12 @@ class Indexer $bulk->setAutoFlushLimit(1000); $this->termIndexer->populateIndex($bulk); + // Record indexing depends on indexed terms so we need to flush + // between the two operations + $bulk->flush(); $this->recordIndexer->populateIndex($bulk); + // Final flush $bulk->flush(); // Optimize index diff --git a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer/RecordIndexer.php b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer/RecordIndexer.php index 60ca761905..525e594d51 100644 --- a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer/RecordIndexer.php +++ b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer/RecordIndexer.php @@ -19,6 +19,8 @@ use Alchemy\Phrasea\SearchEngine\Elastic\Mapping; use Alchemy\Phrasea\SearchEngine\Elastic\RecordFetcher; use Alchemy\Phrasea\SearchEngine\Elastic\RecordHelper; use Alchemy\Phrasea\SearchEngine\Elastic\StringUtils; +use Alchemy\Phrasea\SearchEngine\Elastic\Thesaurus; +use Alchemy\Phrasea\SearchEngine\Elastic\Thesaurus\Helper as ThesaurusHelper; use media_subdef; class RecordIndexer @@ -35,8 +37,9 @@ class RecordIndexer */ private $elasticSearchEngine; - public function __construct(ElasticSearchEngine $elasticSearchEngine, \appbox $appbox) + public function __construct(Thesaurus $thesaurus, ElasticSearchEngine $elasticSearchEngine, \appbox $appbox) { + $this->thesaurus = $thesaurus; $this->appbox = $appbox; $this->elasticSearchEngine = $elasticSearchEngine; } @@ -47,12 +50,11 @@ class RecordIndexer $recordHelper = new RecordHelper($this->appbox); foreach ($this->appbox->get_databoxes() as $databox) { - // TODO Pass a BulkOperation object to TermIndexer to muliplex - // indexing queries between types $fetcher = new RecordFetcher($databox, $recordHelper); $fetcher->setBatchSize(200); while ($records = $fetcher->fetch()) { foreach ($records as $record) { + $record['concept_paths'] = $this->findLinkedConcepts($record); $params = array(); $params['id'] = $record['id']; $params['type'] = self::TYPE_NAME; @@ -63,6 +65,11 @@ class RecordIndexer } } + private function findLinkedConcepts($record) + { + return []; + } + public function getMapping() { $mapping = new Mapping(); @@ -81,6 +88,10 @@ class RecordIndexer // Dates ->add('created_on', 'date')->format(Mapping::DATE_FORMAT_MYSQL) ->add('updated_on', 'date')->format(Mapping::DATE_FORMAT_MYSQL) + // Inferred thesaurus concepts + ->add('concept_paths', 'string') + ->analyzer('thesaurus_path', 'indexing') + ->analyzer('keyword', 'searching') ; // Caption mapping @@ -149,6 +160,13 @@ class RecordIndexer $field['indexable'] = $fieldStructure->is_indexable(); $field['to_aggregate'] = false; // @todo, dev in progress + // Thesaurus concept inference + // $xpath = "/thesaurus/te[@id='T26'] | /thesaurus/te[@id='T24']"; + $helper = new ThesaurusHelper(); + // TODO Find thesaurus path prefixes + $field['thesaurus_concept_inference'] = true; + $field['thesaurus_prefix'] = '/categories'; + $name = $fieldStructure->get_name(); printf("Field \"%s\" <%s> (private: %b)\n", $name, $field['type'], $field['private']); diff --git a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer/TermIndexer.php b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer/TermIndexer.php index 85ecd3a0f7..e4922f76b6 100644 --- a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer/TermIndexer.php +++ b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer/TermIndexer.php @@ -45,7 +45,7 @@ class TermIndexer $databoxId = $databox->get_sbas_id(); $document = self::thesaurusFromDatabox($databox); $visitor = new TermVisitor(function ($term) use ($bulk, $databoxId) { - printf("- %s (%s)\n", $term['path'], $term['value']); + // printf("- %s (%s)\n", $term['path'], $term['value']); // Term structure $id = $term['id']; unset($term['id']); @@ -78,9 +78,7 @@ class TermIndexer ->add('raw_value', 'string')->notAnalyzed() ->add('value', 'string') ->add('context', 'string') - ->add('path', 'string') - ->analyzer('thesaurus_path', 'indexing') - ->analyzer('keyword', 'searching') + ->add('path', 'string')->notAnalyzed() ->add('lang', 'string')->notAnalyzed() ->add('databox_id', 'integer') ; diff --git a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Thesaurus.php b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Thesaurus.php new file mode 100644 index 0000000000..6d9e006ee5 --- /dev/null +++ b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Thesaurus.php @@ -0,0 +1,31 @@ +client = $client; + $this->index = $index; + } + + public function findConceptPath($term, $context = null, $lang = null) + { + + } +} diff --git a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Thesaurus/Helper.php b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Thesaurus/Helper.php new file mode 100644 index 0000000000..df6be6088f --- /dev/null +++ b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Thesaurus/Helper.php @@ -0,0 +1,35 @@ +query($tbranch); + $conceptIds = []; + foreach ($nodeList as $node) { + if ($node->hasAttribute('id')) { + $conceptIds[] = $node->getAttribute('id'); + } + } + + } +}