diff --git a/bin/console b/bin/console
index 3c087c5d48..f7efcd9540 100755
--- a/bin/console
+++ b/bin/console
@@ -19,6 +19,7 @@ use Alchemy\Phrasea\Command\SearchEngine\IndexCreateCommand;
 use Alchemy\Phrasea\Command\SearchEngine\IndexDropCommand;
 use Alchemy\Phrasea\Command\SearchEngine\IndexFull;
 use Alchemy\Phrasea\Command\SearchEngine\IndexPopulateCommand;
+use Alchemy\Phrasea\Command\Thesaurus\FindConceptsCommand;
 use Alchemy\Phrasea\Command\WebsocketServer;
 use Alchemy\Phrasea\Core\Version;
 use Alchemy\Phrasea\Command\BuildMissingSubdefs;
@@ -129,6 +130,7 @@ if ($cli['search_engine.type'] === SearchEngineInterface::TYPE_ELASTICSEARCH) {
     $cli->command(new IndexDropCommand());
     $cli->command(new IndexPopulateCommand());
     $cli->command(new QueryParseCommand());
+    $cli->command(new FindConceptsCommand());
 }
 
 $cli->command(new WebsocketServer('ws-server:run'));
diff --git a/lib/Alchemy/Phrasea/Command/Thesaurus/FindConceptsCommand.php b/lib/Alchemy/Phrasea/Command/Thesaurus/FindConceptsCommand.php
new file mode 100644
index 0000000000..269f6700dd
--- /dev/null
+++ b/lib/Alchemy/Phrasea/Command/Thesaurus/FindConceptsCommand.php
@@ -0,0 +1,74 @@
+setName('thesaurus:find:concepts')
+            ->setDescription('Infer concepts using thesaurus')
+            ->addArgument(
+                'term',
+                InputArgument::REQUIRED,
+                'Reverse search a term to infer concepts'
+            )
+            ->addOption(
+                'locale',
+                null,
+                InputOption::VALUE_REQUIRED,
+                'Specify input locale'
+            )
+            ->addOption(
+                'raw',
+                null,
+                InputOption::VALUE_NONE,
+                'Only output raw concepts'
+            )
+        ;
+    }
+
+    /**
+     * Looks up the concepts linked to the "term" argument and writes them to the output.
+     *
+     * With --raw, only the concepts themselves are written: the heading and the
+     * "No concept found" message are both suppressed.
+     */
+    protected function doExecute(InputInterface $input, OutputInterface $output)
+    {
+        $term = $input->getArgument('term');
+        $raw = $input->getOption('raw');
+
+        if (!$raw) {
+            $output->writeln(sprintf('Finding linked concepts: %s', $term));
+            $output->writeln(str_repeat('-', 20));
+        }
+
+        $thesaurus = $this->container['thesaurus'];
+        $locale = $input->getOption('locale');
+        $concepts = $thesaurus->findConcepts($term, null, $locale);
+
+        if (count($concepts)) {
+            $output->writeln($concepts);
+        } elseif (!$raw) {
+            $output->writeln('No concept found');
+        }
+    }
+}
diff --git a/lib/Alchemy/Phrasea/Core/Provider/SearchEngineServiceProvider.php b/lib/Alchemy/Phrasea/Core/Provider/SearchEngineServiceProvider.php
index 860337ddf6..8b73dcb61e 100644
--- a/lib/Alchemy/Phrasea/Core/Provider/SearchEngineServiceProvider.php
+++ b/lib/Alchemy/Phrasea/Core/Provider/SearchEngineServiceProvider.php
@@ -123,7 +123,7 @@ class SearchEngineServiceProvider implements ServiceProviderInterface
             return array_replace($defaults, $options);
         });
 
-        $app['elasticsearch.thesaurus'] = $app->share(function ($app) {
+        $app['thesaurus'] = $app->share(function ($app) {
             return new Thesaurus(
                 $app['elasticsearch.client'],
                 $app['elasticsearch.options']['index']
diff --git a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Thesaurus.php b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Thesaurus.php
index 6d9e006ee5..7201c50f9a 100644
--- a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Thesaurus.php
+++ b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Thesaurus.php
@@ -11,6 +11,7 @@
 
 namespace Alchemy\Phrasea\SearchEngine\Elastic;
 
+use Alchemy\Phrasea\SearchEngine\Elastic\Indexer\TermIndexer;
 use Elasticsearch\Client;
 
 class Thesaurus
@@ -24,8 +25,55 @@ class Thesaurus
         $this->index = $index;
     }
 
-    public function findConceptPath($term, $context = null, $lang = null)
+    /**
+     * Find the concept paths linked to a term.
+     *
+     * Runs a term query on the "value" field of the indexed thesaurus terms,
+     * optionally restricted to a context, and collects the distinct "path"
+     * values through a terms aggregation.
+     *
+     * @param string      $term    Term to look up
+     * @param string|null $context Optional context the term must also match
+     * @param string|null $lang    Accepted but not used yet (TODO: filter by language)
+     *
+     * @return array Keys of the aggregation buckets (concept paths), empty when nothing matched
+     */
+    public function findConcepts($term, $context = null, $lang = null)
     {
+        // TODO Check that term queries are ok with multiple words
+        $query = array();
+        $query['term']['value'] = $term;
+        if ($context) {
+            $term_query = $query;
+            $query = array();
+            $query['bool']['must'][0] = $term_query;
+            $query['bool']['must'][1]['term']['context'] = $context;
+        }
+
+        // Path deduplication
+        // NOTE(review): a terms aggregation returns a limited number of buckets
+        // by default; confirm that cap is acceptable for large thesauri.
+        $aggs = array();
+        $aggs['dedup']['terms']['field'] = 'path';
+
+        // Search request, scoped to the index this thesaurus was configured with
+        $params = array();
+        $params['index'] = $this->index;
+        $params['type'] = TermIndexer::TYPE_NAME;
+        $params['body']['query'] = $query;
+        $params['body']['aggs'] = $aggs;
+        $response = $this->client->search($params);
+
+        // Extract concept paths from response
+        $concepts = array();
+        $buckets = \igorw\get_in($response, ['aggregations', 'dedup', 'buckets'], []);
+        foreach ($buckets as $bucket) {
+            if (isset($bucket['key'])) {
+                $concepts[] = $bucket['key'];
+            }
+        }
+
+        return $concepts;
 
     }
 }