Thesaurus term inference command

This commit is contained in:
Mathieu Darse
2014-12-15 12:25:18 +01:00
parent 877551f50c
commit 65d4334dd1
4 changed files with 104 additions and 2 deletions

View File

@@ -19,6 +19,7 @@ use Alchemy\Phrasea\Command\SearchEngine\IndexCreateCommand;
use Alchemy\Phrasea\Command\SearchEngine\IndexDropCommand; use Alchemy\Phrasea\Command\SearchEngine\IndexDropCommand;
use Alchemy\Phrasea\Command\SearchEngine\IndexFull; use Alchemy\Phrasea\Command\SearchEngine\IndexFull;
use Alchemy\Phrasea\Command\SearchEngine\IndexPopulateCommand; use Alchemy\Phrasea\Command\SearchEngine\IndexPopulateCommand;
use Alchemy\Phrasea\Command\Thesaurus\FindConceptsCommand;
use Alchemy\Phrasea\Command\WebsocketServer; use Alchemy\Phrasea\Command\WebsocketServer;
use Alchemy\Phrasea\Core\Version; use Alchemy\Phrasea\Core\Version;
use Alchemy\Phrasea\Command\BuildMissingSubdefs; use Alchemy\Phrasea\Command\BuildMissingSubdefs;
@@ -129,6 +130,7 @@ if ($cli['search_engine.type'] === SearchEngineInterface::TYPE_ELASTICSEARCH) {
$cli->command(new IndexDropCommand()); $cli->command(new IndexDropCommand());
$cli->command(new IndexPopulateCommand()); $cli->command(new IndexPopulateCommand());
$cli->command(new QueryParseCommand()); $cli->command(new QueryParseCommand());
$cli->command(new FindConceptsCommand());
} }
$cli->command(new WebsocketServer('ws-server:run')); $cli->command(new WebsocketServer('ws-server:run'));

View File

@@ -0,0 +1,68 @@
<?php
/*
* This file is part of Phraseanet
*
* (c) 2005-2014 Alchemy
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
namespace Alchemy\Phrasea\Command\Thesaurus;
use Alchemy\Phrasea\Command\Command;
use Alchemy\Phrasea\SearchEngine\Elastic\Indexer;
use Symfony\Component\Console\Input\InputArgument;
use Symfony\Component\Console\Input\InputInterface;
use Symfony\Component\Console\Input\InputOption;
use Symfony\Component\Console\Output\OutputInterface;
/**
 * Console command that looks up the thesaurus concepts linked to a term.
 *
 * Usage: thesaurus:find:concepts [--locale=LOCALE] [--raw] term
 */
class FindConceptsCommand extends Command
{
    /**
     * Declares the command name, its description, the required "term"
     * argument and the "locale" / "raw" options.
     */
    protected function configure()
    {
        $this
            ->setName('thesaurus:find:concepts')
            ->setDescription('Infer concepts using thesaurus')
            ->addArgument(
                'term',
                InputArgument::REQUIRED,
                'Reverse search a term to infer concepts'
            )
            ->addOption(
                'locale',
                null,
                InputOption::VALUE_REQUIRED,
                'Specify input locale'
            )
            ->addOption(
                'raw',
                null,
                InputOption::VALUE_NONE,
                'Only output raw concepts'
            )
        ;
    }

    /**
     * Queries the "thesaurus" service for the concepts matching the given
     * term and prints one concept per line.
     *
     * When the "raw" option is set, the header and the "No concept found"
     * notice are suppressed so that only the concepts themselves are printed.
     *
     * @param InputInterface  $input  Provides the "term" argument and the "locale" / "raw" options
     * @param OutputInterface $output Console output the results are written to
     */
    protected function doExecute(InputInterface $input, OutputInterface $output)
    {
        $term = $input->getArgument('term');
        $raw = $input->getOption('raw');

        if (!$raw) {
            // The term is user input placed inside formatter markup: escape it
            // so that any "<" it contains is printed literally instead of
            // being parsed as a console style tag.
            $output->writeln(sprintf(
                'Finding linked concepts: <comment>%s</comment>',
                \Symfony\Component\Console\Formatter\OutputFormatter::escape($term)
            ));
            $output->writeln(str_repeat('-', 20));
        }

        $thesaurus = $this->container['thesaurus'];
        $locale = $input->getOption('locale');

        // No context is given (second argument); the locale is null when the
        // option was not provided.
        $concepts = $thesaurus->findConcepts($term, null, $locale);

        if (count($concepts)) {
            // writeln() accepts an array and emits one line per element
            $output->writeln($concepts);
        } elseif (!$raw) {
            $output->writeln('No concept found');
        }
    }
}

View File

@@ -123,7 +123,7 @@ class SearchEngineServiceProvider implements ServiceProviderInterface
return array_replace($defaults, $options); return array_replace($defaults, $options);
}); });
$app['elasticsearch.thesaurus'] = $app->share(function ($app) { $app['thesaurus'] = $app->share(function ($app) {
return new Thesaurus( return new Thesaurus(
$app['elasticsearch.client'], $app['elasticsearch.client'],
$app['elasticsearch.options']['index'] $app['elasticsearch.options']['index']

View File

@@ -11,6 +11,7 @@
namespace Alchemy\Phrasea\SearchEngine\Elastic; namespace Alchemy\Phrasea\SearchEngine\Elastic;
use Alchemy\Phrasea\SearchEngine\Elastic\Indexer\TermIndexer;
use Elasticsearch\Client; use Elasticsearch\Client;
class Thesaurus class Thesaurus
@@ -24,8 +25,39 @@ class Thesaurus
$this->index = $index; $this->index = $index;
} }
/**
 * Finds the concept paths linked to a thesaurus term.
 *
 * Runs a term query on the "value" field (optionally combined with a term
 * query on "context") and collects the keys of a terms aggregation on the
 * "path" field, so that each path is returned once.
 *
 * @param string      $term    Value to look up
 * @param string|null $context Optional context the term must match
 * @param string|null $lang    Accepted but currently unused by the query
 *                             (TODO: take the language into account)
 *
 * @return array List of aggregation bucket keys (concept paths)
 */
public function findConcepts($term, $context = null, $lang = null)
{
    // TODO Check that term queries are ok with multiple words
    $query = array();
    $query['term']['value'] = $term;

    if ($context) {
        // Both the value and the context must match
        $term_query = $query;
        $query = array();
        $query['bool']['must'][0] = $term_query;
        $query['bool']['must'][1]['term']['context'] = $context;
    }

    // Path deduplication
    $aggs = array();
    $aggs['dedup']['terms']['field'] = 'path';

    // Search request, restricted to the index this thesaurus was configured
    // with; without it the search would run against every index.
    $params = array();
    $params['index'] = $this->index;
    $params['type'] = TermIndexer::TYPE_NAME;
    $params['body']['query'] = $query;
    $params['body']['aggs'] = $aggs;
    // Only the aggregation buckets are read below, so no hits are needed
    $params['body']['size'] = 0;

    $response = $this->client->search($params);

    // Extract concept paths from response
    $concepts = array();
    $buckets = \igorw\get_in($response, ['aggregations', 'dedup', 'buckets'], []);
    foreach ($buckets as $bucket) {
        if (isset($bucket['key'])) {
            $concepts[] = $bucket['key'];
        }
    }

    return $concepts;
}
} }