From f2f29c42907eb7efcae6334c3adaf5b0503d1696 Mon Sep 17 00:00:00 2001 From: Mathieu Darse Date: Mon, 23 Mar 2015 18:56:22 +0100 Subject: [PATCH 1/2] Command to update index mapping --- bin/console | 3 +- .../SearchEngine/Debug/QuerySampleCommand.php | 69 +++++++++++++++++++ .../Phrasea/SearchEngine/Elastic/Indexer.php | 3 +- 3 files changed, 72 insertions(+), 3 deletions(-) create mode 100644 lib/Alchemy/Phrasea/Command/SearchEngine/Debug/QuerySampleCommand.php diff --git a/bin/console b/bin/console index 541d0f95ac..efe960c5da 100755 --- a/bin/console +++ b/bin/console @@ -18,7 +18,7 @@ use Alchemy\Phrasea\Command\Setup\H264MappingGenerator; use Alchemy\Phrasea\Command\SearchEngine\Debug\QueryParseCommand; use Alchemy\Phrasea\Command\SearchEngine\IndexCreateCommand; use Alchemy\Phrasea\Command\SearchEngine\IndexDropCommand; -use Alchemy\Phrasea\Command\SearchEngine\IndexFull; +use Alchemy\Phrasea\Command\SearchEngine\MappingUpdateCommand; use Alchemy\Phrasea\Command\SearchEngine\IndexPopulateCommand; use Alchemy\Phrasea\Command\Thesaurus\FindConceptsCommand; use Alchemy\Phrasea\Command\WebsocketServer; @@ -125,6 +125,7 @@ $cli->command(new XSendFileMappingGenerator()); if ($cli['search_engine.type'] === SearchEngineInterface::TYPE_ELASTICSEARCH) { $cli->command(new IndexCreateCommand()); $cli->command(new IndexDropCommand()); + $cli->command(new MappingUpdateCommand()); $cli->command(new IndexPopulateCommand()); $cli->command(new QueryParseCommand()); $cli->command(new FindConceptsCommand()); diff --git a/lib/Alchemy/Phrasea/Command/SearchEngine/Debug/QuerySampleCommand.php b/lib/Alchemy/Phrasea/Command/SearchEngine/Debug/QuerySampleCommand.php new file mode 100644 index 0000000000..d4c862b2b3 --- /dev/null +++ b/lib/Alchemy/Phrasea/Command/SearchEngine/Debug/QuerySampleCommand.php @@ -0,0 +1,69 @@ +setName('searchengine:query:sample') + ->setDescription('Generate sample queries from grammar') + ; + } + + protected function doExecute(InputInterface $input, 
OutputInterface $output) + { + $grammarPath = $this->container['query_parser.grammar_path']; + $output->writeln(sprintf('Generating sample queries from %s', $grammarPath)); + $output->writeln(str_repeat('-', 20)); + + $parser = $this->container['query_parser']; + + // UNIFORM + + // $sampler = new \Hoa\Compiler\Llk\Sampler\Uniform( + // $parser, + // new \Hoa\Regex\Visitor\Isotropic(new \Hoa\Math\Sampler\Random()), + // 7 + // ); + + // for($i = 0; $i < 10; ++$i) { + // $output->writeln(sprintf('%d => %s', $i, $sampler->uniform())); + // } + + // BOUNDED EXHAUSTIVE + + $sampler = new \Hoa\Compiler\Llk\Sampler\BoundedExhaustive( + $parser, + new \Hoa\Regex\Visitor\Isotropic(new \Hoa\Math\Sampler\Random()), + 6 + ); + + // COVERAGE + + // $sampler = new \Hoa\Compiler\Llk\Sampler\Coverage( + // $parser, + // new \Hoa\Regex\Visitor\Isotropic(new \Hoa\Math\Sampler\Random()) + // ); + + foreach($sampler as $i => $data) { + $output->writeln(sprintf('%d => %s', $i, $data)); + } + + $output->writeln(str_repeat('-', 20)); + } +} diff --git a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer.php b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer.php index fcfc3c3d2e..eb2dfb06c6 100644 --- a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer.php +++ b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer.php @@ -83,8 +83,7 @@ class Indexer $params['index'] = $this->options['index']; $params['type'] = RecordIndexer::TYPE_NAME; $params['body'][RecordIndexer::TYPE_NAME] = $this->recordIndexer->getMapping(); - - // @todo Add term mapping + $params['body'][TermIndexer::TYPE_NAME] = $this->termIndexer->getMapping(); // @todo This must throw a new indexation if a mapping is edited $this->client->indices()->putMapping($params); From 771aa5b765dd59dee9a47cb63505797371082f51 Mon Sep 17 00:00:00 2001 From: Mathieu Darse Date: Mon, 23 Mar 2015 19:50:58 +0100 Subject: [PATCH 2/2] Working cross-fields queries with multiple words (without operators) - Index the full content of a record in a 
(private_)caption_all field - Handle all fields wide search as a special-case (drastically simplify queries) - QueryContext doesn't take all allowed fields anymore, but whether private fields are allowed or not. Since private fields are namespaced, field level restriction is not needed anymore. --- .../SearchEngine/MappingUpdateCommand.php | 36 +++++++++++++++++++ .../Elastic/ElasticSearchEngine.php | 4 +-- .../Record/Hydrator/MetadataHydrator.php | 6 ++++ .../Elastic/Indexer/RecordIndexer.php | 8 +++++ .../Elastic/Search/QueryContext.php | 21 ++++++----- 5 files changed, 64 insertions(+), 11 deletions(-) create mode 100644 lib/Alchemy/Phrasea/Command/SearchEngine/MappingUpdateCommand.php diff --git a/lib/Alchemy/Phrasea/Command/SearchEngine/MappingUpdateCommand.php b/lib/Alchemy/Phrasea/Command/SearchEngine/MappingUpdateCommand.php new file mode 100644 index 0000000000..17c336ec96 --- /dev/null +++ b/lib/Alchemy/Phrasea/Command/SearchEngine/MappingUpdateCommand.php @@ -0,0 +1,36 @@ +setName('searchengine:mapping:update') + ->setDescription('Update index mapping') + ; + } + + protected function doExecute(InputInterface $input, OutputInterface $output) + { + $indexer = $this->container['elasticsearch.indexer']; + + $indexer->updateMapping(); + $output->writeln('Mapping pushed to index'); + } +} diff --git a/lib/Alchemy/Phrasea/SearchEngine/Elastic/ElasticSearchEngine.php b/lib/Alchemy/Phrasea/SearchEngine/Elastic/ElasticSearchEngine.php index 201f17e1d9..f77e3abe0b 100644 --- a/lib/Alchemy/Phrasea/SearchEngine/Elastic/ElasticSearchEngine.php +++ b/lib/Alchemy/Phrasea/SearchEngine/Elastic/ElasticSearchEngine.php @@ -271,9 +271,7 @@ class ElasticSearchEngine implements SearchEngineInterface { $options = $options ?: new SearchEngineOptions(); - // TODO Pass options to getFields to include/exclude private fields - $searchableFields = $this->recordHelper->getFields(); - $queryContext = new QueryContext($this->locales, $this->app['locale'], $searchableFields); + 
$queryContext = new QueryContext($this->locales, $this->app['locale']); $recordQuery = $this->app['query_parser']->compile($string, $queryContext); $params = $this->createRecordQueryParams($recordQuery, $options, null); diff --git a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer/Record/Hydrator/MetadataHydrator.php b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer/Record/Hydrator/MetadataHydrator.php index 0bd41a9bcc..ab39b25c62 100644 --- a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer/Record/Hydrator/MetadataHydrator.php +++ b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer/Record/Hydrator/MetadataHydrator.php @@ -72,6 +72,12 @@ SQL; $record[$type][$key] = array(); } $record[$type][$key][] = $value; + // Collect value in the "all" field + $field = sprintf('%s_all', $type); + if (!isset($record[$field])) { + $record[$field] = array(); + } + $record[$field][] = $value; break; case 'exif': diff --git a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer/RecordIndexer.php b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer/RecordIndexer.php index b39be201c2..21eccaca76 100644 --- a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer/RecordIndexer.php +++ b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer/RecordIndexer.php @@ -206,8 +206,16 @@ class RecordIndexer // Caption mapping $captionMapping = new Mapping(); $mapping->add('caption', $captionMapping); + $mapping + ->add('caption_all', 'string') + ->addLocalizedSubfields($this->locales) + ; $privateCaptionMapping = new Mapping(); $mapping->add('private_caption', $privateCaptionMapping); + $mapping + ->add('private_caption_all', 'string') + ->addLocalizedSubfields($this->locales) + ; // Inferred thesaurus concepts $conceptPathMapping = new Mapping(); $mapping->add('concept_path', $conceptPathMapping); diff --git a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Search/QueryContext.php b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Search/QueryContext.php index 89742704c2..260aaaeed1 100644 --- 
a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Search/QueryContext.php +++ b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Search/QueryContext.php @@ -19,10 +19,14 @@ class QueryContext public function narrowToFields(array $fields) { - // Ensure we are not escaping from original fields restrictions - $fields = array_intersect($this->fields, $fields); - if (!$fields) { - throw new QueryException('Query narrowed to non available fields'); + if (is_array($this->fields)) { + // Ensure we are not escaping from original fields restrictions + $fields = array_intersect($this->fields, $fields); + if (!$fields) { + throw new QueryException('Query narrowed to non available fields'); + } + } else { + $fields = null; } return new static($this->locales, $this->queryLocale, $fields); @@ -30,13 +34,14 @@ class QueryContext public function getLocalizedFields() { + // TODO Private fields handling if ($this->fields === null) { - return $this->localizeField('*'); + return $this->localizeField('caption_all'); } $fields = array(); foreach ($this->fields as $field) { - foreach ($this->localizeField($field) as $fields[]); + foreach ($this->localizeField(sprintf('caption.%s', $field)) as $fields[]); } return $fields; @@ -47,10 +52,10 @@ class QueryContext $fields = array(); foreach ($this->locales as $locale) { $boost = ($locale === $this->queryLocale) ? '^5' : ''; - $fields[] = sprintf('caption.%s.%s%s', $field, $locale, $boost); + $fields[] = sprintf('%s.%s%s', $field, $locale, $boost); } // TODO Put generic analyzers on main field instead of "light" sub-field - $fields[] = sprintf('caption.%s.%s', $field, 'light^10'); + $fields[] = sprintf('%s.light^10', $field); return $fields; }