diff --git a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer/Record/Hydrator/ThesaurusHydrator.php b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer/Record/Hydrator/ThesaurusHydrator.php index e2992061e0..59b67067a7 100644 --- a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer/Record/Hydrator/ThesaurusHydrator.php +++ b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer/Record/Hydrator/ThesaurusHydrator.php @@ -94,7 +94,7 @@ class ThesaurusHydrator implements HydratorInterface if(empty($terms)) { return; } - $bulk = $this->thesaurus->findConceptsBulk($terms, [$sbid], null, $filters, true); + $bulk = $this->thesaurus->findConceptsBulk($terms, null, $filters, true); foreach ($bulk as $offset => $item_concepts) { $name = $field_names[$offset]; diff --git a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Search/QueryCompiler.php b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Search/QueryCompiler.php index 6b782a57be..aefd0afa53 100644 --- a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Search/QueryCompiler.php +++ b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Search/QueryCompiler.php @@ -46,7 +46,8 @@ class QueryCompiler // TODO We must restrict thesaurus matching for IN queries, and only // search in each field's root concepts. $nodes = $query->getTermNodes(); - $concepts = $this->thesaurus->findConceptsBulk($nodes, $context->getDataboxes()); + $filter = Thesaurus\Filter::byDataboxes($context->getDataboxes()); + $concepts = $this->thesaurus->findConceptsBulk($nodes, null, $filter, false); foreach ($concepts as $index => $termConcepts) { $node = $nodes[$index]; diff --git a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Thesaurus.php b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Thesaurus.php index ed950c8b18..2680bc4923 100644 --- a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Thesaurus.php +++ b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Thesaurus.php @@ -45,7 +45,7 @@ class Thesaurus * @param boolean $strict Strict mode matching * @return Concept[][] List of matching concepts for each term */ - public function findConceptsBulk(array $terms, array $databoxIds, $lang = null, $filter = null, $strict = false) + public function findConceptsBulk(array $terms, $lang = null, $filter = null, $strict = false) { $this->logger->debug(sprintf('Finding linked concepts in bulk for %d terms', count($terms))); @@ -61,7 +61,7 @@ class Thesaurus $concepts = array(); foreach ($terms as $index => $term) { $strict |= ($term instanceof AST\TermNode); // a "term" node is [strict group of words] - $concepts[] = $this->findConcepts($term, $databoxIds, $lang, $filters[$index], $strict); + $concepts[] = $this->findConcepts($term, $lang, $filters[$index], $strict); } return $concepts; @@ -79,16 +79,16 @@ class Thesaurus * @param boolean $strict Whether to enable strict search or not * @return Concept[] Matching concepts */ - public function findConcepts($term, array $databoxIds, $lang = null, Filter $filter = null, $strict = false) + private function findConcepts($term, $lang = null, Filter $filter = null, $strict = false) { return $strict ? - $this->findConceptsStrict($term, $databoxIds, $lang, $filter) + $this->findConceptsStrict($term, $lang, $filter) : - $this->findConceptsFuzzy($term, $databoxIds, $lang, $filter) + $this->findConceptsFuzzy($term, $lang, $filter) ; } - private function findConceptsStrict($term, array $databoxIds, $lang = null, Filter $filter = null) + private function findConceptsStrict($term, $lang = null, Filter $filter = null) { if (!($term instanceof TermInterface)) { $term = new Term($term); @@ -126,7 +126,7 @@ class Thesaurus ] ]; } - +/* if(count($databoxIds) > 0) { if(count($databoxIds) == 1) { $filters[] = [ @@ -143,7 +143,7 @@ class Thesaurus ]; } } - +*/ if ($lang) { $filters[] = [ 'term' => [ @@ -185,18 +185,34 @@ class Thesaurus $query = $must[0]; } - // Path deduplication - $aggs = array(); - $aggs['dedup']['terms']['field'] = 'path.raw'; - // Search request - $params = array(); - $params['index'] = $this->options->getIndexName(); - $params['type'] = TermIndexer::TYPE_NAME; - $params['body']['query'] = $query; - $params['body']['aggs'] = $aggs; - // No need to get any hits since we extract data from aggs - $params['body']['size'] = 0; + $params = [ + 'index' => $this->options->getIndexName(), + 'type' => TermIndexer::TYPE_NAME, + 'body' => [ + 'query' => $query, + 'aggs' => [ + // Path deduplication + 'db' => [ // databox_id + 'terms' => [ + 'field' => 'databox_id' + ], + 'aggs' => [ + // Path deduplication + 'cp' => [ // concept_path + 'terms' => [ + 'field' => 'path.raw' + ] + ] + ], + + ] + ], + // No need to get any hits since we extract data from aggs + 'size' => 0 + ] + ]; + $this->logger->debug('Sending search', $params['body']); $response = $this->client->search($params); @@ -225,7 +241,7 @@ class Thesaurus return $concepts; } - private function findConceptsFuzzy($term, array $databoxIds, $lang = null, Filter $filter = null) + private function findConceptsFuzzy($term, $lang = null, Filter $filter = null) { if (!($term instanceof TermInterface)) { $term = new Term($term); @@ -260,7 +276,7 @@ class Thesaurus $query['bool']['must'][0] = $value_query; $query['bool']['must'][1] = $context_query; } - +/* if(count($databoxIds) > 0) { if(count($databoxIds) == 1) { $query = self::applyQueryFilter( @@ -283,7 +299,7 @@ class Thesaurus ); } } - +*/ if ($lang) { $lang_filter = array(); $lang_filter['term']['lang'] = $lang; diff --git a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Thesaurus/Filter.php b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Thesaurus/Filter.php index f8ac7030e5..3ee6150891 100644 --- a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Thesaurus/Filter.php +++ b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Thesaurus/Filter.php @@ -13,17 +13,22 @@ namespace Alchemy\Phrasea\SearchEngine\Elastic\Thesaurus; class Filter { - private $databox_id; + private $databox_ids; private $paths; public static function childOfConcepts($databox_id, array $concepts) { - return new self($databox_id, Concept::toPathArray($concepts)); + return new self([$databox_id], Concept::toPathArray($concepts)); } public static function byDatabox($databox_id) { - return new self($databox_id, []); + return new self([$databox_id], []); + } + + public static function byDataboxes($databox_ids) + { + return new self($databox_ids, []); } public static function dump(Filter $filter) @@ -31,15 +36,19 @@ class Filter return $filter->getQueryFilter(); // perfect as an array } - private function __construct($databox_id, array $paths) + private function __construct($databox_ids, array $paths) { - $this->databox_id = $databox_id; + $this->databox_ids = $databox_ids; $this->paths = $paths; } public function getQueryFilter() { - $filter = ['terms'=>['databox_id'=>[$this->databox_id]]]; + $filter = [ + 'terms' => [ + 'databox_id' => $this->databox_ids + ] + ]; if(count($this->paths) > 0) { $filter['terms']['path'] = $this->paths; } @@ -51,8 +60,8 @@ class Filter { $filters = [ [ - 'term' => [ - 'databox_id' => $this->databox_id + 'terms' => [ + 'databox_id' => $this->databox_ids ] ] ];