PHRAS-3389_use-only-conceptpaths-from-selected-dbs_MASTER

fix: [] search (thesaurus) was broken by bad thesaurus hydration during record indexing
This commit is contained in:
jygaulier
2021-04-12 15:57:44 +02:00
parent 63bee24775
commit 00979f2d6d
4 changed files with 58 additions and 32 deletions

View File

@@ -94,7 +94,7 @@ class ThesaurusHydrator implements HydratorInterface
if(empty($terms)) {
return;
}
$bulk = $this->thesaurus->findConceptsBulk($terms, [$sbid], null, $filters, true);
$bulk = $this->thesaurus->findConceptsBulk($terms, null, $filters, true);
foreach ($bulk as $offset => $item_concepts) {
$name = $field_names[$offset];

View File

@@ -46,7 +46,8 @@ class QueryCompiler
// TODO We must restrict thesaurus matching for IN queries, and only
// search in each field's root concepts.
$nodes = $query->getTermNodes();
$concepts = $this->thesaurus->findConceptsBulk($nodes, $context->getDataboxes());
$filter = Thesaurus\Filter::byDataboxes($context->getDataboxes());
$concepts = $this->thesaurus->findConceptsBulk($nodes, null, $filter, false);
foreach ($concepts as $index => $termConcepts) {
$node = $nodes[$index];

View File

@@ -45,7 +45,7 @@ class Thesaurus
* @param boolean $strict Strict mode matching
* @return Concept[][] List of matching concepts for each term
*/
public function findConceptsBulk(array $terms, array $databoxIds, $lang = null, $filter = null, $strict = false)
public function findConceptsBulk(array $terms, $lang = null, $filter = null, $strict = false)
{
$this->logger->debug(sprintf('Finding linked concepts in bulk for %d terms', count($terms)));
@@ -61,7 +61,7 @@ class Thesaurus
$concepts = array();
foreach ($terms as $index => $term) {
$strict |= ($term instanceof AST\TermNode); // a "term" node is [strict group of words]
$concepts[] = $this->findConcepts($term, $databoxIds, $lang, $filters[$index], $strict);
$concepts[] = $this->findConcepts($term, $lang, $filters[$index], $strict);
}
return $concepts;
@@ -79,16 +79,16 @@ class Thesaurus
* @param boolean $strict Whether to enable strict search or not
* @return Concept[] Matching concepts
*/
public function findConcepts($term, array $databoxIds, $lang = null, Filter $filter = null, $strict = false)
private function findConcepts($term, $lang = null, Filter $filter = null, $strict = false)
{
return $strict ?
$this->findConceptsStrict($term, $databoxIds, $lang, $filter)
$this->findConceptsStrict($term, $lang, $filter)
:
$this->findConceptsFuzzy($term, $databoxIds, $lang, $filter)
$this->findConceptsFuzzy($term, $lang, $filter)
;
}
private function findConceptsStrict($term, array $databoxIds, $lang = null, Filter $filter = null)
private function findConceptsStrict($term, $lang = null, Filter $filter = null)
{
if (!($term instanceof TermInterface)) {
$term = new Term($term);
@@ -126,7 +126,7 @@ class Thesaurus
]
];
}
/*
if(count($databoxIds) > 0) {
if(count($databoxIds) == 1) {
$filters[] = [
@@ -143,7 +143,7 @@ class Thesaurus
];
}
}
*/
if ($lang) {
$filters[] = [
'term' => [
@@ -185,18 +185,34 @@ class Thesaurus
$query = $must[0];
}
// Path deduplication
$aggs = array();
$aggs['dedup']['terms']['field'] = 'path.raw';
// Search request
$params = array();
$params['index'] = $this->options->getIndexName();
$params['type'] = TermIndexer::TYPE_NAME;
$params['body']['query'] = $query;
$params['body']['aggs'] = $aggs;
// No need to get any hits since we extract data from aggs
$params['body']['size'] = 0;
$params = [
'index' => $this->options->getIndexName(),
'type' => TermIndexer::TYPE_NAME,
'body' => [
'query' => $query,
'aggs' => [
// Bucket by databox first, so concept paths are deduplicated per databox
'db' => [ // databox_id
'terms' => [
'field' => 'databox_id'
],
'aggs' => [
// Path deduplication
'cp' => [ // concept_path
'terms' => [
'field' => 'path.raw'
]
]
],
]
],
// No need to get any hits since we extract data from aggs
'size' => 0
]
];
$this->logger->debug('Sending search', $params['body']);
$response = $this->client->search($params);
@@ -225,7 +241,7 @@ class Thesaurus
return $concepts;
}
private function findConceptsFuzzy($term, array $databoxIds, $lang = null, Filter $filter = null)
private function findConceptsFuzzy($term, $lang = null, Filter $filter = null)
{
if (!($term instanceof TermInterface)) {
$term = new Term($term);
@@ -260,7 +276,7 @@ class Thesaurus
$query['bool']['must'][0] = $value_query;
$query['bool']['must'][1] = $context_query;
}
/*
if(count($databoxIds) > 0) {
if(count($databoxIds) == 1) {
$query = self::applyQueryFilter(
@@ -283,7 +299,7 @@ class Thesaurus
);
}
}
*/
if ($lang) {
$lang_filter = array();
$lang_filter['term']['lang'] = $lang;

View File

@@ -13,17 +13,22 @@ namespace Alchemy\Phrasea\SearchEngine\Elastic\Thesaurus;
class Filter
{
private $databox_id;
private $databox_ids;
private $paths;
public static function childOfConcepts($databox_id, array $concepts)
{
return new self($databox_id, Concept::toPathArray($concepts));
return new self([$databox_id], Concept::toPathArray($concepts));
}
public static function byDatabox($databox_id)
{
return new self($databox_id, []);
return new self([$databox_id], []);
}
/**
 * Builds a filter limited to the given databoxes, with no concept-path restriction
 * (the paths list is passed empty to the constructor).
 *
 * @param array $databox_ids presumably a list of databox ids; it is handed as-is
 *                           to the constructor — TODO confirm against callers
 * @return Filter
 */
public static function byDataboxes($databox_ids)
{
return new self($databox_ids, []);
}
public static function dump(Filter $filter)
@@ -31,15 +36,19 @@ class Filter
return $filter->getQueryFilter(); // perfect as an array
}
private function __construct($databox_id, array $paths)
private function __construct($databox_ids, array $paths)
{
$this->databox_id = $databox_id;
$this->databox_ids = $databox_ids;
$this->paths = $paths;
}
public function getQueryFilter()
{
$filter = ['terms'=>['databox_id'=>[$this->databox_id]]];
$filter = [
'terms' => [
'databox_id' => $this->databox_ids
]
];
if(count($this->paths) > 0) {
$filter['terms']['path'] = $this->paths;
}
@@ -51,8 +60,8 @@ class Filter
{
$filters = [
[
'term' => [
'databox_id' => $this->databox_id
'terms' => [
'databox_id' => $this->databox_ids
]
]
];