PHRAS-3389_use-only-conceptpaths-from-selected-dbs_MASTER

fix: [] (thesaurus) search was broken by bad thesaurus hydration during record indexing
This commit is contained in:
jygaulier
2021-04-12 15:57:44 +02:00
parent 63bee24775
commit 00979f2d6d
4 changed files with 58 additions and 32 deletions

View File

@@ -94,7 +94,7 @@ class ThesaurusHydrator implements HydratorInterface
if(empty($terms)) { if(empty($terms)) {
return; return;
} }
$bulk = $this->thesaurus->findConceptsBulk($terms, [$sbid], null, $filters, true); $bulk = $this->thesaurus->findConceptsBulk($terms, null, $filters, true);
foreach ($bulk as $offset => $item_concepts) { foreach ($bulk as $offset => $item_concepts) {
$name = $field_names[$offset]; $name = $field_names[$offset];

View File

@@ -46,7 +46,8 @@ class QueryCompiler
// TODO We must restrict thesaurus matching for IN queries, and only // TODO We must restrict thesaurus matching for IN queries, and only
// search in each field's root concepts. // search in each field's root concepts.
$nodes = $query->getTermNodes(); $nodes = $query->getTermNodes();
$concepts = $this->thesaurus->findConceptsBulk($nodes, $context->getDataboxes()); $filter = Thesaurus\Filter::byDataboxes($context->getDataboxes());
$concepts = $this->thesaurus->findConceptsBulk($nodes, null, $filter, false);
foreach ($concepts as $index => $termConcepts) { foreach ($concepts as $index => $termConcepts) {
$node = $nodes[$index]; $node = $nodes[$index];

View File

@@ -45,7 +45,7 @@ class Thesaurus
* @param boolean $strict Strict mode matching * @param boolean $strict Strict mode matching
* @return Concept[][] List of matching concepts for each term * @return Concept[][] List of matching concepts for each term
*/ */
public function findConceptsBulk(array $terms, array $databoxIds, $lang = null, $filter = null, $strict = false) public function findConceptsBulk(array $terms, $lang = null, $filter = null, $strict = false)
{ {
$this->logger->debug(sprintf('Finding linked concepts in bulk for %d terms', count($terms))); $this->logger->debug(sprintf('Finding linked concepts in bulk for %d terms', count($terms)));
@@ -61,7 +61,7 @@ class Thesaurus
$concepts = array(); $concepts = array();
foreach ($terms as $index => $term) { foreach ($terms as $index => $term) {
$strict |= ($term instanceof AST\TermNode); // a "term" node is [strict group of words] $strict |= ($term instanceof AST\TermNode); // a "term" node is [strict group of words]
$concepts[] = $this->findConcepts($term, $databoxIds, $lang, $filters[$index], $strict); $concepts[] = $this->findConcepts($term, $lang, $filters[$index], $strict);
} }
return $concepts; return $concepts;
@@ -79,16 +79,16 @@ class Thesaurus
* @param boolean $strict Whether to enable strict search or not * @param boolean $strict Whether to enable strict search or not
* @return Concept[] Matching concepts * @return Concept[] Matching concepts
*/ */
public function findConcepts($term, array $databoxIds, $lang = null, Filter $filter = null, $strict = false) private function findConcepts($term, $lang = null, Filter $filter = null, $strict = false)
{ {
return $strict ? return $strict ?
$this->findConceptsStrict($term, $databoxIds, $lang, $filter) $this->findConceptsStrict($term, $lang, $filter)
: :
$this->findConceptsFuzzy($term, $databoxIds, $lang, $filter) $this->findConceptsFuzzy($term, $lang, $filter)
; ;
} }
private function findConceptsStrict($term, array $databoxIds, $lang = null, Filter $filter = null) private function findConceptsStrict($term, $lang = null, Filter $filter = null)
{ {
if (!($term instanceof TermInterface)) { if (!($term instanceof TermInterface)) {
$term = new Term($term); $term = new Term($term);
@@ -126,7 +126,7 @@ class Thesaurus
] ]
]; ];
} }
/*
if(count($databoxIds) > 0) { if(count($databoxIds) > 0) {
if(count($databoxIds) == 1) { if(count($databoxIds) == 1) {
$filters[] = [ $filters[] = [
@@ -143,7 +143,7 @@ class Thesaurus
]; ];
} }
} }
*/
if ($lang) { if ($lang) {
$filters[] = [ $filters[] = [
'term' => [ 'term' => [
@@ -185,18 +185,34 @@ class Thesaurus
$query = $must[0]; $query = $must[0];
} }
// Path deduplication
$aggs = array();
$aggs['dedup']['terms']['field'] = 'path.raw';
// Search request // Search request
$params = array(); $params = [
$params['index'] = $this->options->getIndexName(); 'index' => $this->options->getIndexName(),
$params['type'] = TermIndexer::TYPE_NAME; 'type' => TermIndexer::TYPE_NAME,
$params['body']['query'] = $query; 'body' => [
$params['body']['aggs'] = $aggs; 'query' => $query,
// No need to get any hits since we extract data from aggs 'aggs' => [
$params['body']['size'] = 0; // Path deduplication
'db' => [ // databox_id
'terms' => [
'field' => 'databox_id'
],
'aggs' => [
// Path deduplication
'cp' => [ // concept_path
'terms' => [
'field' => 'path.raw'
]
]
],
]
],
// No need to get any hits since we extract data from aggs
'size' => 0
]
];
$this->logger->debug('Sending search', $params['body']); $this->logger->debug('Sending search', $params['body']);
$response = $this->client->search($params); $response = $this->client->search($params);
@@ -225,7 +241,7 @@ class Thesaurus
return $concepts; return $concepts;
} }
private function findConceptsFuzzy($term, array $databoxIds, $lang = null, Filter $filter = null) private function findConceptsFuzzy($term, $lang = null, Filter $filter = null)
{ {
if (!($term instanceof TermInterface)) { if (!($term instanceof TermInterface)) {
$term = new Term($term); $term = new Term($term);
@@ -260,7 +276,7 @@ class Thesaurus
$query['bool']['must'][0] = $value_query; $query['bool']['must'][0] = $value_query;
$query['bool']['must'][1] = $context_query; $query['bool']['must'][1] = $context_query;
} }
/*
if(count($databoxIds) > 0) { if(count($databoxIds) > 0) {
if(count($databoxIds) == 1) { if(count($databoxIds) == 1) {
$query = self::applyQueryFilter( $query = self::applyQueryFilter(
@@ -283,7 +299,7 @@ class Thesaurus
); );
} }
} }
*/
if ($lang) { if ($lang) {
$lang_filter = array(); $lang_filter = array();
$lang_filter['term']['lang'] = $lang; $lang_filter['term']['lang'] = $lang;

View File

@@ -13,17 +13,22 @@ namespace Alchemy\Phrasea\SearchEngine\Elastic\Thesaurus;
class Filter class Filter
{ {
private $databox_id; private $databox_ids;
private $paths; private $paths;
public static function childOfConcepts($databox_id, array $concepts) public static function childOfConcepts($databox_id, array $concepts)
{ {
return new self($databox_id, Concept::toPathArray($concepts)); return new self([$databox_id], Concept::toPathArray($concepts));
} }
public static function byDatabox($databox_id) public static function byDatabox($databox_id)
{ {
return new self($databox_id, []); return new self([$databox_id], []);
}
public static function byDataboxes($databox_ids)
{
return new self($databox_ids, []);
} }
public static function dump(Filter $filter) public static function dump(Filter $filter)
@@ -31,15 +36,19 @@ class Filter
return $filter->getQueryFilter(); // perfect as an array return $filter->getQueryFilter(); // perfect as an array
} }
private function __construct($databox_id, array $paths) private function __construct($databox_ids, array $paths)
{ {
$this->databox_id = $databox_id; $this->databox_ids = $databox_ids;
$this->paths = $paths; $this->paths = $paths;
} }
public function getQueryFilter() public function getQueryFilter()
{ {
$filter = ['terms'=>['databox_id'=>[$this->databox_id]]]; $filter = [
'terms' => [
'databox_id' => $this->databox_ids
]
];
if(count($this->paths) > 0) { if(count($this->paths) > 0) {
$filter['terms']['path'] = $this->paths; $filter['terms']['path'] = $this->paths;
} }
@@ -51,8 +60,8 @@ class Filter
{ {
$filters = [ $filters = [
[ [
'term' => [ 'terms' => [
'databox_id' => $this->databox_id 'databox_id' => $this->databox_ids
] ]
] ]
]; ];