From a31442368b69a10d3a2df2a80ebaa9b11cfdbb46 Mon Sep 17 00:00:00 2001 From: Mathieu Darse Date: Thu, 23 Jul 2015 17:39:11 +0200 Subject: [PATCH] Fix number field search Search with non numeric content will not hit number field (it breaks elasticsearch and is useless anyway) - Rename QueryHelper::buildPrivateFieldQueries() to wrapPrivateFieldQuery(). - Signature changed too, the third parameter is dropped an QueryContext is replaced by an array of Field. - Query builder closure is now passed an array of Field, not of index field names. - Remove Field::toConceptPathIndexFieldArray() because method name was beyond understanding (and also because it wasn't needed anymore) - Various AST node types have changed due to previous API changes --- .../Elastic/AST/AbstractTermNode.php | 29 ++++++++------- .../Elastic/AST/QuotedTextNode.php | 20 +++++++---- .../SearchEngine/Elastic/AST/RawNode.php | 30 ++++++---------- .../SearchEngine/Elastic/AST/TermNode.php | 8 +++-- .../SearchEngine/Elastic/AST/TextNode.php | 25 +++++++++---- .../Elastic/Search/QueryContext.php | 22 ++++-------- .../Elastic/Search/QueryHelper.php | 32 +++++++---------- .../Elastic/Search/TextQueryHelper.php | 35 +++++++++++++++++++ .../SearchEngine/Elastic/Structure/Field.php | 10 ------ .../SearchEngine/AST/QuotedTextNodeTest.php | 9 +++-- .../Phrasea/SearchEngine/AST/TermNodeTest.php | 26 ++++++++++++-- .../Phrasea/SearchEngine/AST/TextNodeTest.php | 21 ++++++----- 12 files changed, 160 insertions(+), 107 deletions(-) create mode 100644 lib/Alchemy/Phrasea/SearchEngine/Elastic/Search/TextQueryHelper.php diff --git a/lib/Alchemy/Phrasea/SearchEngine/Elastic/AST/AbstractTermNode.php b/lib/Alchemy/Phrasea/SearchEngine/Elastic/AST/AbstractTermNode.php index 6b152d2c49..551b57278d 100644 --- a/lib/Alchemy/Phrasea/SearchEngine/Elastic/AST/AbstractTermNode.php +++ b/lib/Alchemy/Phrasea/SearchEngine/Elastic/AST/AbstractTermNode.php @@ -25,33 +25,38 @@ abstract class AbstractTermNode extends Node implements TermInterface $this->concepts = $concepts; } - protected function buildConceptQueries(QueryContext $context) + protected function buildConceptQuery(QueryContext $context) { $concepts = Concept::pruneNarrowConcepts($this->concepts); if (!$concepts) { - return []; + return null; } - $queries_builder = function (array $index_fields) use ($concepts) { - $queries = []; + $query_builder = function (array $fields) use ($concepts) { + $index_fields = []; + foreach ($fields as $field) { + $index_fields[] = $field->getConceptPathIndexField(); + } + $query = null; foreach ($concepts as $concept) { - $queries[] = [ + $concept_query = [ 'multi_match' => [ 'fields' => $index_fields, 'query' => $concept->getPath() ] ]; + $query = QueryHelper::applyBooleanClause($query, 'should', $concept_query); } - return $queries; + return $query; }; - $fields = $context->getUnrestrictedFields(); - $index_fields = Field::toConceptPathIndexFieldArray($fields); + $query = $query_builder($context->getUnrestrictedFields()); + $private_fields = $context->getPrivateFields(); + foreach (QueryHelper::wrapPrivateFieldConceptQueries($private_fields, $query_builder) as $private_field_query) { + $query = QueryHelper::applyBooleanClause($query, 'should', $private_field_query); + } - $queries = $queries_builder($index_fields); - foreach (QueryHelper::buildPrivateFieldConceptQueries($context, $queries_builder) as $queries[]); - - return $queries; + return $query; } public function getValue() diff --git a/lib/Alchemy/Phrasea/SearchEngine/Elastic/AST/QuotedTextNode.php b/lib/Alchemy/Phrasea/SearchEngine/Elastic/AST/QuotedTextNode.php index 2be81d11e9..83bd9f380b 100644 --- a/lib/Alchemy/Phrasea/SearchEngine/Elastic/AST/QuotedTextNode.php +++ b/lib/Alchemy/Phrasea/SearchEngine/Elastic/AST/QuotedTextNode.php @@ -4,6 +4,7 @@ namespace Alchemy\Phrasea\SearchEngine\Elastic\AST; use Alchemy\Phrasea\SearchEngine\Elastic\Search\QueryContext; use Alchemy\Phrasea\SearchEngine\Elastic\Search\QueryHelper; +use Alchemy\Phrasea\SearchEngine\Elastic\Search\TextQueryHelper; class QuotedTextNode extends Node { @@ -16,20 +17,27 @@ class QuotedTextNode extends Node public function buildQuery(QueryContext $context) { - $query_builder = function (array $fields) { + $query_builder = function (array $fields) use ($context) { + $index_fields = []; + foreach ($fields as $field) { + foreach ($context->localizeField($field) as $index_fields[]); + } + if (!$index_fields) { + return null; + } return [ 'multi_match' => [ 'type' => 'phrase', - 'fields' => $fields, + 'fields' => $index_fields, 'query' => $this->text, ] ]; }; - $fields = $context->getLocalizedFields(); - $query = $fields ? $query_builder($fields) : null; - - foreach (QueryHelper::buildPrivateFieldQueries($context, $query_builder) as $private_field_query) { + $query = $query_builder($context->getUnrestrictedFields()); + $private_fields = $context->getPrivateFields(); + $private_fields = TextQueryHelper::filterCompatibleFields($private_fields, $this->text); + foreach (QueryHelper::wrapPrivateFieldQueries($private_fields, $query_builder) as $private_field_query) { $query = QueryHelper::applyBooleanClause($query, 'should', $private_field_query); } diff --git a/lib/Alchemy/Phrasea/SearchEngine/Elastic/AST/RawNode.php b/lib/Alchemy/Phrasea/SearchEngine/Elastic/AST/RawNode.php index 98a3aad51d..8fdbd889d6 100644 --- a/lib/Alchemy/Phrasea/SearchEngine/Elastic/AST/RawNode.php +++ b/lib/Alchemy/Phrasea/SearchEngine/Elastic/AST/RawNode.php @@ -30,40 +30,32 @@ class RawNode extends Node public function buildQuery(QueryContext $context) { $query_builder = function (array $fields) { + $index_fields = []; + foreach ($fields as $field) { + $index_fields[] = $field->getIndexField(true); + } $query = []; - if (count($fields) > 1) { + if (count($index_fields) > 1) { $query['multi_match']['query'] = $this->text; - $query['multi_match']['fields'] = $fields; + $query['multi_match']['fields'] = $index_fields; $query['multi_match']['analyzer'] = 'keyword'; } else { - $field = reset($fields); - $query['term'][$field] = $this->text; + $index_field = reset($index_fields); + $query['term'][$index_field] = $this->text; } return $query; }; - $fields = $context->getRawFields(); - $query = count($fields) ? $query_builder($fields) : null; - - foreach (QueryHelper::buildPrivateFieldQueries($context, $query_builder, $this->getIndexFieldsCallback()) as $private_field_query) { + $query = $query_builder($context->getUnrestrictedFields()); + $private_fields = $context->getPrivateFields(); + foreach (QueryHelper::wrapPrivateFieldQueries($private_fields, $query_builder) as $private_field_query) { $query = QueryHelper::applyBooleanClause($query, 'should', $private_field_query); } return $query; } - private function getIndexFieldsCallback() - { - if ($this->index_fields_callback === null) { - $this->index_fields_callback = function (StructureField $field) { - return $field->getIndexField(true); - }; - } - - return $this->index_fields_callback; - } - public function getTermNodes() { return []; diff --git a/lib/Alchemy/Phrasea/SearchEngine/Elastic/AST/TermNode.php b/lib/Alchemy/Phrasea/SearchEngine/Elastic/AST/TermNode.php index 0b26997d0e..c5e8a4da43 100644 --- a/lib/Alchemy/Phrasea/SearchEngine/Elastic/AST/TermNode.php +++ b/lib/Alchemy/Phrasea/SearchEngine/Elastic/AST/TermNode.php @@ -9,8 +9,12 @@ class TermNode extends AbstractTermNode { public function buildQuery(QueryContext $context) { - $query = []; - $query['bool']['should'] = $this->buildConceptQueries($context); + $query = $this->buildConceptQuery($context); + + // Should not match anything if no concept is defined + if ($query === null) { + $query['bool']['should'] = []; + } return $query; } diff --git a/lib/Alchemy/Phrasea/SearchEngine/Elastic/AST/TextNode.php b/lib/Alchemy/Phrasea/SearchEngine/Elastic/AST/TextNode.php index d282560810..47e6eaa69e 100644 --- a/lib/Alchemy/Phrasea/SearchEngine/Elastic/AST/TextNode.php +++ b/lib/Alchemy/Phrasea/SearchEngine/Elastic/AST/TextNode.php @@ -4,6 +4,7 @@ namespace Alchemy\Phrasea\SearchEngine\Elastic\AST; use Alchemy\Phrasea\SearchEngine\Elastic\Search\QueryContext; use Alchemy\Phrasea\SearchEngine\Elastic\Search\QueryHelper; +use Alchemy\Phrasea\SearchEngine\Elastic\Search\TextQueryHelper; use Alchemy\Phrasea\SearchEngine\Elastic\Thesaurus\Term; class TextNode extends AbstractTermNode implements ContextAbleInterface @@ -37,25 +38,35 @@ class TextNode extends AbstractTermNode implements ContextAbleInterface public function buildQuery(QueryContext $context) { - $query_builder = function (array $fields) { + $query_builder = function (array $fields) use ($context) { + $index_fields = []; + foreach ($fields as $field) { + foreach ($context->localizeField($field) as $index_fields[]); + } + if (!$index_fields) { + return null; + } return [ 'multi_match' => [ - 'fields' => $fields, + 'fields' => $index_fields, 'query' => $this->text, 'operator' => 'and', ] ]; }; - $fields = $context->getLocalizedFields(); - $query = count($fields) ? $query_builder($fields) : null; + $query = $query_builder($context->getUnrestrictedFields()); - foreach (QueryHelper::buildPrivateFieldQueries($context, $query_builder) as $private_field_query) { + $private_fields = $context->getPrivateFields(); + $private_fields = TextQueryHelper::filterCompatibleFields($private_fields, $this->text); + + foreach (QueryHelper::wrapPrivateFieldQueries($private_fields, $query_builder) as $private_field_query) { $query = QueryHelper::applyBooleanClause($query, 'should', $private_field_query); } - foreach ($this->buildConceptQueries($context) as $concept_query) { - $query = QueryHelper::applyBooleanClause($query, 'should', $concept_query); + $concept_query = $this->buildConceptQuery($context); + if ($concept_query !== null) { + $query = QueryHelper::applyBooleanClause($query, 'should', $this->buildConceptQuery($context)); } return $query; diff --git a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Search/QueryContext.php b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Search/QueryContext.php index 0abf48256e..aee70e6f05 100644 --- a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Search/QueryContext.php +++ b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Search/QueryContext.php @@ -3,6 +3,7 @@ namespace Alchemy\Phrasea\SearchEngine\Elastic\Search; use Alchemy\Phrasea\SearchEngine\Elastic\Exception\QueryException; +use Alchemy\Phrasea\SearchEngine\Elastic\Mapping; use Alchemy\Phrasea\SearchEngine\Elastic\Structure\Field; use Alchemy\Phrasea\SearchEngine\Elastic\AST\Field as ASTField; use Alchemy\Phrasea\SearchEngine\Elastic\Structure\Structure; @@ -56,20 +57,6 @@ class QueryContext return $fields; } - public function getLocalizedFields() - { - if ($this->fields === null) { - return $this->localizeFieldName('caption_all'); - } - - $fields = array(); - foreach ($this->getUnrestrictedFields() as $field) { - foreach ($this->localizeField($field) as $fields[]); - } - - return $fields; - } - public function getUnrestrictedFields() { // TODO Restore search optimization by using "caption_all" field @@ -109,7 +96,12 @@ class QueryContext */ public function localizeField(Field $field) { - return $this->localizeFieldName($field->getIndexField()); + $index_field = $field->getIndexField(); + if ($field->getType() === Mapping::TYPE_STRING) { + return $this->localizeFieldName($index_field); + } else { + return [$index_field]; + } } private function localizeFieldName($field) diff --git a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Search/QueryHelper.php b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Search/QueryHelper.php index a14457aee6..d711d9bf6c 100644 --- a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Search/QueryHelper.php +++ b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Search/QueryHelper.php @@ -8,19 +8,13 @@ class QueryHelper { private function __construct() {} - public static function buildPrivateFieldQueries(QueryContext $context, \Closure $matcher_callback, \Closure $index_fields_callback = null) + public static function wrapPrivateFieldQueries(array $fields, \Closure $query_builder) { - if ($index_fields_callback === null) { - $index_fields_callback = function (Field $field) use ($context) { - return $context->localizeField($field); - }; - } - // We make a boolean clause for each collection set to shrink query size // (instead of a clause for each field, with his collection set) $fields_map = []; $collections_map = []; - foreach ($context->getPrivateFields() as $field) { + foreach ($fields as $field) { $collections = $field->getDependantCollections(); $hash = self::hashCollections($collections); $collections_map[$hash] = $collections; @@ -28,8 +22,7 @@ class QueryHelper $fields_map[$hash] = []; } // Merge fields with others having the same collections - $fields = (array) $index_fields_callback($field); - foreach ($fields as $fields_map[$hash][]); + $fields_map[$hash][] = $field; } $queries = []; @@ -37,7 +30,7 @@ class QueryHelper // Right to query on a private field is dependant of document collection // Here we make sure we can only match on allowed collections $queries[] = self::restrictQueryToCollections( - $matcher_callback($fields), + $query_builder($fields), $collections_map[$hash] ); } @@ -69,15 +62,15 @@ class QueryHelper } /** - * @todo Factor with buildPrivateFieldQueries() + * @todo Factor with wrapPrivateFieldQueries() */ - public static function buildPrivateFieldConceptQueries(QueryContext $context, \Closure $matchers_callback) + public static function wrapPrivateFieldConceptQueries(array $fields, \Closure $query_builder) { // We make a boolean clause for each collection set to shrink query size // (instead of a clause for each field, with his collection set) $fields_map = []; $collections_map = []; - foreach ($context->getPrivateFields() as $field) { + foreach ($fields as $field) { $collections = $field->getDependantCollections(); $hash = self::hashCollections($collections); $collections_map[$hash] = $collections; @@ -85,18 +78,17 @@ class QueryHelper $fields_map[$hash] = []; } // Merge fields with others having the same collections - $fields_map[$hash][] = $field->getConceptPathIndexField(); + $fields_map[$hash][] = $field; } $queries = []; foreach ($fields_map as $hash => $fields) { // Right to query on a private field is dependant of document collection // Here we make sure we can only match on allowed collections - $query = []; - $query['bool']['must'][0]['terms']['base_id'] = $collections_map[$hash]; - foreach ($matchers_callback($fields) as $concept_query) { - $query = self::applyBooleanClause($query, 'should', $concept_query); - } + $query = $query_builder($fields); + $collection_query = []; + $collection_query['terms']['base_id'] = $collections_map[$hash]; + $query = self::applyBooleanClause($query, 'must', $collection_query); $queries[] = $query; } diff --git a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Search/TextQueryHelper.php b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Search/TextQueryHelper.php new file mode 100644 index 0000000000..6f1205ede8 --- /dev/null +++ b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Search/TextQueryHelper.php @@ -0,0 +1,35 @@ +getType()) { + case Mapping::TYPE_FLOAT: + case Mapping::TYPE_DOUBLE: + case Mapping::TYPE_INTEGER: + case Mapping::TYPE_LONG: + case Mapping::TYPE_SHORT: + case Mapping::TYPE_BYTE: + if ($is_numeric) { + $filtered[] = $field; + } + break; + case Mapping::TYPE_STRING: + case Mapping::TYPE_DATE: + default: + $filtered[] = $field; + } + } + return $filtered; + } +} diff --git a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Structure/Field.php b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Structure/Field.php index 6a3be6159f..172baf6114 100644 --- a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Structure/Field.php +++ b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Structure/Field.php @@ -105,16 +105,6 @@ class Field ); } - public static function toConceptPathIndexFieldArray(array $fields) - { - $index_fields = []; - foreach ($fields as $field) { - // TODO Skip fields without inference enabled? - $index_fields[] = $field->getConceptPathIndexField(); - } - return $index_fields; - } - public function getConceptPathIndexField() { return sprintf('concept_path.%s', $this->name); diff --git a/tests/Alchemy/Tests/Phrasea/SearchEngine/AST/QuotedTextNodeTest.php b/tests/Alchemy/Tests/Phrasea/SearchEngine/AST/QuotedTextNodeTest.php index cd005212cc..21ae59f78d 100644 --- a/tests/Alchemy/Tests/Phrasea/SearchEngine/AST/QuotedTextNodeTest.php +++ b/tests/Alchemy/Tests/Phrasea/SearchEngine/AST/QuotedTextNodeTest.php @@ -23,9 +23,11 @@ class QuotedTextNodeTest extends \PHPUnit_Framework_TestCase public function testQueryBuild() { + $field = new Field('foo', Mapping::TYPE_STRING, ['private' => false]); $query_context = $this->prophesize(QueryContext::class); - $query_context->getLocalizedFields()->willReturn(['foo.fr', 'foo.en']); + $query_context->getUnrestrictedFields()->willReturn([$field]); $query_context->getPrivateFields()->willReturn([]); + $query_context->localizeField($field)->willReturn(['foo.fr', 'foo.en']); $node = new QuotedTextNode('bar'); $query = $node->buildQuery($query_context->reveal()); @@ -50,11 +52,14 @@ class QuotedTextNodeTest extends \PHPUnit_Framework_TestCase ]); $query_context = $this->prophesize(QueryContext::class); + $query_context + ->getUnrestrictedFields() + ->willReturn([$public_field]); $query_context ->getPrivateFields() ->willReturn([$private_field]); $query_context - ->getLocalizedFields() + ->localizeField($public_field) ->willReturn(['foo.fr', 'foo.en']); $query_context ->localizeField($private_field) diff --git a/tests/Alchemy/Tests/Phrasea/SearchEngine/AST/TermNodeTest.php b/tests/Alchemy/Tests/Phrasea/SearchEngine/AST/TermNodeTest.php index 73dc6217c3..1a421cbe49 100644 --- a/tests/Alchemy/Tests/Phrasea/SearchEngine/AST/TermNodeTest.php +++ b/tests/Alchemy/Tests/Phrasea/SearchEngine/AST/TermNodeTest.php @@ -35,9 +35,6 @@ class TermNodeTest extends \PHPUnit_Framework_TestCase $query_context ->getPrivateFields() ->willReturn([]); - $query_context - ->getLocalizedFields() - ->willReturn(['foo.fr', 'foo.en']); $node = new TermNode('bar'); $node->setConcepts([ @@ -65,6 +62,29 @@ class TermNodeTest extends \PHPUnit_Framework_TestCase $this->assertEquals(json_decode($expected, true), $query); } + public function testQueryBuildWithZeroConcept() + { + $field = new Field('foo', Mapping::TYPE_STRING, ['private' => false]); + $query_context = $this->prophesize(QueryContext::class); + $query_context + ->getUnrestrictedFields() + ->willReturn([$field]); + $query_context + ->getPrivateFields() + ->willReturn([]); + + $node = new TermNode('bar'); + $query = $node->buildQuery($query_context->reveal()); + + $expected = '{ + "bool": { + "should": [] + } + }'; + + $this->assertEquals(json_decode($expected, true), $query); + } + public function testQueryBuildWithPrivateFields() { $public_field = new Field('foo', Mapping::TYPE_STRING, ['private' => false]); diff --git a/tests/Alchemy/Tests/Phrasea/SearchEngine/AST/TextNodeTest.php b/tests/Alchemy/Tests/Phrasea/SearchEngine/AST/TextNodeTest.php index 1e216c65ee..45a9f0f77a 100644 --- a/tests/Alchemy/Tests/Phrasea/SearchEngine/AST/TextNodeTest.php +++ b/tests/Alchemy/Tests/Phrasea/SearchEngine/AST/TextNodeTest.php @@ -42,9 +42,11 @@ class TextNodeTest extends \PHPUnit_Framework_TestCase public function testQueryBuild() { + $field = new Field('foo', Mapping::TYPE_STRING, ['private' => false]); $query_context = $this->prophesize(QueryContext::class); - $query_context->getLocalizedFields()->willReturn(['foo.fr', 'foo.en']); + $query_context->getUnrestrictedFields()->willReturn([$field]); $query_context->getPrivateFields()->willReturn([]); + $query_context->localizeField($field)->willReturn(['foo.fr', 'foo.en']); $node = new TextNode('bar', new Context('baz')); $query = $node->buildQuery($query_context->reveal()); @@ -70,7 +72,10 @@ class TextNodeTest extends \PHPUnit_Framework_TestCase $query_context = $this->prophesize(QueryContext::class); $query_context - ->getLocalizedFields() + ->getUnrestrictedFields() + ->willReturn([$public_field]); + $query_context + ->localizeField($public_field) ->willReturn(['foo.fr', 'foo.en']); $query_context ->getPrivateFields() @@ -115,15 +120,9 @@ class TextNodeTest extends \PHPUnit_Framework_TestCase { $field = new Field('foo', Mapping::TYPE_STRING, ['private' => false]); $query_context = $this->prophesize(QueryContext::class); - $query_context - ->getUnrestrictedFields() - ->willReturn([$field]); - $query_context - ->getPrivateFields() - ->willReturn([]); - $query_context - ->getLocalizedFields() - ->willReturn(['foo.fr', 'foo.en']); + $query_context->getUnrestrictedFields()->willReturn([$field]); + $query_context->getPrivateFields()->willReturn([]); + $query_context->localizeField($field)->willReturn(['foo.fr', 'foo.en']); $node = new TextNode('bar'); $node->setConcepts([