Fix number field search

Search with non numeric content will not hit number field (it breaks elasticsearch and is useless anyway)

- Rename QueryHelper::buildPrivateFieldQueries() to wrapPrivateFieldQuery().
    - Signature changed too, the third parameter is dropped an QueryContext is replaced by an array of Field.
    - Query builder closure is now passed an array of Field, not of index field names.
- Remove Field::toConceptPathIndexFieldArray() because method name was beyond understanding (and also because it wasn't needed anymore)
- Various AST node types have changed due to previous API changes
This commit is contained in:
Mathieu Darse
2015-07-23 17:39:11 +02:00
parent daea7f8c77
commit a31442368b
12 changed files with 160 additions and 107 deletions

View File

@@ -25,33 +25,38 @@ abstract class AbstractTermNode extends Node implements TermInterface
$this->concepts = $concepts;
}
protected function buildConceptQueries(QueryContext $context)
protected function buildConceptQuery(QueryContext $context)
{
$concepts = Concept::pruneNarrowConcepts($this->concepts);
if (!$concepts) {
return [];
return null;
}
$queries_builder = function (array $index_fields) use ($concepts) {
$queries = [];
$query_builder = function (array $fields) use ($concepts) {
$index_fields = [];
foreach ($fields as $field) {
$index_fields[] = $field->getConceptPathIndexField();
}
$query = null;
foreach ($concepts as $concept) {
$queries[] = [
$concept_query = [
'multi_match' => [
'fields' => $index_fields,
'query' => $concept->getPath()
]
];
$query = QueryHelper::applyBooleanClause($query, 'should', $concept_query);
}
return $queries;
return $query;
};
$fields = $context->getUnrestrictedFields();
$index_fields = Field::toConceptPathIndexFieldArray($fields);
$query = $query_builder($context->getUnrestrictedFields());
$private_fields = $context->getPrivateFields();
foreach (QueryHelper::wrapPrivateFieldConceptQueries($private_fields, $query_builder) as $private_field_query) {
$query = QueryHelper::applyBooleanClause($query, 'should', $private_field_query);
}
$queries = $queries_builder($index_fields);
foreach (QueryHelper::buildPrivateFieldConceptQueries($context, $queries_builder) as $queries[]);
return $queries;
return $query;
}
public function getValue()

View File

@@ -4,6 +4,7 @@ namespace Alchemy\Phrasea\SearchEngine\Elastic\AST;
use Alchemy\Phrasea\SearchEngine\Elastic\Search\QueryContext;
use Alchemy\Phrasea\SearchEngine\Elastic\Search\QueryHelper;
use Alchemy\Phrasea\SearchEngine\Elastic\Search\TextQueryHelper;
class QuotedTextNode extends Node
{
@@ -16,20 +17,27 @@ class QuotedTextNode extends Node
public function buildQuery(QueryContext $context)
{
$query_builder = function (array $fields) {
$query_builder = function (array $fields) use ($context) {
$index_fields = [];
foreach ($fields as $field) {
foreach ($context->localizeField($field) as $index_fields[]);
}
if (!$index_fields) {
return null;
}
return [
'multi_match' => [
'type' => 'phrase',
'fields' => $fields,
'fields' => $index_fields,
'query' => $this->text,
]
];
};
$fields = $context->getLocalizedFields();
$query = $fields ? $query_builder($fields) : null;
foreach (QueryHelper::buildPrivateFieldQueries($context, $query_builder) as $private_field_query) {
$query = $query_builder($context->getUnrestrictedFields());
$private_fields = $context->getPrivateFields();
$private_fields = TextQueryHelper::filterCompatibleFields($private_fields, $this->text);
foreach (QueryHelper::wrapPrivateFieldQueries($private_fields, $query_builder) as $private_field_query) {
$query = QueryHelper::applyBooleanClause($query, 'should', $private_field_query);
}

View File

@@ -30,40 +30,32 @@ class RawNode extends Node
public function buildQuery(QueryContext $context)
{
$query_builder = function (array $fields) {
$index_fields = [];
foreach ($fields as $field) {
$index_fields[] = $field->getIndexField(true);
}
$query = [];
if (count($fields) > 1) {
if (count($index_fields) > 1) {
$query['multi_match']['query'] = $this->text;
$query['multi_match']['fields'] = $fields;
$query['multi_match']['fields'] = $index_fields;
$query['multi_match']['analyzer'] = 'keyword';
} else {
$field = reset($fields);
$query['term'][$field] = $this->text;
$index_field = reset($index_fields);
$query['term'][$index_field] = $this->text;
}
return $query;
};
$fields = $context->getRawFields();
$query = count($fields) ? $query_builder($fields) : null;
foreach (QueryHelper::buildPrivateFieldQueries($context, $query_builder, $this->getIndexFieldsCallback()) as $private_field_query) {
$query = $query_builder($context->getUnrestrictedFields());
$private_fields = $context->getPrivateFields();
foreach (QueryHelper::wrapPrivateFieldQueries($private_fields, $query_builder) as $private_field_query) {
$query = QueryHelper::applyBooleanClause($query, 'should', $private_field_query);
}
return $query;
}
private function getIndexFieldsCallback()
{
if ($this->index_fields_callback === null) {
$this->index_fields_callback = function (StructureField $field) {
return $field->getIndexField(true);
};
}
return $this->index_fields_callback;
}
public function getTermNodes()
{
return [];

View File

@@ -9,8 +9,12 @@ class TermNode extends AbstractTermNode
{
public function buildQuery(QueryContext $context)
{
$query = [];
$query['bool']['should'] = $this->buildConceptQueries($context);
$query = $this->buildConceptQuery($context);
// Should not match anything if no concept is defined
if ($query === null) {
$query['bool']['should'] = [];
}
return $query;
}

View File

@@ -4,6 +4,7 @@ namespace Alchemy\Phrasea\SearchEngine\Elastic\AST;
use Alchemy\Phrasea\SearchEngine\Elastic\Search\QueryContext;
use Alchemy\Phrasea\SearchEngine\Elastic\Search\QueryHelper;
use Alchemy\Phrasea\SearchEngine\Elastic\Search\TextQueryHelper;
use Alchemy\Phrasea\SearchEngine\Elastic\Thesaurus\Term;
class TextNode extends AbstractTermNode implements ContextAbleInterface
@@ -37,25 +38,35 @@ class TextNode extends AbstractTermNode implements ContextAbleInterface
public function buildQuery(QueryContext $context)
{
$query_builder = function (array $fields) {
$query_builder = function (array $fields) use ($context) {
$index_fields = [];
foreach ($fields as $field) {
foreach ($context->localizeField($field) as $index_fields[]);
}
if (!$index_fields) {
return null;
}
return [
'multi_match' => [
'fields' => $fields,
'fields' => $index_fields,
'query' => $this->text,
'operator' => 'and',
]
];
};
$fields = $context->getLocalizedFields();
$query = count($fields) ? $query_builder($fields) : null;
$query = $query_builder($context->getUnrestrictedFields());
foreach (QueryHelper::buildPrivateFieldQueries($context, $query_builder) as $private_field_query) {
$private_fields = $context->getPrivateFields();
$private_fields = TextQueryHelper::filterCompatibleFields($private_fields, $this->text);
foreach (QueryHelper::wrapPrivateFieldQueries($private_fields, $query_builder) as $private_field_query) {
$query = QueryHelper::applyBooleanClause($query, 'should', $private_field_query);
}
foreach ($this->buildConceptQueries($context) as $concept_query) {
$query = QueryHelper::applyBooleanClause($query, 'should', $concept_query);
$concept_query = $this->buildConceptQuery($context);
if ($concept_query !== null) {
$query = QueryHelper::applyBooleanClause($query, 'should', $this->buildConceptQuery($context));
}
return $query;

View File

@@ -3,6 +3,7 @@
namespace Alchemy\Phrasea\SearchEngine\Elastic\Search;
use Alchemy\Phrasea\SearchEngine\Elastic\Exception\QueryException;
use Alchemy\Phrasea\SearchEngine\Elastic\Mapping;
use Alchemy\Phrasea\SearchEngine\Elastic\Structure\Field;
use Alchemy\Phrasea\SearchEngine\Elastic\AST\Field as ASTField;
use Alchemy\Phrasea\SearchEngine\Elastic\Structure\Structure;
@@ -56,20 +57,6 @@ class QueryContext
return $fields;
}
public function getLocalizedFields()
{
if ($this->fields === null) {
return $this->localizeFieldName('caption_all');
}
$fields = array();
foreach ($this->getUnrestrictedFields() as $field) {
foreach ($this->localizeField($field) as $fields[]);
}
return $fields;
}
public function getUnrestrictedFields()
{
// TODO Restore search optimization by using "caption_all" field
@@ -109,7 +96,12 @@ class QueryContext
*/
public function localizeField(Field $field)
{
return $this->localizeFieldName($field->getIndexField());
$index_field = $field->getIndexField();
if ($field->getType() === Mapping::TYPE_STRING) {
return $this->localizeFieldName($index_field);
} else {
return [$index_field];
}
}
private function localizeFieldName($field)

View File

@@ -8,19 +8,13 @@ class QueryHelper
{
private function __construct() {}
public static function buildPrivateFieldQueries(QueryContext $context, \Closure $matcher_callback, \Closure $index_fields_callback = null)
public static function wrapPrivateFieldQueries(array $fields, \Closure $query_builder)
{
if ($index_fields_callback === null) {
$index_fields_callback = function (Field $field) use ($context) {
return $context->localizeField($field);
};
}
// We make a boolean clause for each collection set to shrink query size
// (instead of a clause for each field, with his collection set)
$fields_map = [];
$collections_map = [];
foreach ($context->getPrivateFields() as $field) {
foreach ($fields as $field) {
$collections = $field->getDependantCollections();
$hash = self::hashCollections($collections);
$collections_map[$hash] = $collections;
@@ -28,8 +22,7 @@ class QueryHelper
$fields_map[$hash] = [];
}
// Merge fields with others having the same collections
$fields = (array) $index_fields_callback($field);
foreach ($fields as $fields_map[$hash][]);
$fields_map[$hash][] = $field;
}
$queries = [];
@@ -37,7 +30,7 @@ class QueryHelper
// Right to query on a private field is dependant of document collection
// Here we make sure we can only match on allowed collections
$queries[] = self::restrictQueryToCollections(
$matcher_callback($fields),
$query_builder($fields),
$collections_map[$hash]
);
}
@@ -69,15 +62,15 @@ class QueryHelper
}
/**
* @todo Factor with buildPrivateFieldQueries()
* @todo Factor with wrapPrivateFieldQueries()
*/
public static function buildPrivateFieldConceptQueries(QueryContext $context, \Closure $matchers_callback)
public static function wrapPrivateFieldConceptQueries(array $fields, \Closure $query_builder)
{
// We make a boolean clause for each collection set to shrink query size
// (instead of a clause for each field, with his collection set)
$fields_map = [];
$collections_map = [];
foreach ($context->getPrivateFields() as $field) {
foreach ($fields as $field) {
$collections = $field->getDependantCollections();
$hash = self::hashCollections($collections);
$collections_map[$hash] = $collections;
@@ -85,18 +78,17 @@ class QueryHelper
$fields_map[$hash] = [];
}
// Merge fields with others having the same collections
$fields_map[$hash][] = $field->getConceptPathIndexField();
$fields_map[$hash][] = $field;
}
$queries = [];
foreach ($fields_map as $hash => $fields) {
// Right to query on a private field is dependant of document collection
// Here we make sure we can only match on allowed collections
$query = [];
$query['bool']['must'][0]['terms']['base_id'] = $collections_map[$hash];
foreach ($matchers_callback($fields) as $concept_query) {
$query = self::applyBooleanClause($query, 'should', $concept_query);
}
$query = $query_builder($fields);
$collection_query = [];
$collection_query['terms']['base_id'] = $collections_map[$hash];
$query = self::applyBooleanClause($query, 'must', $collection_query);
$queries[] = $query;
}

View File

@@ -0,0 +1,35 @@
<?php
namespace Alchemy\Phrasea\SearchEngine\Elastic\Search;
use Alchemy\Phrasea\SearchEngine\Elastic\Mapping;
class TextQueryHelper
{
private function __construct() {}
public static function filterCompatibleFields(array $fields, $query_text)
{
$is_numeric = is_numeric($query_text);
$filtered = [];
foreach ($fields as $field) {
switch ($field->getType()) {
case Mapping::TYPE_FLOAT:
case Mapping::TYPE_DOUBLE:
case Mapping::TYPE_INTEGER:
case Mapping::TYPE_LONG:
case Mapping::TYPE_SHORT:
case Mapping::TYPE_BYTE:
if ($is_numeric) {
$filtered[] = $field;
}
break;
case Mapping::TYPE_STRING:
case Mapping::TYPE_DATE:
default:
$filtered[] = $field;
}
}
return $filtered;
}
}

View File

@@ -105,16 +105,6 @@ class Field
);
}
public static function toConceptPathIndexFieldArray(array $fields)
{
$index_fields = [];
foreach ($fields as $field) {
// TODO Skip fields without inference enabled?
$index_fields[] = $field->getConceptPathIndexField();
}
return $index_fields;
}
public function getConceptPathIndexField()
{
return sprintf('concept_path.%s', $this->name);

View File

@@ -23,9 +23,11 @@ class QuotedTextNodeTest extends \PHPUnit_Framework_TestCase
public function testQueryBuild()
{
$field = new Field('foo', Mapping::TYPE_STRING, ['private' => false]);
$query_context = $this->prophesize(QueryContext::class);
$query_context->getLocalizedFields()->willReturn(['foo.fr', 'foo.en']);
$query_context->getUnrestrictedFields()->willReturn([$field]);
$query_context->getPrivateFields()->willReturn([]);
$query_context->localizeField($field)->willReturn(['foo.fr', 'foo.en']);
$node = new QuotedTextNode('bar');
$query = $node->buildQuery($query_context->reveal());
@@ -50,11 +52,14 @@ class QuotedTextNodeTest extends \PHPUnit_Framework_TestCase
]);
$query_context = $this->prophesize(QueryContext::class);
$query_context
->getUnrestrictedFields()
->willReturn([$public_field]);
$query_context
->getPrivateFields()
->willReturn([$private_field]);
$query_context
->getLocalizedFields()
->localizeField($public_field)
->willReturn(['foo.fr', 'foo.en']);
$query_context
->localizeField($private_field)

View File

@@ -35,9 +35,6 @@ class TermNodeTest extends \PHPUnit_Framework_TestCase
$query_context
->getPrivateFields()
->willReturn([]);
$query_context
->getLocalizedFields()
->willReturn(['foo.fr', 'foo.en']);
$node = new TermNode('bar');
$node->setConcepts([
@@ -65,6 +62,29 @@ class TermNodeTest extends \PHPUnit_Framework_TestCase
$this->assertEquals(json_decode($expected, true), $query);
}
public function testQueryBuildWithZeroConcept()
{
$field = new Field('foo', Mapping::TYPE_STRING, ['private' => false]);
$query_context = $this->prophesize(QueryContext::class);
$query_context
->getUnrestrictedFields()
->willReturn([$field]);
$query_context
->getPrivateFields()
->willReturn([]);
$node = new TermNode('bar');
$query = $node->buildQuery($query_context->reveal());
$expected = '{
"bool": {
"should": []
}
}';
$this->assertEquals(json_decode($expected, true), $query);
}
public function testQueryBuildWithPrivateFields()
{
$public_field = new Field('foo', Mapping::TYPE_STRING, ['private' => false]);

View File

@@ -42,9 +42,11 @@ class TextNodeTest extends \PHPUnit_Framework_TestCase
public function testQueryBuild()
{
$field = new Field('foo', Mapping::TYPE_STRING, ['private' => false]);
$query_context = $this->prophesize(QueryContext::class);
$query_context->getLocalizedFields()->willReturn(['foo.fr', 'foo.en']);
$query_context->getUnrestrictedFields()->willReturn([$field]);
$query_context->getPrivateFields()->willReturn([]);
$query_context->localizeField($field)->willReturn(['foo.fr', 'foo.en']);
$node = new TextNode('bar', new Context('baz'));
$query = $node->buildQuery($query_context->reveal());
@@ -70,7 +72,10 @@ class TextNodeTest extends \PHPUnit_Framework_TestCase
$query_context = $this->prophesize(QueryContext::class);
$query_context
->getLocalizedFields()
->getUnrestrictedFields()
->willReturn([$public_field]);
$query_context
->localizeField($public_field)
->willReturn(['foo.fr', 'foo.en']);
$query_context
->getPrivateFields()
@@ -115,15 +120,9 @@ class TextNodeTest extends \PHPUnit_Framework_TestCase
{
$field = new Field('foo', Mapping::TYPE_STRING, ['private' => false]);
$query_context = $this->prophesize(QueryContext::class);
$query_context
->getUnrestrictedFields()
->willReturn([$field]);
$query_context
->getPrivateFields()
->willReturn([]);
$query_context
->getLocalizedFields()
->willReturn(['foo.fr', 'foo.en']);
$query_context->getUnrestrictedFields()->willReturn([$field]);
$query_context->getPrivateFields()->willReturn([]);
$query_context->localizeField($field)->willReturn(['foo.fr', 'foo.en']);
$node = new TextNode('bar');
$node->setConcepts([