Restrict search on visible private fields

Search on private fields is only supported on bare text search right now (TextNode).
This also fixes an issue in databox::get_collection_unique_ids(), which kept returning the
value from the first call over and over.
This commit is contained in:
Mathieu Darse
2015-07-03 14:33:48 +02:00
parent 6e52421437
commit e4aafaac10
5 changed files with 131 additions and 19 deletions

View File

@@ -3,6 +3,7 @@
namespace Alchemy\Phrasea\SearchEngine\Elastic\AST;
use Alchemy\Phrasea\SearchEngine\Elastic\Search\QueryContext;
use Alchemy\Phrasea\SearchEngine\Elastic\Search\QueryHelper;
use Alchemy\Phrasea\SearchEngine\Elastic\Thesaurus\Term;
class TextNode extends AbstractTermNode implements ContextAbleInterface
@@ -44,16 +45,58 @@ class TextNode extends AbstractTermNode implements ContextAbleInterface
)
);
if ($conceptQueries = $this->buildConceptQueries($context)) {
$textQuery = $query;
$query = array();
$query['bool']['should'] = $conceptQueries;
$query['bool']['should'][] = $textQuery;
foreach ($this->buildPrivateFieldQueries($context) as $private_field_query) {
$query = QueryHelper::applyBooleanClause($query, 'should', $private_field_query);
}
foreach ($this->buildConceptQueries($context) as $concept_query) {
$query = QueryHelper::applyBooleanClause($query, 'should', $concept_query);
}
return $query;
}
/**
 * Build the queries matching the searched text against allowed private fields.
 *
 * Private fields sharing exactly the same set of allowed collections are grouped
 * into a single multi_match clause, so one boolean query is produced per distinct
 * collection set (instead of one per field, each with its own collection set).
 * Every produced query requires both a base_id "terms" restriction and the text match.
 *
 * @param QueryContext $context Context providing the allowed private fields and,
 *                              for each of them, the collections they may be searched on
 * @return array[] List of boolean queries, one per distinct collection set
 */
private function buildPrivateFieldQueries(QueryContext $context)
{
    // Both maps are keyed by the hash of a collection set
    $fields_map = [];
    $collections_map = [];
    foreach ($context->getAllowedPrivateFields() as $field) {
        // Reindex the collection list: a list with sparse keys (e.g. one that went
        // through array_intersect) would be JSON-encoded as an object instead of
        // an array, which is not a valid value for a "terms" clause.
        $collections = array_values($context->getAllowedCollectionsOnPrivateField($field));
        $hash = self::hashCollections($collections);
        $collections_map[$hash] = $collections;
        if (!isset($fields_map[$hash])) {
            $fields_map[$hash] = [];
        }
        // Merge localized fields with others having the same collections
        foreach ($context->localizeField($field->getIndexFieldName()) as $localized_field) {
            $fields_map[$hash][] = $localized_field;
        }
    }

    $queries = [];
    foreach ($fields_map as $hash => $fields) {
        // The right to query a private field depends on the document's collection.
        // Here we make sure we can only match on allowed collections.
        $match = [];
        $match['multi_match']['fields'] = $fields;
        $match['multi_match']['query'] = $this->text;
        $match['multi_match']['operator'] = 'and';
        $query = [];
        $query['bool']['must'][0]['terms']['base_id'] = $collections_map[$hash];
        $query['bool']['must'][1] = $match;
        $queries[] = $query;
    }

    return $queries;
}
/**
 * Compute a key identifying a list of collections regardless of its order.
 *
 * @param array $collections Collection identifiers
 * @return string Sorted identifiers joined with "|"
 */
private static function hashCollections(array $collections)
{
    // Work on a sorted copy so that the same identifiers always give the same key
    $sorted = $collections;
    sort($sorted);

    return join('|', $sorted);
}
public function __toString()
{
return sprintf('<text:%s>', Term::dump($this));

View File

@@ -300,6 +300,9 @@ class ElasticSearchEngine implements SearchEngineInterface
);
}
/**
* @todo Move in search engine service provider
*/
private function createQueryContext(SearchEngineOptions $options)
{
// TODO handle $user when null
@@ -321,6 +324,7 @@ class ElasticSearchEngine implements SearchEngineInterface
* "OtherFieldName" => [4],
* ]
*
* @todo Move in query context
* @param SearchEngineOptions $options
* @return array
*/
@@ -334,8 +338,11 @@ class ElasticSearchEngine implements SearchEngineInterface
$map = $this->structure->getCollectionsUsedByPrivateFields();
// Remove collections base_id which access is restricted.
foreach ($map as $_ => &$collections) {
foreach ($map as $key => &$collections) {
$collections = array_intersect($collections, $allowed_collections);
if (!$collections) {
unset($map[$key]);
}
}
return $map;

View File

@@ -3,6 +3,7 @@
namespace Alchemy\Phrasea\SearchEngine\Elastic\Search;
use Alchemy\Phrasea\SearchEngine\Elastic\Exception\QueryException;
use Alchemy\Phrasea\SearchEngine\Elastic\Structure\Field;
use Alchemy\Phrasea\SearchEngine\Elastic\Structure\Structure;
/**
@@ -46,10 +47,7 @@ class QueryContext
public function getRawFields()
{
if ($this->fields === null) {
return array(
'caption_all.raw',
'private_caption_all.raw'
);
return array('caption_all.raw');
}
$fields = array();
@@ -66,10 +64,7 @@ class QueryContext
public function getLocalizedFields()
{
if ($this->fields === null) {
return array_merge(
$this->localizeField('caption_all'),
$this->localizeField('private_caption_all')
);
return $this->localizeField('caption_all');
}
$fields = array();
@@ -81,7 +76,27 @@ class QueryContext
return $fields;
}
private function localizeField($field)
/**
 * Resolve every field name used as a key of the private collection map.
 *
 * @return array One entry per key of the map, as returned by the structure's get()
 *               (presumably Field instances; confirm against Structure::get)
 */
public function getAllowedPrivateFields()
{
    $allowed_fields = array();
    foreach (array_keys($this->privateCollectionMap) as $field_name) {
        $allowed_fields[] = $this->structure->get($field_name);
    }

    return $allowed_fields;
}
/**
 * Get the private collection map entry registered for the given field.
 *
 * @param Field $field Field looked up by its name
 * @return mixed Entry stored in the private collection map for that field name
 * @throws \OutOfRangeException When the map has no entry for the field name
 */
public function getAllowedCollectionsOnPrivateField(Field $field)
{
    $field_name = $field->getName();
    if (isset($this->privateCollectionMap[$field_name])) {
        return $this->privateCollectionMap[$field_name];
    }

    throw new \OutOfRangeException('Given field is not an allowed private field.');
}
/**
* @todo Maybe we should put this logic in Field class?
*/
public function localizeField($field)
{
$fields = array();
foreach ($this->locales as $locale) {

View File

@@ -0,0 +1,47 @@
<?php
namespace Alchemy\Phrasea\SearchEngine\Elastic\Search;
class QueryHelper
{
    /**
     * Static helper only, instantiation is disabled.
     */
    private function __construct() {}

    /**
     * Apply conjunction or disjunction between a query and a sub query clause
     *
     * The clause is appended to the existing boolean group when the query is a
     * boolean query made only of that group. In every other case the whole query
     * is wrapped, together with the clause, in a new boolean query: adding a
     * "should" group next to an existing "must" group (or the other way around)
     * would not express a disjunction (or conjunction) with the original query.
     *
     * The group is always returned as a list of clauses, so the result can be
     * passed again to this method.
     *
     * @param  array  $query  Query; when empty, the clause is returned as is
     * @param  string $type   "must" for conjunction, "should" for disjunction
     * @param  array  $clause Clause query
     * @return array          Resulting query
     * @throws \InvalidArgumentException When type is neither "must" nor "should"
     */
    public static function applyBooleanClause(array $query, $type, array $clause)
    {
        // Strict comparison, a loose one would let values such as 0 or true through
        if (!in_array($type, ['must', 'should'], true)) {
            throw new \InvalidArgumentException(sprintf('Type must be either "must" or "should", "%s" given', $type));
        }

        // An empty query carries no condition to combine with
        if (!$query) {
            return $clause;
        }

        $is_reusable = array_keys($query) === ['bool']
            && is_array($query['bool'])
            && array_keys($query['bool']) === [$type];

        if (!$is_reusable) {
            // Wrap in a boolean query
            return ['bool' => [$type => [$query, $clause]]];
        }

        // Reuse the existing boolean clause group
        $group = $query['bool'][$type];
        if ($group === null) {
            $group = [];
        } elseif (!self::isClauseList($group)) {
            // A single clause is an associative array: wrap it in a list before
            // appending, otherwise the new clause would end up inside of it
            $group = [$group];
        }
        $group[] = $clause;
        $query['bool'][$type] = $group;

        return $query;
    }

    /**
     * Tell whether a boolean group is a list of clauses (sequential integer keys)
     * rather than a single clause.
     *
     * @param  mixed $group Content of a boolean group
     * @return bool
     */
    private static function isClauseList($group)
    {
        if (!is_array($group)) {
            return false;
        }

        return $group === [] || array_keys($group) === range(0, count($group) - 1);
    }
}

View File

@@ -232,10 +232,10 @@ class databox extends base
public function get_collection_unique_ids()
{
static $base_ids;
static $base_ids_cache = [];
if (isset($base_ids)) {
return $base_ids;
if (isset($base_ids_cache[$this->id])) {
return $base_ids_cache[$this->id];
}
$conn = $this->get_appbox()->get_connection();
@@ -250,7 +250,7 @@ class databox extends base
$base_ids[] = (int) $row['base_id'];
}
return $base_ids;
return $base_ids_cache[$this->id] = $base_ids;
}
protected function get_available_collections()