Refactor merged field structure

Here is the new model:

+-----------------------------+
|          Structure          |
+-----------------------------+
| +fromDataboxes()            |
| getAllFields()              |
| getUnrestrictedFields()     |
| getPrivateFields()          |
| getFacetFields()            |
| getThesaurusEnabledFields() |
| getDateFields()             |
|- - - - - - - - - - - - - - -|
| add()                       |
| get()                       |
| typeOf()                    |
| isPrivate()                 |
+-------+-+-+-----------------+
        | | |          +---------------------+
        | | +--------> |        Field        |
        | |            +---------------------+
        | |            | getName()           |
        | |            | getType()           |
        | |            | isXXX()             |
        | |            | getThesaurusRoots() |
        | |            +---------------------+
        | |
        | |            +-------+
        | +----------> | Field |
        |              +-------+
        |
        |              +-------+
        +------------> | Field |
                       +-------+

It was driven by the following use cases:
- Get list of facets (only searchable fields)
- Get list of fields with concept inference
- Get list of all fields
    - Split into private / public fields (to define mapping)
- Get all date fields
- Get field type
    - To apply sanitization rules
    - To define mapping
- Check if concept inference enabled
- Check if the field is searchable
- Check if the field is a facet
- Check if the field is private
- Dereference field from label (still to be done)

(The last two UCs are new)

Also removed old code from legacy search engines.

[#PHRAS-500]
This commit is contained in:
Mathieu Darse
2015-05-20 21:10:42 +02:00
parent 06bd5d09bc
commit 421684757a
10 changed files with 367 additions and 241 deletions

View File

@@ -122,15 +122,13 @@ $cli->command(new H264MappingGenerator());
$cli->command(new XSendFileConfigurationDumper()); $cli->command(new XSendFileConfigurationDumper());
$cli->command(new XSendFileMappingGenerator()); $cli->command(new XSendFileMappingGenerator());
if ($cli['search_engine.type'] === SearchEngineInterface::TYPE_ELASTICSEARCH) { $cli->command(new IndexCreateCommand());
$cli->command(new IndexCreateCommand()); $cli->command(new IndexDropCommand());
$cli->command(new IndexDropCommand()); $cli->command(new MappingUpdateCommand());
$cli->command(new MappingUpdateCommand()); $cli->command(new IndexPopulateCommand());
$cli->command(new IndexPopulateCommand()); $cli->command(new QueryParseCommand());
$cli->command(new QueryParseCommand()); $cli->command(new QuerySampleCommand());
$cli->command(new QuerySampleCommand()); $cli->command(new FindConceptsCommand());
$cli->command(new FindConceptsCommand());
}
$cli->loadPlugins(); $cli->loadPlugins();

View File

@@ -23,6 +23,7 @@ use Alchemy\Phrasea\SearchEngine\Elastic\RecordHelper;
use Alchemy\Phrasea\SearchEngine\Elastic\Search\Escaper; use Alchemy\Phrasea\SearchEngine\Elastic\Search\Escaper;
use Alchemy\Phrasea\SearchEngine\Elastic\Search\FacetsResponse; use Alchemy\Phrasea\SearchEngine\Elastic\Search\FacetsResponse;
use Alchemy\Phrasea\SearchEngine\Elastic\Search\QueryCompiler; use Alchemy\Phrasea\SearchEngine\Elastic\Search\QueryCompiler;
use Alchemy\Phrasea\SearchEngine\Elastic\Structure\Structure;
use Alchemy\Phrasea\SearchEngine\Elastic\Thesaurus; use Alchemy\Phrasea\SearchEngine\Elastic\Thesaurus;
use Alchemy\Phrasea\SearchEngine\Phrasea\PhraseaEngineSubscriber; use Alchemy\Phrasea\SearchEngine\Phrasea\PhraseaEngineSubscriber;
use Elasticsearch\Client; use Elasticsearch\Client;
@@ -41,27 +42,18 @@ class SearchEngineServiceProvider implements ServiceProviderInterface
return $app['search_engine']; return $app['search_engine'];
}; };
$app['search_engine'] = $app->share(function ($app) {
$type = $app['search_engine.type'];
switch ($type) {
case SearchEngineInterface::TYPE_ELASTICSEARCH:
return $app['elasticsearch.engine'];
default:
throw new InvalidArgumentException(sprintf('Invalid search engine type "%s".', $type));
}
});
$app['search_engine.type'] = function ($app) {
return $app['conf']->get(['main', 'search-engine', 'type']);
};
$app['phraseanet.SE.logger'] = $app->share(function (Application $app) { $app['phraseanet.SE.logger'] = $app->share(function (Application $app) {
return new SearchEngineLogger($app); return new SearchEngineLogger($app);
}); });
$app['elasticsearch.engine'] = $app->share(function ($app) { $app['search_engine'] = $app->share(function ($app) {
$type = $app['conf']->get(['main', 'search-engine', 'type']);
if ($type !== SearchEngineInterface::TYPE_ELASTICSEARCH) {
throw new InvalidArgumentException(sprintf('Invalid search engine type "%s".', $type));
}
return new ElasticSearchEngine( return new ElasticSearchEngine(
$app, $app,
$app['search_engine.structure'],
$app['elasticsearch.client'], $app['elasticsearch.client'],
$app['elasticsearch.options']['index'], $app['elasticsearch.options']['index'],
$app['locales.available'], $app['locales.available'],
@@ -70,6 +62,11 @@ class SearchEngineServiceProvider implements ServiceProviderInterface
); );
}); });
$app['search_engine.structure'] = $app->share(function ($app) {
$databoxes = $app['phraseanet.appbox']->get_databoxes();
return Structure::fromDataboxes($databoxes);
});
$app['elasticsearch.facets_response.factory'] = $app->protect(function (array $response) use ($app) { $app['elasticsearch.facets_response.factory'] = $app->protect(function (array $response) use ($app) {
return new FacetsResponse(new Escaper(), $response); return new FacetsResponse(new Escaper(), $response);
}); });
@@ -96,6 +93,7 @@ class SearchEngineServiceProvider implements ServiceProviderInterface
$logger = new \Monolog\Logger('indexer'); $logger = new \Monolog\Logger('indexer');
$logger->pushHandler(new \Monolog\Handler\ErrorLogHandler()); $logger->pushHandler(new \Monolog\Handler\ErrorLogHandler());
return new RecordIndexer( return new RecordIndexer(
$app['search_engine.structure'],
$app['elasticsearch.record_helper'], $app['elasticsearch.record_helper'],
$app['thesaurus'], $app['thesaurus'],
$app['phraseanet.appbox'], $app['phraseanet.appbox'],
@@ -193,12 +191,5 @@ class SearchEngineServiceProvider implements ServiceProviderInterface
public function boot(Application $app) public function boot(Application $app)
{ {
if ($app['search_engine.type'] === SearchEngineInterface::TYPE_PHRASEA) {
$app['dispatcher'] = $app->share($app->extend('dispatcher', function ($dispatcher, Application $app) {
$dispatcher->addSubscriber($app['phraseanet.SE.subscriber']);
return $dispatcher;
}));
}
} }
} }

View File

@@ -17,6 +17,7 @@ use Alchemy\Phrasea\SearchEngine\Elastic\Indexer\TermIndexer;
use Alchemy\Phrasea\SearchEngine\Elastic\RecordHelper; use Alchemy\Phrasea\SearchEngine\Elastic\RecordHelper;
use Alchemy\Phrasea\SearchEngine\Elastic\Search\FacetsResponse; use Alchemy\Phrasea\SearchEngine\Elastic\Search\FacetsResponse;
use Alchemy\Phrasea\SearchEngine\Elastic\Search\QueryContext; use Alchemy\Phrasea\SearchEngine\Elastic\Search\QueryContext;
use Alchemy\Phrasea\SearchEngine\Elastic\Structure\Structure;
use Alchemy\Phrasea\SearchEngine\SearchEngineInterface; use Alchemy\Phrasea\SearchEngine\SearchEngineInterface;
use Alchemy\Phrasea\SearchEngine\SearchEngineOptions; use Alchemy\Phrasea\SearchEngine\SearchEngineOptions;
use Alchemy\Phrasea\SearchEngine\SearchEngineResult; use Alchemy\Phrasea\SearchEngine\SearchEngineResult;
@@ -34,17 +35,18 @@ class ElasticSearchEngine implements SearchEngineInterface
const FLAG_UNSET_ONLY = 'unset_only'; const FLAG_UNSET_ONLY = 'unset_only';
private $app; private $app;
private $structure;
/** @var Client */ /** @var Client */
private $client; private $client;
private $dateFields;
private $indexName; private $indexName;
private $configurationPanel; private $configurationPanel;
private $locales; private $locales;
private $recordHelper; private $recordHelper;
public function __construct(Application $app, Client $client, $indexName, array $locales, RecordHelper $recordHelper, Closure $facetsResponseFactory) public function __construct(Application $app, Structure $structure, Client $client, $indexName, array $locales, RecordHelper $recordHelper, Closure $facetsResponseFactory)
{ {
$this->app = $app; $this->app = $app;
$this->structure = $structure;
$this->client = $client; $this->client = $client;
$this->locales = array_keys($locales); $this->locales = array_keys($locales);
$this->recordHelper = $recordHelper; $this->recordHelper = $recordHelper;
@@ -117,11 +119,7 @@ class ElasticSearchEngine implements SearchEngineInterface
*/ */
public function getAvailableDateFields() public function getAvailableDateFields()
{ {
if ($this->dateFields === null) { return array_keys($this->structure->getDateFields());
$this->dateFields = $this->recordHelper->getDateFields();
}
return $this->dateFields;
} }
/** /**

View File

@@ -14,6 +14,7 @@ namespace Alchemy\Phrasea\SearchEngine\Elastic\Indexer\Record\Hydrator;
use Alchemy\Phrasea\SearchEngine\Elastic\Exception\Exception; use Alchemy\Phrasea\SearchEngine\Elastic\Exception\Exception;
use Alchemy\Phrasea\SearchEngine\Elastic\Mapping; use Alchemy\Phrasea\SearchEngine\Elastic\Mapping;
use Alchemy\Phrasea\SearchEngine\Elastic\RecordHelper; use Alchemy\Phrasea\SearchEngine\Elastic\RecordHelper;
use Alchemy\Phrasea\SearchEngine\Elastic\Structure\Structure;
use Doctrine\DBAL\Connection; use Doctrine\DBAL\Connection;
use Doctrine\DBAL\Driver\Connection as DriverConnection; use Doctrine\DBAL\Driver\Connection as DriverConnection;
use DomainException; use DomainException;
@@ -21,12 +22,13 @@ use DomainException;
class MetadataHydrator implements HydratorInterface class MetadataHydrator implements HydratorInterface
{ {
private $connection; private $connection;
private $structure;
private $helper; private $helper;
private $fields;
public function __construct(DriverConnection $connection, RecordHelper $helper) public function __construct(DriverConnection $connection, Structure $structure, RecordHelper $helper)
{ {
$this->connection = $connection; $this->connection = $connection;
$this->structure = $structure;
$this->helper = $helper; $this->helper = $helper;
} }
@@ -72,7 +74,7 @@ SQL;
switch ($metadata['type']) { switch ($metadata['type']) {
case 'caption': case 'caption':
// Sanitize fields // Sanitize fields
switch ($this->getFieldType($key)) { switch ($this->structure->typeOf($key)) {
case Mapping::TYPE_DATE: case Mapping::TYPE_DATE:
$value = $this->helper->sanitizeDate($value); $value = $this->helper->sanitizeDate($value);
break; break;
@@ -115,16 +117,4 @@ SQL;
} }
} }
} }
private function getFieldType($key)
{
if ($this->fields === null) {
$this->fields = $this->helper->getFieldsStructure();
}
if (!isset($this->fields[$key]['type'])) {
throw new DomainException(sprintf('Unknown field "%s".', $key));
}
return $this->fields[$key]['type'];
}
} }

View File

@@ -18,29 +18,28 @@ use Alchemy\Phrasea\SearchEngine\Elastic\Thesaurus\CandidateTerms;
use Alchemy\Phrasea\SearchEngine\Elastic\Thesaurus\Concept; use Alchemy\Phrasea\SearchEngine\Elastic\Thesaurus\Concept;
use Alchemy\Phrasea\SearchEngine\Elastic\Thesaurus\Filter; use Alchemy\Phrasea\SearchEngine\Elastic\Thesaurus\Filter;
use Alchemy\Phrasea\SearchEngine\Elastic\Thesaurus\Term; use Alchemy\Phrasea\SearchEngine\Elastic\Thesaurus\Term;
use Alchemy\Phrasea\SearchEngine\Elastic\Structure\Structure;
class ThesaurusHydrator implements HydratorInterface class ThesaurusHydrator implements HydratorInterface
{ {
private $structure;
private $thesaurus; private $thesaurus;
private $candidate_terms; private $candidate_terms;
private $helper;
public function __construct(Thesaurus $thesaurus, CandidateTerms $candidate_terms, RecordHelper $helper) public function __construct(Structure $structure, Thesaurus $thesaurus, CandidateTerms $candidate_terms)
{ {
$this->structure = $structure;
$this->thesaurus = $thesaurus; $this->thesaurus = $thesaurus;
$this->candidate_terms = $candidate_terms; $this->candidate_terms = $candidate_terms;
$this->helper = $helper;
} }
public function hydrateRecords(array &$records) public function hydrateRecords(array &$records)
{ {
// Fields with concept inference enabled // Fields with concept inference enabled
$structure = $this->helper->getFieldsStructure(); $structure = $this->structure->getThesaurusEnabledFields();
$fields = array(); $fields = array();
foreach ($structure as $name => $options) { foreach ($structure as $name => $field) {
if ($options['thesaurus_concept_inference']) { $fields[$name] = $field->getThesaurusRoots();
$fields[$name] = $options['thesaurus_root_concepts'];
}
} }
// Hydrate records with concepts // Hydrate records with concepts
foreach ($records as &$record) { foreach ($records as &$record) {

View File

@@ -28,6 +28,8 @@ use Alchemy\Phrasea\SearchEngine\Elastic\Indexer\RecordQueuer;
use Alchemy\Phrasea\SearchEngine\Elastic\Mapping; use Alchemy\Phrasea\SearchEngine\Elastic\Mapping;
use Alchemy\Phrasea\SearchEngine\Elastic\RecordHelper; use Alchemy\Phrasea\SearchEngine\Elastic\RecordHelper;
use Alchemy\Phrasea\SearchEngine\Elastic\StringUtils; use Alchemy\Phrasea\SearchEngine\Elastic\StringUtils;
use Alchemy\Phrasea\SearchEngine\Elastic\Structure\Field;
use Alchemy\Phrasea\SearchEngine\Elastic\Structure\Structure;
use Alchemy\Phrasea\SearchEngine\Elastic\Thesaurus; use Alchemy\Phrasea\SearchEngine\Elastic\Thesaurus;
use Alchemy\Phrasea\SearchEngine\Elastic\Thesaurus\CandidateTerms; use Alchemy\Phrasea\SearchEngine\Elastic\Thesaurus\CandidateTerms;
use databox; use databox;
@@ -55,8 +57,9 @@ class RecordIndexer
private $logger; private $logger;
public function __construct(RecordHelper $helper, Thesaurus $thesaurus, \appbox $appbox, array $locales, LoggerInterface $logger) public function __construct(Structure $structure, RecordHelper $helper, Thesaurus $thesaurus, \appbox $appbox, array $locales, LoggerInterface $logger)
{ {
$this->structure = $structure;
$this->helper = $helper; $this->helper = $helper;
$this->thesaurus = $thesaurus; $this->thesaurus = $thesaurus;
$this->appbox = $appbox; $this->appbox = $appbox;
@@ -137,8 +140,8 @@ class RecordIndexer
$fetcher = new Fetcher($connection, array( $fetcher = new Fetcher($connection, array(
new CoreHydrator($databox->get_sbas_id(), $this->helper), new CoreHydrator($databox->get_sbas_id(), $this->helper),
new TitleHydrator($connection), new TitleHydrator($connection),
new MetadataHydrator($connection, $this->helper), new MetadataHydrator($connection, $this->structure, $this->helper),
new ThesaurusHydrator($this->thesaurus, $candidateTerms, $this->helper), new ThesaurusHydrator($this->structure, $this->thesaurus, $candidateTerms),
new SubDefinitionHydrator($connection) new SubDefinitionHydrator($connection)
), $delegate); ), $delegate);
$fetcher->setBatchSize(200); $fetcher->setBatchSize(200);
@@ -167,6 +170,7 @@ class RecordIndexer
while ($record = $fetcher->fetch()) { while ($record = $fetcher->fetch()) {
$params = array(); $params = array();
$params['id'] = $record['id']; $params['id'] = $record['id'];
unset($record['id']);
$params['type'] = self::TYPE_NAME; $params['type'] = self::TYPE_NAME;
$params['body'] = $this->transform($record); $params['body'] = $this->transform($record);
$bulk->index($params); $bulk->index($params);
@@ -193,68 +197,79 @@ class RecordIndexer
// Dates // Dates
->add('created_on', 'date')->format(Mapping::DATE_FORMAT_MYSQL) ->add('created_on', 'date')->format(Mapping::DATE_FORMAT_MYSQL)
->add('updated_on', 'date')->format(Mapping::DATE_FORMAT_MYSQL) ->add('updated_on', 'date')->format(Mapping::DATE_FORMAT_MYSQL)
// Thesaurus
->add('concept_path', $this->getThesaurusPathMapping())
// EXIF // EXIF
->add('exif', $this->getExifMapping()) ->add('exif', $this->getExifMapping())
// Status // Status
->add('flags', $this->getFlagsMapping()) ->add('flags', $this->getFlagsMapping())
->add('flags_bitfield', 'integer')->notIndexed()
// Keep some fields arround for display purpose // Keep some fields arround for display purpose
->add('subdefs', Mapping::disabledMapping()) ->add('subdefs', Mapping::disabledMapping())
->add('title', Mapping::disabledMapping()) ->add('title', Mapping::disabledMapping())
; ;
// Caption mapping // Caption mapping
$captionMapping = new Mapping(); $this->buildCaptionMapping($this->structure->getUnrestrictedFields(), $mapping, 'caption');
$mapping->add('caption', $captionMapping); $this->buildCaptionMapping($this->structure->getPrivateFields(), $mapping, 'private_caption');
$mapping
->add('caption_all', 'string')
->addLocalizedSubfields($this->locales)
->addRawVersion()
;
$privateCaptionMapping = new Mapping();
$mapping->add('private_caption', $privateCaptionMapping);
$mapping
->add('private_caption_all', 'string')
->addLocalizedSubfields($this->locales)
->addRawVersion()
;
// Inferred thesaurus concepts
$conceptPathMapping = new Mapping();
$mapping->add('concept_path', $conceptPathMapping);
foreach ($this->helper->getFieldsStructure() as $name => $params) {
$m = $params['private'] ? $privateCaptionMapping : $captionMapping;
$m->add($name, $params['type']);
if ($params['type'] === Mapping::TYPE_DATE) {
$m->format(Mapping::DATE_FORMAT_CAPTION);
}
if ($params['type'] === Mapping::TYPE_STRING) {
if (!$params['searchable'] && !$params['to_aggregate']) {
$m->notIndexed();
} elseif (!$params['searchable'] && $params['to_aggregate']) {
$m->notAnalyzed();
$m->addRawVersion();
} else {
$m->addRawVersion();
$m->addAnalyzedVersion($this->locales);
$m->highlight();
}
}
if ($params['thesaurus_concept_inference']) {
$conceptPathMapping
->add($name, 'string')
->analyzer('thesaurus_path', 'indexing')
->analyzer('keyword', 'searching')
->addRawVersion()
;
}
}
return $mapping->export(); return $mapping->export();
} }
private function buildCaptionMapping(array $fields, Mapping $root, $section)
{
$mapping = new Mapping();
foreach ($fields as $field) {
$this->addFieldToMapping($field, $mapping);
}
$root->add($section, $mapping);
$root
->add(sprintf('%s_all', $section), 'string')
->addLocalizedSubfields($this->locales)
->addRawVersion()
;
}
private function addFieldToMapping(Field $field, Mapping $mapping)
{
$type = $field->getType();
$mapping->add($field->getName(), $type);
if ($type === Mapping::TYPE_DATE) {
$mapping->format(Mapping::DATE_FORMAT_CAPTION);
}
if ($type === Mapping::TYPE_STRING) {
$searchable = $field->isSearchable();
$facet = $field->isFacet();
if (!$searchable && !$facet) {
$mapping->notIndexed();
} elseif (!$searchable && $facet) {
$mapping->notAnalyzed();
$mapping->addRawVersion();
} else {
$mapping->addRawVersion();
$mapping->addAnalyzedVersion($this->locales);
$mapping->highlight();
}
}
}
private function getThesaurusPathMapping()
{
$mapping = new Mapping();
foreach ($this->structure->getThesaurusEnabledFields() as $name => $_) {
$mapping
->add($name, 'string')
->analyzer('thesaurus_path', 'indexing')
->analyzer('keyword', 'searching')
->addRawVersion()
;
}
return $mapping;
}
// @todo Add call to addAnalyzedVersion ? // @todo Add call to addAnalyzedVersion ?
private function getExifMapping() private function getExifMapping()
{ {

View File

@@ -13,7 +13,6 @@ namespace Alchemy\Phrasea\SearchEngine\Elastic;
use Alchemy\Phrasea\SearchEngine\Elastic\Exception\MergeException; use Alchemy\Phrasea\SearchEngine\Elastic\Exception\MergeException;
use Alchemy\Phrasea\SearchEngine\Elastic\Mapping; use Alchemy\Phrasea\SearchEngine\Elastic\Mapping;
use Alchemy\Phrasea\SearchEngine\Elastic\Thesaurus\Helper as ThesaurusHelper;
use appbox; use appbox;
use igorw; use igorw;
@@ -23,8 +22,6 @@ class RecordHelper
// Computation caches // Computation caches
private $collectionMap; private $collectionMap;
private $fieldStructure;
private $dateFields;
public function __construct(appbox $appbox) public function __construct(appbox $appbox)
{ {
@@ -76,46 +73,6 @@ class RecordHelper
return StringUtils::slugify($key, '_'); return StringUtils::slugify($key, '_');
} }
/**
* @todo Extract in a proper field construct
*/
public function getFields($includePrivate = false, $onlySearchable = true)
{
$fields = array();
foreach ($this->getFieldsStructure() as $name => $options) {
// Skip private fields
if ($options['private'] && !$includePrivate) {
continue;
}
// Skip not searchable fields
if ($onlySearchable && !$options['searchable']) {
continue;
}
$fields[] = $name;
}
return $fields;
}
/**
* @todo Extract in a proper field construct
*/
public function getDateFields()
{
if ($this->dateFields === null) {
$fields = array();
foreach ($this->getFieldsStructure() as $name => $options) {
if ($options['type'] !== 'date') {
continue;
}
$fields[] = $name;
}
$this->dateFields = $fields;
}
return $this->dateFields;
}
public function sanitizeDate($value) public function sanitizeDate($value)
{ {
// introduced in https://github.com/alchemy-fr/Phraseanet/commit/775ce804e0257d3a06e4e068bd17330a79eb8370#diff-bee690ed259e0cf73a31dee5295d2edcR286 // introduced in https://github.com/alchemy-fr/Phraseanet/commit/775ce804e0257d3a06e4e068bd17330a79eb8370#diff-bee690ed259e0cf73a31dee5295d2edcR286
@@ -127,90 +84,4 @@ class RecordHelper
return null; return null;
} }
} }
/**
* @todo Extract in a proper field construct
*/
public function getFieldsStructure()
{
if (!empty($this->fieldsStructure)) {
return $this->fieldsStructure;
}
$fields = array();
foreach ($this->appbox->get_databoxes() as $databox) {
//printf("Databox %d\n", $databox->get_sbas_id());
foreach ($databox->get_meta_structure() as $fieldStructure) {
$field = array();
// Field type
switch ($fieldStructure->get_type()) {
case \databox_field::TYPE_DATE:
$field['type'] = Mapping::TYPE_DATE;
break;
case \databox_field::TYPE_NUMBER:
$field['type'] = Mapping::TYPE_DOUBLE;
break;
case \databox_field::TYPE_STRING:
case \databox_field::TYPE_TEXT:
$field['type'] = Mapping::TYPE_STRING;
break;
default:
throw new Exception(sprintf('Invalid field type "%s", expected "date", "number" or "string".', $fieldStructure->get_type()));
break;
}
$name = $fieldStructure->get_name();
$field['databox_ids'][] = $databox->get_sbas_id();
// Business rules
$field['private'] = $fieldStructure->isBusiness();
$field['searchable'] = $fieldStructure->is_indexable();
$field['to_aggregate'] = (bool) $fieldStructure->isAggregable();
// Thesaurus concept inference
$xpath = $fieldStructure->get_tbranch();
if ($field['type'] === Mapping::TYPE_STRING && $xpath ==! '') {
$field['thesaurus_concept_inference'] = true;
$field['thesaurus_root_concepts'] = ThesaurusHelper::findConceptsByXPath($databox, $xpath);
} else {
$field['thesaurus_concept_inference'] = false;
$field['thesaurus_root_concepts'] = [];
}
//printf("Field \"%s\" <%s> (private: %b)\n", $name, $field['type'], $field['private']);
// Since mapping is merged between databoxes, two fields may
// have conflicting names. Indexing is the same for a given
// type so we reject only those with different types.
if (isset($fields[$name])) {
// keep tracks of databox_id's where the field belongs to
$fields[$name]['databox_ids'][] = $databox->get_sbas_id();
if ($fields[$name]['type'] !== $field['type']) {
throw new MergeException(sprintf("Field %s can't be merged, incompatible types (%s vs %s)", $name, $fields[$name]['type'], $field['type']));
}
if ($fields[$name]['private'] !== $field['private']) {
throw new MergeException(sprintf("Field %s can't be merged, could not mix private and public fields with same name", $name));
}
if ($fields[$name]['searchable'] !== $field['searchable']) {
throw new MergeException(sprintf("Field %s can't be merged, incompatible searchable state", $name));
}
if ($fields[$name]['to_aggregate'] !== $field['to_aggregate']) {
throw new MergeException(sprintf("Field %s can't be merged, incompatible to_aggregate state", $name));
}
// TODO other structure incompatibilities
//printf("Merged with previous \"%s\" field\n", $name);
}
$fields[$name] = $field;
}
}
return $this->fieldsStructure = $fields;
}
} }

View File

@@ -0,0 +1,132 @@
<?php
namespace Alchemy\Phrasea\SearchEngine\Elastic\Structure;
use Alchemy\Phrasea\SearchEngine\Elastic\Exception\MergeException;
use Alchemy\Phrasea\SearchEngine\Elastic\Mapping;
use Alchemy\Phrasea\SearchEngine\Elastic\Thesaurus\Helper as ThesaurusHelper;
use databox_field;
/**
 * Description of a single caption field as seen by the search engine.
 *
 * A field carries its name, its mapping type (one of the Mapping::TYPE_*
 * values), three boolean flags (searchable, private, facet) and, when concept
 * inference is enabled, the thesaurus root concepts attached to it.
 *
 * @todo Field labels
 */
class Field
{
    private $name;
    private $type;
    private $is_searchable;
    private $is_private;
    private $is_facet;
    // null when concept inference is disabled for this field
    private $thesaurus_roots;

    /**
     * Builds a Field from a legacy databox field definition.
     *
     * Concept inference is only enabled for string fields having a non-empty
     * thesaurus branch (tbranch) XPath; in that case the root concepts are
     * resolved through ThesaurusHelper::findConceptsByXPath().
     *
     * @param databox_field $field
     *
     * @return self
     */
    public static function createFromLegacyField(databox_field $field)
    {
        $type = self::getTypeFromLegacy($field);

        // Thesaurus concept inference
        $xpath = $field->get_tbranch();
        if ($type === Mapping::TYPE_STRING && !empty($xpath)) {
            $databox = $field->get_databox();
            $roots = ThesaurusHelper::findConceptsByXPath($databox, $xpath);
        } else {
            $roots = null;
        }

        return new self(
            $field->get_name(),
            $type,
            $field->is_indexable(),
            $field->isBusiness(),
            $field->isAggregable(),
            $roots
        );
    }

    /**
     * Translates a legacy databox field type into a Mapping::TYPE_* value.
     *
     * @param databox_field $field
     *
     * @return string
     *
     * @throws \Alchemy\Phrasea\SearchEngine\Elastic\Exception\Exception when the legacy type is not supported
     */
    private static function getTypeFromLegacy(databox_field $field)
    {
        $type = $field->get_type();
        switch ($type) {
            case databox_field::TYPE_DATE:
                return Mapping::TYPE_DATE;
            case databox_field::TYPE_NUMBER:
                return Mapping::TYPE_DOUBLE;
            case databox_field::TYPE_STRING:
            case databox_field::TYPE_TEXT:
                return Mapping::TYPE_STRING;
            default:
                // Fully qualified on purpose: a bare "Exception" would be
                // resolved relative to the current namespace, where no such
                // class exists.
                throw new \Alchemy\Phrasea\SearchEngine\Elastic\Exception\Exception(sprintf('Invalid field type "%s", expected "date", "number" or "string".', $type));
        }
    }

    /**
     * @param string     $name            Field name
     * @param string     $type            Mapping type of the field
     * @param bool       $searchable      Whether the field is searchable
     * @param bool       $private         Whether the field is private
     * @param bool       $facet           Whether the field is a facet
     * @param array|null $thesaurus_roots Root concepts, or null to disable concept inference
     */
    public function __construct($name, $type, $searchable = true, $private = false, $facet = false, array $thesaurus_roots = null)
    {
        $this->name = (string) $name;
        $this->type = (string) $type;
        $this->is_searchable = (bool) $searchable;
        $this->is_private = (bool) $private;
        $this->is_facet = (bool) $facet;
        $this->thesaurus_roots = $thesaurus_roots;
    }

    /**
     * @return string
     */
    public function getName()
    {
        return $this->name;
    }

    /**
     * @return string
     */
    public function getType()
    {
        return $this->type;
    }

    /**
     * @return bool
     */
    public function isSearchable()
    {
        return $this->is_searchable;
    }

    /**
     * @return bool
     */
    public function isPrivate()
    {
        return $this->is_private;
    }

    /**
     * @return bool
     */
    public function isFacet()
    {
        return $this->is_facet;
    }

    /**
     * Tells whether concept inference is enabled, i.e. whether thesaurus
     * roots were provided (an empty array still counts as enabled).
     *
     * @return bool
     */
    public function hasConceptInference()
    {
        return $this->thesaurus_roots !== null;
    }

    /**
     * @return array|null Root concepts, or null when concept inference is disabled
     */
    public function getThesaurusRoots()
    {
        return $this->thesaurus_roots;
    }

    /**
     * Checks that another definition of the same field is compatible with
     * this one (same name, type, private, searchable and facet state).
     *
     * This is a validation only: neither field is modified, and thesaurus
     * roots are neither compared nor combined.
     *
     * @param Field $other
     *
     * @throws MergeException when the two definitions are incompatible
     */
    public function mergeWith(Field $other)
    {
        if (($name = $other->getName()) !== $this->name) {
            throw new MergeException(sprintf("Fields have different names (%s vs %s)", $this->name, $name));
        }

        // Since mapping is merged between databoxes, two fields may
        // have conflicting names. Indexing is the same for a given
        // type so we reject only those with different types.
        if (($type = $other->getType()) !== $this->type) {
            // Reported as "existing vs incoming", like the names above
            throw new MergeException(sprintf("Field %s can't be merged, incompatible types (%s vs %s)", $name, $this->type, $type));
        }

        if ($other->isPrivate() !== $this->is_private) {
            throw new MergeException(sprintf("Field %s can't be merged, could not mix private and public fields with same name", $name));
        }

        if ($other->isSearchable() !== $this->is_searchable) {
            throw new MergeException(sprintf("Field %s can't be merged, incompatible searchable state", $name));
        }

        if ($other->isFacet() !== $this->is_facet) {
            throw new MergeException(sprintf("Field %s can't be merged, incompatible to_aggregate state", $name));
        }
    }
}

View File

@@ -0,0 +1,99 @@
<?php
namespace Alchemy\Phrasea\SearchEngine\Elastic\Structure;
use Alchemy\Phrasea\SearchEngine\Elastic\Mapping;
/**
 * Merged field structure: a registry of Field objects indexed by name, with
 * secondary indexes for date, private, facet and thesaurus-enabled fields.
 */
class Structure
{
    // All indexes below are keyed by field name
    private $fields = array();
    private $date_fields = array();
    private $thesaurus_fields = array();
    private $private = array();
    private $facets = array();
    // NOTE(review): never read nor written in this class; presumably reserved
    // for the label dereferencing still to be done -- confirm
    private $aliases = array();

    /**
     * Builds a structure from every field of the given databoxes.
     *
     * @param array $databoxes Objects exposing get_meta_structure()
     *
     * @return self
     */
    public static function fromDataboxes(array $databoxes)
    {
        $structure = new self();
        foreach ($databoxes as $databox) {
            foreach ($databox->get_meta_structure() as $fieldStructure) {
                $field = Field::createFromLegacyField($fieldStructure);
                $structure->add($field);
            }
        }

        return $structure;
    }

    /**
     * Registers a field.
     *
     * When a field with the same name is already registered, the new one is
     * checked against it with Field::mergeWith() and the first registered
     * instance is kept in the main index.
     *
     * NOTE(review): the secondary indexes always reference the field passed
     * in, so after a merge they may hold a different instance than
     * getAllFields() for the same name.
     *
     * @param Field $field
     */
    public function add(Field $field)
    {
        $name = $field->getName();
        if (isset($this->fields[$name])) {
            $this->fields[$name]->mergeWith($field);
        } else {
            $this->fields[$name] = $field;
        }

        if ($field->getType() === Mapping::TYPE_DATE) {
            $this->date_fields[$name] = $field;
        }
        if ($field->isPrivate()) {
            $this->private[$name] = $field;
        }
        if ($field->isFacet()) {
            $this->facets[$name] = $field;
        }
        if ($field->hasConceptInference()) {
            $this->thesaurus_fields[$name] = $field;
        }
    }

    /**
     * @return Field[] Every registered field, keyed by name
     */
    public function getAllFields()
    {
        return $this->fields;
    }

    /**
     * @return Field[] Registered fields that are not private, keyed by name
     */
    public function getUnrestrictedFields()
    {
        return array_diff_key($this->fields, $this->private);
    }

    /**
     * @return Field[] Private fields, keyed by name
     */
    public function getPrivateFields()
    {
        return $this->private;
    }

    /**
     * @return Field[] Facet fields, keyed by name
     */
    public function getFacetFields()
    {
        // TODO should we only return searchable fields?
        return $this->facets;
    }

    /**
     * @return Field[] Fields with concept inference enabled, keyed by name
     */
    public function getThesaurusEnabledFields()
    {
        return $this->thesaurus_fields;
    }

    /**
     * @return Field[] Date fields, keyed by name
     */
    public function getDateFields()
    {
        return $this->date_fields;
    }

    /**
     * @param string $name
     *
     * @return Field|null The field, or null when the name is unknown
     */
    public function get($name)
    {
        return isset($this->fields[$name]) ?
            $this->fields[$name] : null;
    }

    /**
     * @param string $name
     *
     * @return string|null The field type, or null when the name is unknown
     */
    public function typeOf($name)
    {
        return isset($this->fields[$name]) ?
            $this->fields[$name]->getType() : null;
    }

    /**
     * @param string $name
     *
     * @return bool|null true for a private field, false for any other
     *                   registered field, null when the name is unknown
     */
    public function isPrivate($name)
    {
        // Guard clauses instead of a nested ternary: unparenthesized nested
        // ternaries are left-associative before PHP 8 (private fields would
        // yield false) and are rejected by PHP 8.
        if (isset($this->private[$name])) {
            return true;
        }
        if (isset($this->fields[$name])) {
            return false;
        }

        return null;
    }
}

View File

@@ -0,0 +1,33 @@
<?php
namespace Alchemy\Tests\Phrasea\SearchEngine;
use Alchemy\Phrasea\SearchEngine\Elastic\Mapping;
use Alchemy\Phrasea\SearchEngine\Elastic\Structure\Field;
use Alchemy\Phrasea\SearchEngine\Elastic\Structure\Structure;
/**
 * Unit tests for the merged field structure (Structure and Field).
 */
class StructureTest extends \PHPUnit_Framework_TestCase
{
    /**
     * Merging two identical default fields succeeds and leaves the first
     * field with its name, type and default flags.
     */
    public function testFieldMerge()
    {
        $first = new Field('foo', Mapping::TYPE_STRING);
        $first->mergeWith(new Field('foo', Mapping::TYPE_STRING));

        $this->assertEquals('foo', $first->getName());
        $this->assertEquals(Mapping::TYPE_STRING, $first->getType());
        $this->assertTrue($first->isSearchable());
        $this->assertFalse($first->isPrivate());
        $this->assertFalse($first->isFacet());
    }

    /**
     * Adding the same field name twice keeps a single entry in the structure.
     */
    public function testFieldAdd()
    {
        $structure = new Structure();
        $this->assertEmpty($structure->getAllFields());

        // The second pass adds a field with the same name: the structure
        // should still contain only one entry.
        for ($pass = 0; $pass < 2; $pass++) {
            $structure->add(new Field('foo', Mapping::TYPE_STRING));
            $this->assertCount(1, $structure->getAllFields());
        }
    }
}