mirror of
https://github.com/alchemy-fr/Phraseanet.git
synced 2025-10-18 15:33:15 +00:00
Refactor merged field structure
Here is the new model:

    +-----------------------------+
    |          Structure          |
    +-----------------------------+
    | +fromDataboxes()            |
    | getAllFields()              |
    | getUnrestrictedFields()     |
    | getPrivateFields()          |
    | getFacetFields()            |
    | getThesaurusEnabledFields() |
    | getDateFields()             |
    |- - - - - - - - - - - - - - -|
    | add()                       |
    | get()                       |
    | typeOf()                    |
    | isPrivate()                 |
    +-------+-+-+-----------------+
            | | |          +---------------------+
            | | +--------> | Field               |
            | |            +---------------------+
            | |            | getName()           |
            | |            | getType()           |
            | |            | isXXX()             |
            | |            | getThesaurusRoots() |
            | |            +---------------------+
            | |
            | |            +-------+
            | +----------> | Field |
            |              +-------+
            |
            |              +-------+
            +------------> | Field |
                           +-------+

It was driven by the following use cases:

- Get list of facets (only searchable fields)
- Get list of fields with concept inference
- Get list of all fields
  - Split in private / public fields (to define mapping)
- Get all date fields
- Get field type
  - To apply sanitization rules
  - To define mapping
- Check if concept inference is enabled
- Check if the field is searchable
- Check if the field is a facet
- Check if the field is private
- Dereference field from label (still to be done)

(The last two UCs are new)

Also removed old code from legacy search engines.

[#PHRAS-500]
This commit is contained in:
16
bin/console
16
bin/console
@@ -122,15 +122,13 @@ $cli->command(new H264MappingGenerator());
|
||||
$cli->command(new XSendFileConfigurationDumper());
|
||||
$cli->command(new XSendFileMappingGenerator());
|
||||
|
||||
if ($cli['search_engine.type'] === SearchEngineInterface::TYPE_ELASTICSEARCH) {
|
||||
$cli->command(new IndexCreateCommand());
|
||||
$cli->command(new IndexDropCommand());
|
||||
$cli->command(new MappingUpdateCommand());
|
||||
$cli->command(new IndexPopulateCommand());
|
||||
$cli->command(new QueryParseCommand());
|
||||
$cli->command(new QuerySampleCommand());
|
||||
$cli->command(new FindConceptsCommand());
|
||||
}
|
||||
$cli->command(new IndexCreateCommand());
|
||||
$cli->command(new IndexDropCommand());
|
||||
$cli->command(new MappingUpdateCommand());
|
||||
$cli->command(new IndexPopulateCommand());
|
||||
$cli->command(new QueryParseCommand());
|
||||
$cli->command(new QuerySampleCommand());
|
||||
$cli->command(new FindConceptsCommand());
|
||||
|
||||
$cli->loadPlugins();
|
||||
|
||||
|
@@ -23,6 +23,7 @@ use Alchemy\Phrasea\SearchEngine\Elastic\RecordHelper;
|
||||
use Alchemy\Phrasea\SearchEngine\Elastic\Search\Escaper;
|
||||
use Alchemy\Phrasea\SearchEngine\Elastic\Search\FacetsResponse;
|
||||
use Alchemy\Phrasea\SearchEngine\Elastic\Search\QueryCompiler;
|
||||
use Alchemy\Phrasea\SearchEngine\Elastic\Structure\Structure;
|
||||
use Alchemy\Phrasea\SearchEngine\Elastic\Thesaurus;
|
||||
use Alchemy\Phrasea\SearchEngine\Phrasea\PhraseaEngineSubscriber;
|
||||
use Elasticsearch\Client;
|
||||
@@ -41,27 +42,18 @@ class SearchEngineServiceProvider implements ServiceProviderInterface
|
||||
return $app['search_engine'];
|
||||
};
|
||||
|
||||
$app['search_engine'] = $app->share(function ($app) {
|
||||
$type = $app['search_engine.type'];
|
||||
switch ($type) {
|
||||
case SearchEngineInterface::TYPE_ELASTICSEARCH:
|
||||
return $app['elasticsearch.engine'];
|
||||
default:
|
||||
throw new InvalidArgumentException(sprintf('Invalid search engine type "%s".', $type));
|
||||
}
|
||||
});
|
||||
|
||||
$app['search_engine.type'] = function ($app) {
|
||||
return $app['conf']->get(['main', 'search-engine', 'type']);
|
||||
};
|
||||
|
||||
$app['phraseanet.SE.logger'] = $app->share(function (Application $app) {
|
||||
return new SearchEngineLogger($app);
|
||||
});
|
||||
|
||||
$app['elasticsearch.engine'] = $app->share(function ($app) {
|
||||
$app['search_engine'] = $app->share(function ($app) {
|
||||
$type = $app['conf']->get(['main', 'search-engine', 'type']);
|
||||
if ($type !== SearchEngineInterface::TYPE_ELASTICSEARCH) {
|
||||
throw new InvalidArgumentException(sprintf('Invalid search engine type "%s".', $type));
|
||||
}
|
||||
return new ElasticSearchEngine(
|
||||
$app,
|
||||
$app['search_engine.structure'],
|
||||
$app['elasticsearch.client'],
|
||||
$app['elasticsearch.options']['index'],
|
||||
$app['locales.available'],
|
||||
@@ -70,6 +62,11 @@ class SearchEngineServiceProvider implements ServiceProviderInterface
|
||||
);
|
||||
});
|
||||
|
||||
$app['search_engine.structure'] = $app->share(function ($app) {
|
||||
$databoxes = $app['phraseanet.appbox']->get_databoxes();
|
||||
return Structure::fromDataboxes($databoxes);
|
||||
});
|
||||
|
||||
$app['elasticsearch.facets_response.factory'] = $app->protect(function (array $response) use ($app) {
|
||||
return new FacetsResponse(new Escaper(), $response);
|
||||
});
|
||||
@@ -96,6 +93,7 @@ class SearchEngineServiceProvider implements ServiceProviderInterface
|
||||
$logger = new \Monolog\Logger('indexer');
|
||||
$logger->pushHandler(new \Monolog\Handler\ErrorLogHandler());
|
||||
return new RecordIndexer(
|
||||
$app['search_engine.structure'],
|
||||
$app['elasticsearch.record_helper'],
|
||||
$app['thesaurus'],
|
||||
$app['phraseanet.appbox'],
|
||||
@@ -193,12 +191,5 @@ class SearchEngineServiceProvider implements ServiceProviderInterface
|
||||
|
||||
public function boot(Application $app)
|
||||
{
|
||||
if ($app['search_engine.type'] === SearchEngineInterface::TYPE_PHRASEA) {
|
||||
$app['dispatcher'] = $app->share($app->extend('dispatcher', function ($dispatcher, Application $app) {
|
||||
$dispatcher->addSubscriber($app['phraseanet.SE.subscriber']);
|
||||
|
||||
return $dispatcher;
|
||||
}));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -17,6 +17,7 @@ use Alchemy\Phrasea\SearchEngine\Elastic\Indexer\TermIndexer;
|
||||
use Alchemy\Phrasea\SearchEngine\Elastic\RecordHelper;
|
||||
use Alchemy\Phrasea\SearchEngine\Elastic\Search\FacetsResponse;
|
||||
use Alchemy\Phrasea\SearchEngine\Elastic\Search\QueryContext;
|
||||
use Alchemy\Phrasea\SearchEngine\Elastic\Structure\Structure;
|
||||
use Alchemy\Phrasea\SearchEngine\SearchEngineInterface;
|
||||
use Alchemy\Phrasea\SearchEngine\SearchEngineOptions;
|
||||
use Alchemy\Phrasea\SearchEngine\SearchEngineResult;
|
||||
@@ -34,17 +35,18 @@ class ElasticSearchEngine implements SearchEngineInterface
|
||||
const FLAG_UNSET_ONLY = 'unset_only';
|
||||
|
||||
private $app;
|
||||
private $structure;
|
||||
/** @var Client */
|
||||
private $client;
|
||||
private $dateFields;
|
||||
private $indexName;
|
||||
private $configurationPanel;
|
||||
private $locales;
|
||||
private $recordHelper;
|
||||
|
||||
public function __construct(Application $app, Client $client, $indexName, array $locales, RecordHelper $recordHelper, Closure $facetsResponseFactory)
|
||||
public function __construct(Application $app, Structure $structure, Client $client, $indexName, array $locales, RecordHelper $recordHelper, Closure $facetsResponseFactory)
|
||||
{
|
||||
$this->app = $app;
|
||||
$this->structure = $structure;
|
||||
$this->client = $client;
|
||||
$this->locales = array_keys($locales);
|
||||
$this->recordHelper = $recordHelper;
|
||||
@@ -117,11 +119,7 @@ class ElasticSearchEngine implements SearchEngineInterface
|
||||
*/
|
||||
public function getAvailableDateFields()
|
||||
{
|
||||
if ($this->dateFields === null) {
|
||||
$this->dateFields = $this->recordHelper->getDateFields();
|
||||
}
|
||||
|
||||
return $this->dateFields;
|
||||
return array_keys($this->structure->getDateFields());
|
||||
}
|
||||
|
||||
/**
|
||||
|
@@ -14,6 +14,7 @@ namespace Alchemy\Phrasea\SearchEngine\Elastic\Indexer\Record\Hydrator;
|
||||
use Alchemy\Phrasea\SearchEngine\Elastic\Exception\Exception;
|
||||
use Alchemy\Phrasea\SearchEngine\Elastic\Mapping;
|
||||
use Alchemy\Phrasea\SearchEngine\Elastic\RecordHelper;
|
||||
use Alchemy\Phrasea\SearchEngine\Elastic\Structure\Structure;
|
||||
use Doctrine\DBAL\Connection;
|
||||
use Doctrine\DBAL\Driver\Connection as DriverConnection;
|
||||
use DomainException;
|
||||
@@ -21,12 +22,13 @@ use DomainException;
|
||||
class MetadataHydrator implements HydratorInterface
|
||||
{
|
||||
private $connection;
|
||||
private $structure;
|
||||
private $helper;
|
||||
private $fields;
|
||||
|
||||
public function __construct(DriverConnection $connection, RecordHelper $helper)
|
||||
public function __construct(DriverConnection $connection, Structure $structure, RecordHelper $helper)
|
||||
{
|
||||
$this->connection = $connection;
|
||||
$this->structure = $structure;
|
||||
$this->helper = $helper;
|
||||
}
|
||||
|
||||
@@ -72,7 +74,7 @@ SQL;
|
||||
switch ($metadata['type']) {
|
||||
case 'caption':
|
||||
// Sanitize fields
|
||||
switch ($this->getFieldType($key)) {
|
||||
switch ($this->structure->typeOf($key)) {
|
||||
case Mapping::TYPE_DATE:
|
||||
$value = $this->helper->sanitizeDate($value);
|
||||
break;
|
||||
@@ -115,16 +117,4 @@ SQL;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private function getFieldType($key)
|
||||
{
|
||||
if ($this->fields === null) {
|
||||
$this->fields = $this->helper->getFieldsStructure();
|
||||
}
|
||||
if (!isset($this->fields[$key]['type'])) {
|
||||
throw new DomainException(sprintf('Unknown field "%s".', $key));
|
||||
}
|
||||
|
||||
return $this->fields[$key]['type'];
|
||||
}
|
||||
}
|
||||
|
@@ -18,29 +18,28 @@ use Alchemy\Phrasea\SearchEngine\Elastic\Thesaurus\CandidateTerms;
|
||||
use Alchemy\Phrasea\SearchEngine\Elastic\Thesaurus\Concept;
|
||||
use Alchemy\Phrasea\SearchEngine\Elastic\Thesaurus\Filter;
|
||||
use Alchemy\Phrasea\SearchEngine\Elastic\Thesaurus\Term;
|
||||
use Alchemy\Phrasea\SearchEngine\Elastic\Structure\Structure;
|
||||
|
||||
class ThesaurusHydrator implements HydratorInterface
|
||||
{
|
||||
private $structure;
|
||||
private $thesaurus;
|
||||
private $candidate_terms;
|
||||
private $helper;
|
||||
|
||||
public function __construct(Thesaurus $thesaurus, CandidateTerms $candidate_terms, RecordHelper $helper)
|
||||
public function __construct(Structure $structure, Thesaurus $thesaurus, CandidateTerms $candidate_terms)
|
||||
{
|
||||
$this->structure = $structure;
|
||||
$this->thesaurus = $thesaurus;
|
||||
$this->candidate_terms = $candidate_terms;
|
||||
$this->helper = $helper;
|
||||
}
|
||||
|
||||
public function hydrateRecords(array &$records)
|
||||
{
|
||||
// Fields with concept inference enabled
|
||||
$structure = $this->helper->getFieldsStructure();
|
||||
$structure = $this->structure->getThesaurusEnabledFields();
|
||||
$fields = array();
|
||||
foreach ($structure as $name => $options) {
|
||||
if ($options['thesaurus_concept_inference']) {
|
||||
$fields[$name] = $options['thesaurus_root_concepts'];
|
||||
}
|
||||
foreach ($structure as $name => $field) {
|
||||
$fields[$name] = $field->getThesaurusRoots();
|
||||
}
|
||||
// Hydrate records with concepts
|
||||
foreach ($records as &$record) {
|
||||
|
@@ -28,6 +28,8 @@ use Alchemy\Phrasea\SearchEngine\Elastic\Indexer\RecordQueuer;
|
||||
use Alchemy\Phrasea\SearchEngine\Elastic\Mapping;
|
||||
use Alchemy\Phrasea\SearchEngine\Elastic\RecordHelper;
|
||||
use Alchemy\Phrasea\SearchEngine\Elastic\StringUtils;
|
||||
use Alchemy\Phrasea\SearchEngine\Elastic\Structure\Field;
|
||||
use Alchemy\Phrasea\SearchEngine\Elastic\Structure\Structure;
|
||||
use Alchemy\Phrasea\SearchEngine\Elastic\Thesaurus;
|
||||
use Alchemy\Phrasea\SearchEngine\Elastic\Thesaurus\CandidateTerms;
|
||||
use databox;
|
||||
@@ -55,8 +57,9 @@ class RecordIndexer
|
||||
|
||||
private $logger;
|
||||
|
||||
public function __construct(RecordHelper $helper, Thesaurus $thesaurus, \appbox $appbox, array $locales, LoggerInterface $logger)
|
||||
public function __construct(Structure $structure, RecordHelper $helper, Thesaurus $thesaurus, \appbox $appbox, array $locales, LoggerInterface $logger)
|
||||
{
|
||||
$this->structure = $structure;
|
||||
$this->helper = $helper;
|
||||
$this->thesaurus = $thesaurus;
|
||||
$this->appbox = $appbox;
|
||||
@@ -137,8 +140,8 @@ class RecordIndexer
|
||||
$fetcher = new Fetcher($connection, array(
|
||||
new CoreHydrator($databox->get_sbas_id(), $this->helper),
|
||||
new TitleHydrator($connection),
|
||||
new MetadataHydrator($connection, $this->helper),
|
||||
new ThesaurusHydrator($this->thesaurus, $candidateTerms, $this->helper),
|
||||
new MetadataHydrator($connection, $this->structure, $this->helper),
|
||||
new ThesaurusHydrator($this->structure, $this->thesaurus, $candidateTerms),
|
||||
new SubDefinitionHydrator($connection)
|
||||
), $delegate);
|
||||
$fetcher->setBatchSize(200);
|
||||
@@ -167,6 +170,7 @@ class RecordIndexer
|
||||
while ($record = $fetcher->fetch()) {
|
||||
$params = array();
|
||||
$params['id'] = $record['id'];
|
||||
unset($record['id']);
|
||||
$params['type'] = self::TYPE_NAME;
|
||||
$params['body'] = $this->transform($record);
|
||||
$bulk->index($params);
|
||||
@@ -193,66 +197,77 @@ class RecordIndexer
|
||||
// Dates
|
||||
->add('created_on', 'date')->format(Mapping::DATE_FORMAT_MYSQL)
|
||||
->add('updated_on', 'date')->format(Mapping::DATE_FORMAT_MYSQL)
|
||||
// Thesaurus
|
||||
->add('concept_path', $this->getThesaurusPathMapping())
|
||||
// EXIF
|
||||
->add('exif', $this->getExifMapping())
|
||||
// Status
|
||||
->add('flags', $this->getFlagsMapping())
|
||||
->add('flags_bitfield', 'integer')->notIndexed()
|
||||
// Keep some fields around for display purposes
|
||||
->add('subdefs', Mapping::disabledMapping())
|
||||
->add('title', Mapping::disabledMapping())
|
||||
;
|
||||
|
||||
// Caption mapping
|
||||
$captionMapping = new Mapping();
|
||||
$mapping->add('caption', $captionMapping);
|
||||
$mapping
|
||||
->add('caption_all', 'string')
|
||||
->addLocalizedSubfields($this->locales)
|
||||
->addRawVersion()
|
||||
;
|
||||
$privateCaptionMapping = new Mapping();
|
||||
$mapping->add('private_caption', $privateCaptionMapping);
|
||||
$mapping
|
||||
->add('private_caption_all', 'string')
|
||||
->addLocalizedSubfields($this->locales)
|
||||
->addRawVersion()
|
||||
;
|
||||
// Inferred thesaurus concepts
|
||||
$conceptPathMapping = new Mapping();
|
||||
$mapping->add('concept_path', $conceptPathMapping);
|
||||
$this->buildCaptionMapping($this->structure->getUnrestrictedFields(), $mapping, 'caption');
|
||||
$this->buildCaptionMapping($this->structure->getPrivateFields(), $mapping, 'private_caption');
|
||||
|
||||
foreach ($this->helper->getFieldsStructure() as $name => $params) {
|
||||
$m = $params['private'] ? $privateCaptionMapping : $captionMapping;
|
||||
$m->add($name, $params['type']);
|
||||
|
||||
if ($params['type'] === Mapping::TYPE_DATE) {
|
||||
$m->format(Mapping::DATE_FORMAT_CAPTION);
|
||||
return $mapping->export();
|
||||
}
|
||||
|
||||
if ($params['type'] === Mapping::TYPE_STRING) {
|
||||
if (!$params['searchable'] && !$params['to_aggregate']) {
|
||||
$m->notIndexed();
|
||||
} elseif (!$params['searchable'] && $params['to_aggregate']) {
|
||||
$m->notAnalyzed();
|
||||
$m->addRawVersion();
|
||||
private function buildCaptionMapping(array $fields, Mapping $root, $section)
|
||||
{
|
||||
$mapping = new Mapping();
|
||||
foreach ($fields as $field) {
|
||||
$this->addFieldToMapping($field, $mapping);
|
||||
}
|
||||
$root->add($section, $mapping);
|
||||
$root
|
||||
->add(sprintf('%s_all', $section), 'string')
|
||||
->addLocalizedSubfields($this->locales)
|
||||
->addRawVersion()
|
||||
;
|
||||
}
|
||||
|
||||
private function addFieldToMapping(Field $field, Mapping $mapping)
|
||||
{
|
||||
$type = $field->getType();
|
||||
$mapping->add($field->getName(), $type);
|
||||
|
||||
if ($type === Mapping::TYPE_DATE) {
|
||||
$mapping->format(Mapping::DATE_FORMAT_CAPTION);
|
||||
}
|
||||
|
||||
if ($type === Mapping::TYPE_STRING) {
|
||||
$searchable = $field->isSearchable();
|
||||
$facet = $field->isFacet();
|
||||
if (!$searchable && !$facet) {
|
||||
$mapping->notIndexed();
|
||||
} elseif (!$searchable && $facet) {
|
||||
$mapping->notAnalyzed();
|
||||
$mapping->addRawVersion();
|
||||
} else {
|
||||
$m->addRawVersion();
|
||||
$m->addAnalyzedVersion($this->locales);
|
||||
$m->highlight();
|
||||
$mapping->addRawVersion();
|
||||
$mapping->addAnalyzedVersion($this->locales);
|
||||
$mapping->highlight();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if ($params['thesaurus_concept_inference']) {
|
||||
$conceptPathMapping
|
||||
private function getThesaurusPathMapping()
|
||||
{
|
||||
$mapping = new Mapping();
|
||||
foreach ($this->structure->getThesaurusEnabledFields() as $name => $_) {
|
||||
$mapping
|
||||
->add($name, 'string')
|
||||
->analyzer('thesaurus_path', 'indexing')
|
||||
->analyzer('keyword', 'searching')
|
||||
->addRawVersion()
|
||||
;
|
||||
}
|
||||
}
|
||||
|
||||
return $mapping->export();
|
||||
return $mapping;
|
||||
}
|
||||
|
||||
// @todo Add call to addAnalyzedVersion ?
|
||||
|
@@ -13,7 +13,6 @@ namespace Alchemy\Phrasea\SearchEngine\Elastic;
|
||||
|
||||
use Alchemy\Phrasea\SearchEngine\Elastic\Exception\MergeException;
|
||||
use Alchemy\Phrasea\SearchEngine\Elastic\Mapping;
|
||||
use Alchemy\Phrasea\SearchEngine\Elastic\Thesaurus\Helper as ThesaurusHelper;
|
||||
use appbox;
|
||||
use igorw;
|
||||
|
||||
@@ -23,8 +22,6 @@ class RecordHelper
|
||||
|
||||
// Computation caches
|
||||
private $collectionMap;
|
||||
private $fieldStructure;
|
||||
private $dateFields;
|
||||
|
||||
public function __construct(appbox $appbox)
|
||||
{
|
||||
@@ -76,46 +73,6 @@ class RecordHelper
|
||||
return StringUtils::slugify($key, '_');
|
||||
}
|
||||
|
||||
/**
|
||||
* @todo Extract in a proper field construct
|
||||
*/
|
||||
public function getFields($includePrivate = false, $onlySearchable = true)
|
||||
{
|
||||
$fields = array();
|
||||
foreach ($this->getFieldsStructure() as $name => $options) {
|
||||
// Skip private fields
|
||||
if ($options['private'] && !$includePrivate) {
|
||||
continue;
|
||||
}
|
||||
// Skip not searchable fields
|
||||
if ($onlySearchable && !$options['searchable']) {
|
||||
continue;
|
||||
}
|
||||
$fields[] = $name;
|
||||
}
|
||||
|
||||
return $fields;
|
||||
}
|
||||
|
||||
/**
|
||||
* @todo Extract in a proper field construct
|
||||
*/
|
||||
public function getDateFields()
|
||||
{
|
||||
if ($this->dateFields === null) {
|
||||
$fields = array();
|
||||
foreach ($this->getFieldsStructure() as $name => $options) {
|
||||
if ($options['type'] !== 'date') {
|
||||
continue;
|
||||
}
|
||||
$fields[] = $name;
|
||||
}
|
||||
$this->dateFields = $fields;
|
||||
}
|
||||
|
||||
return $this->dateFields;
|
||||
}
|
||||
|
||||
public function sanitizeDate($value)
|
||||
{
|
||||
// introduced in https://github.com/alchemy-fr/Phraseanet/commit/775ce804e0257d3a06e4e068bd17330a79eb8370#diff-bee690ed259e0cf73a31dee5295d2edcR286
|
||||
@@ -127,90 +84,4 @@ class RecordHelper
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @todo Extract in a proper field construct
|
||||
*/
|
||||
public function getFieldsStructure()
|
||||
{
|
||||
if (!empty($this->fieldsStructure)) {
|
||||
return $this->fieldsStructure;
|
||||
}
|
||||
|
||||
$fields = array();
|
||||
|
||||
foreach ($this->appbox->get_databoxes() as $databox) {
|
||||
//printf("Databox %d\n", $databox->get_sbas_id());
|
||||
foreach ($databox->get_meta_structure() as $fieldStructure) {
|
||||
$field = array();
|
||||
// Field type
|
||||
switch ($fieldStructure->get_type()) {
|
||||
case \databox_field::TYPE_DATE:
|
||||
$field['type'] = Mapping::TYPE_DATE;
|
||||
break;
|
||||
case \databox_field::TYPE_NUMBER:
|
||||
$field['type'] = Mapping::TYPE_DOUBLE;
|
||||
break;
|
||||
case \databox_field::TYPE_STRING:
|
||||
case \databox_field::TYPE_TEXT:
|
||||
$field['type'] = Mapping::TYPE_STRING;
|
||||
break;
|
||||
default:
|
||||
throw new Exception(sprintf('Invalid field type "%s", expected "date", "number" or "string".', $fieldStructure->get_type()));
|
||||
break;
|
||||
}
|
||||
|
||||
$name = $fieldStructure->get_name();
|
||||
$field['databox_ids'][] = $databox->get_sbas_id();
|
||||
|
||||
// Business rules
|
||||
$field['private'] = $fieldStructure->isBusiness();
|
||||
$field['searchable'] = $fieldStructure->is_indexable();
|
||||
$field['to_aggregate'] = (bool) $fieldStructure->isAggregable();
|
||||
|
||||
// Thesaurus concept inference
|
||||
$xpath = $fieldStructure->get_tbranch();
|
||||
if ($field['type'] === Mapping::TYPE_STRING && $xpath ==! '') {
|
||||
$field['thesaurus_concept_inference'] = true;
|
||||
$field['thesaurus_root_concepts'] = ThesaurusHelper::findConceptsByXPath($databox, $xpath);
|
||||
} else {
|
||||
$field['thesaurus_concept_inference'] = false;
|
||||
$field['thesaurus_root_concepts'] = [];
|
||||
}
|
||||
|
||||
//printf("Field \"%s\" <%s> (private: %b)\n", $name, $field['type'], $field['private']);
|
||||
|
||||
// Since mapping is merged between databoxes, two fields may
|
||||
// have conflicting names. Indexing is the same for a given
|
||||
// type so we reject only those with different types.
|
||||
if (isset($fields[$name])) {
|
||||
// keep tracks of databox_id's where the field belongs to
|
||||
$fields[$name]['databox_ids'][] = $databox->get_sbas_id();
|
||||
|
||||
if ($fields[$name]['type'] !== $field['type']) {
|
||||
throw new MergeException(sprintf("Field %s can't be merged, incompatible types (%s vs %s)", $name, $fields[$name]['type'], $field['type']));
|
||||
}
|
||||
|
||||
if ($fields[$name]['private'] !== $field['private']) {
|
||||
throw new MergeException(sprintf("Field %s can't be merged, could not mix private and public fields with same name", $name));
|
||||
}
|
||||
|
||||
if ($fields[$name]['searchable'] !== $field['searchable']) {
|
||||
throw new MergeException(sprintf("Field %s can't be merged, incompatible searchable state", $name));
|
||||
}
|
||||
|
||||
if ($fields[$name]['to_aggregate'] !== $field['to_aggregate']) {
|
||||
throw new MergeException(sprintf("Field %s can't be merged, incompatible to_aggregate state", $name));
|
||||
}
|
||||
// TODO other structure incompatibilities
|
||||
|
||||
//printf("Merged with previous \"%s\" field\n", $name);
|
||||
}
|
||||
|
||||
$fields[$name] = $field;
|
||||
}
|
||||
}
|
||||
|
||||
return $this->fieldsStructure = $fields;
|
||||
}
|
||||
}
|
||||
|
132
lib/Alchemy/Phrasea/SearchEngine/Elastic/Structure/Field.php
Normal file
132
lib/Alchemy/Phrasea/SearchEngine/Elastic/Structure/Field.php
Normal file
@@ -0,0 +1,132 @@
|
||||
<?php
|
||||
|
||||
namespace Alchemy\Phrasea\SearchEngine\Elastic\Structure;
|
||||
|
||||
use Alchemy\Phrasea\SearchEngine\Elastic\Exception\MergeException;
|
||||
use Alchemy\Phrasea\SearchEngine\Elastic\Mapping;
|
||||
use Alchemy\Phrasea\SearchEngine\Elastic\Thesaurus\Helper as ThesaurusHelper;
|
||||
use databox_field;
|
||||
|
||||
/**
 * Description of a single metadata field of the search structure.
 *
 * A field carries its name, its mapping type, three flags (searchable,
 * private, facet) and, when concept inference is enabled, the thesaurus
 * root concepts attached to it.
 *
 * @todo Field labels
 */
class Field
{
    /** @var string Field name */
    private $name;

    /** @var string Mapping type (one of the Mapping::TYPE_* values) */
    private $type;

    /** @var bool */
    private $is_searchable;

    /** @var bool */
    private $is_private;

    /** @var bool */
    private $is_facet;

    /** @var array|null Thesaurus root concepts, null when concept inference is disabled */
    private $thesaurus_roots;

    /**
     * Builds a field from a legacy databox field definition.
     *
     * Thesaurus roots are only looked up for string fields that declare a
     * non-empty thesaurus branch; any other field gets null roots, which
     * means "no concept inference".
     *
     * @param databox_field $field
     *
     * @return self
     *
     * @throws \Exception When the legacy type has no mapping equivalent
     */
    public static function createFromLegacyField(databox_field $field)
    {
        $type = self::getTypeFromLegacy($field);

        // Thesaurus concept inference
        $roots = null;
        $xpath = $field->get_tbranch();
        if ($type === Mapping::TYPE_STRING && !empty($xpath)) {
            $roots = ThesaurusHelper::findConceptsByXPath($field->get_databox(), $xpath);
        }

        return new self(
            $field->get_name(),
            $type,
            $field->is_indexable(),
            $field->isBusiness(),
            $field->isAggregable(),
            $roots
        );
    }

    /**
     * Translates a legacy field type into a mapping type.
     *
     * @param databox_field $field
     *
     * @return string One of Mapping::TYPE_DATE, Mapping::TYPE_DOUBLE or Mapping::TYPE_STRING
     *
     * @throws \Exception When the legacy type is not date, number, string or text
     */
    private static function getTypeFromLegacy(databox_field $field)
    {
        $type = $field->get_type();
        switch ($type) {
            case databox_field::TYPE_DATE:
                return Mapping::TYPE_DATE;
            case databox_field::TYPE_NUMBER:
                return Mapping::TYPE_DOUBLE;
            case databox_field::TYPE_STRING:
            case databox_field::TYPE_TEXT:
                return Mapping::TYPE_STRING;
            default:
                // Fully qualified on purpose: no "Exception" class is imported
                // in this namespace, so the unqualified name would not resolve.
                throw new \Exception(sprintf('Invalid field type "%s", expected "date", "number" or "string".', $type));
        }
    }

    /**
     * @param string     $name            Field name
     * @param string     $type            Mapping type
     * @param bool       $searchable      Whether the field is searchable
     * @param bool       $private         Whether the field is private
     * @param bool       $facet           Whether the field is a facet
     * @param array|null $thesaurus_roots Root concepts, or null to disable concept inference
     */
    public function __construct($name, $type, $searchable = true, $private = false, $facet = false, array $thesaurus_roots = null)
    {
        $this->name = (string) $name;
        $this->type = (string) $type;
        $this->is_searchable = (bool) $searchable;
        $this->is_private = (bool) $private;
        $this->is_facet = (bool) $facet;
        $this->thesaurus_roots = $thesaurus_roots;
    }

    /**
     * @return string
     */
    public function getName()
    {
        return $this->name;
    }

    /**
     * @return string
     */
    public function getType()
    {
        return $this->type;
    }

    /**
     * @return bool
     */
    public function isSearchable()
    {
        return $this->is_searchable;
    }

    /**
     * @return bool
     */
    public function isPrivate()
    {
        return $this->is_private;
    }

    /**
     * @return bool
     */
    public function isFacet()
    {
        return $this->is_facet;
    }

    /**
     * Tells whether concept inference is enabled, i.e. whether thesaurus
     * roots were provided. An empty array of roots still counts as enabled.
     *
     * @return bool
     */
    public function hasConceptInference()
    {
        return $this->thesaurus_roots !== null;
    }

    /**
     * @return array|null Null when concept inference is disabled
     */
    public function getThesaurusRoots()
    {
        return $this->thesaurus_roots;
    }

    /**
     * Checks that another field can share this field's definition.
     *
     * Name, type and the private / searchable / facet flags must all be
     * identical. This field is not modified by the call.
     *
     * NOTE(review): thesaurus roots are neither compared nor merged here, so
     * the roots of $other are not carried over to this field.
     *
     * @param Field $other
     *
     * @throws MergeException When the two definitions are incompatible
     */
    public function mergeWith(Field $other)
    {
        if (($name = $other->getName()) !== $this->name) {
            throw new MergeException(sprintf("Fields have different names (%s vs %s)", $this->name, $name));
        }

        // Since mapping is merged between databoxes, two fields may
        // have conflicting names. Indexing is the same for a given
        // type so we reject only those with different types.

        if (($type = $other->getType()) !== $this->type) {
            throw new MergeException(sprintf("Field %s can't be merged, incompatible types (%s vs %s)", $name, $type, $this->type));
        }

        if ($other->isPrivate() !== $this->is_private) {
            throw new MergeException(sprintf("Field %s can't be merged, could not mix private and public fields with same name", $name));
        }

        if ($other->isSearchable() !== $this->is_searchable) {
            throw new MergeException(sprintf("Field %s can't be merged, incompatible searchable state", $name));
        }

        if ($other->isFacet() !== $this->is_facet) {
            throw new MergeException(sprintf("Field %s can't be merged, incompatible to_aggregate state", $name));
        }
    }
}
|
@@ -0,0 +1,99 @@
|
||||
<?php
|
||||
|
||||
namespace Alchemy\Phrasea\SearchEngine\Elastic\Structure;
|
||||
|
||||
use Alchemy\Phrasea\SearchEngine\Elastic\Mapping;
|
||||
|
||||
/**
 * Set of fields indexed by name, with secondary indexes for date, private,
 * facet and thesaurus-enabled fields.
 *
 * Fields sharing a name are checked for compatibility through
 * Field::mergeWith() when they are added.
 */
class Structure
{
    /** @var Field[] Every field, indexed by name */
    private $fields = array();

    /** @var Field[] Fields whose type is Mapping::TYPE_DATE, indexed by name */
    private $date_fields = array();

    /** @var Field[] Fields with concept inference, indexed by name */
    private $thesaurus_fields = array();

    /** @var Field[] Private fields, indexed by name */
    private $private = array();

    /** @var Field[] Facet fields, indexed by name */
    private $facets = array();

    /** @var array Not read or written by any method of this class yet */
    private $aliases = array();

    /**
     * Builds a structure from the meta structure of each given databox.
     *
     * @param array $databoxes Objects exposing get_meta_structure()
     *
     * @return self
     */
    public static function fromDataboxes(array $databoxes)
    {
        $structure = new self();
        foreach ($databoxes as $databox) {
            foreach ($databox->get_meta_structure() as $fieldStructure) {
                $structure->add(Field::createFromLegacyField($fieldStructure));
            }
        }

        return $structure;
    }

    /**
     * Registers a field.
     *
     * When a field with the same name is already known, the existing field
     * is asked to merge with the new one; if that merge throws, the
     * exception propagates and none of the indexes below are touched.
     *
     * NOTE(review): the secondary indexes store the incoming $field instance,
     * not the one kept in the main index, so a later same-named field
     * replaces the earlier entry (and its thesaurus roots) in those indexes.
     *
     * @param Field $field
     */
    public function add(Field $field)
    {
        $name = $field->getName();
        if (isset($this->fields[$name])) {
            $this->fields[$name]->mergeWith($field);
        } else {
            $this->fields[$name] = $field;
        }

        if ($field->getType() === Mapping::TYPE_DATE) {
            $this->date_fields[$name] = $field;
        }
        if ($field->isPrivate()) {
            $this->private[$name] = $field;
        }
        if ($field->isFacet()) {
            $this->facets[$name] = $field;
        }
        if ($field->hasConceptInference()) {
            $this->thesaurus_fields[$name] = $field;
        }
    }

    /**
     * @return Field[] Every field, indexed by name
     */
    public function getAllFields()
    {
        return $this->fields;
    }

    /**
     * @return Field[] Fields that are not private, indexed by name
     */
    public function getUnrestrictedFields()
    {
        return array_diff_key($this->fields, $this->private);
    }

    /**
     * @return Field[] Private fields, indexed by name
     */
    public function getPrivateFields()
    {
        return $this->private;
    }

    /**
     * @return Field[] Facet fields, indexed by name
     */
    public function getFacetFields()
    {
        // TODO should we only return searchable fields?
        return $this->facets;
    }

    /**
     * @return Field[] Fields with concept inference, indexed by name
     */
    public function getThesaurusEnabledFields()
    {
        return $this->thesaurus_fields;
    }

    /**
     * @return Field[] Date fields, indexed by name
     */
    public function getDateFields()
    {
        return $this->date_fields;
    }

    /**
     * @param string $name
     *
     * @return Field|null Null when no field has this name
     */
    public function get($name)
    {
        return isset($this->fields[$name]) ? $this->fields[$name] : null;
    }

    /**
     * @param string $name
     *
     * @return string|null Type of the field, null when no field has this name
     */
    public function typeOf($name)
    {
        return isset($this->fields[$name]) ? $this->fields[$name]->getType() : null;
    }

    /**
     * Tells whether the named field is private.
     *
     * Written with explicit guards instead of a chained ternary: without
     * parentheses a chained ternary groups to the left on PHP < 8 (which
     * made private fields report false) and is rejected by PHP 8.
     *
     * @param string $name
     *
     * @return bool|null True for a private field, false for any other known
     *                   field, null when no field has this name
     */
    public function isPrivate($name)
    {
        if (isset($this->private[$name])) {
            return true;
        }
        if (isset($this->fields[$name])) {
            return false;
        }

        return null;
    }
}
|
33
tests/Alchemy/Tests/Phrasea/SearchEngine/StructureTest.php
Normal file
33
tests/Alchemy/Tests/Phrasea/SearchEngine/StructureTest.php
Normal file
@@ -0,0 +1,33 @@
|
||||
<?php
|
||||
|
||||
namespace Alchemy\Tests\Phrasea\SearchEngine;
|
||||
|
||||
use Alchemy\Phrasea\SearchEngine\Elastic\Mapping;
|
||||
use Alchemy\Phrasea\SearchEngine\Elastic\Structure\Field;
|
||||
use Alchemy\Phrasea\SearchEngine\Elastic\Structure\Structure;
|
||||
|
||||
class StructureTest extends \PHPUnit_Framework_TestCase
{
    /**
     * Merging two fields built with identical arguments must not throw, and
     * the receiving field keeps its name, its type and the default flags.
     */
    public function testFieldMerge()
    {
        $receiver = new Field('foo', Mapping::TYPE_STRING);
        $duplicate = new Field('foo', Mapping::TYPE_STRING);

        $receiver->mergeWith($duplicate);

        $this->assertEquals('foo', $receiver->getName());
        $this->assertEquals(Mapping::TYPE_STRING, $receiver->getType());
        $this->assertTrue($receiver->isSearchable());
        $this->assertFalse($receiver->isPrivate());
        $this->assertFalse($receiver->isFacet());
    }

    /**
     * Adding the same field definition twice must result in a single entry.
     */
    public function testFieldAdd()
    {
        $structure = new Structure();
        $this->assertEmpty($structure->getAllFields());

        // Both additions use the same name, so the count stays at one
        // after the first and after the second call.
        for ($attempt = 0; $attempt < 2; $attempt++) {
            $structure->add(new Field('foo', Mapping::TYPE_STRING));
            $this->assertCount(1, $structure->getAllFields());
        }
    }
}
|
Reference in New Issue
Block a user