diff --git a/lib/Alchemy/Phrasea/SearchEngine/Elastic/FieldMapping.php b/lib/Alchemy/Phrasea/SearchEngine/Elastic/FieldMapping.php index c98cd44be1..dbfda581f4 100644 --- a/lib/Alchemy/Phrasea/SearchEngine/Elastic/FieldMapping.php +++ b/lib/Alchemy/Phrasea/SearchEngine/Elastic/FieldMapping.php @@ -11,7 +11,7 @@ namespace Alchemy\Phrasea\SearchEngine\Elastic; -abstract class FieldMapping +class FieldMapping { const DATE_FORMAT_MYSQL = 'yyyy-MM-dd HH:mm:ss'; @@ -47,6 +47,7 @@ abstract class FieldMapping self::TYPE_SHORT, self::TYPE_BYTE, self::TYPE_IP, + self::TYPE_OBJECT ); /** @@ -64,6 +65,15 @@ abstract class FieldMapping */ private $indexed = true; + /** + * @var bool + */ + private $enabled = true; + + /** + * @var bool + */ + private $raw = false; /** * @param string $name @@ -77,7 +87,7 @@ abstract class FieldMapping if (! in_array($type, self::$types)) { throw new \InvalidArgumentException(sprintf( - 'Invalid field mapping type "%s", expected "%s" or Mapping instance.', + 'Invalid field mapping type "%s", expected "%s"', $type, implode('", "', self::$types) )); @@ -125,10 +135,46 @@ abstract class FieldMapping return $this; } + public function enableRawIndexing() + { + $this->raw = true; + + return $this; + } + + /** + * @return bool + */ + public function isEnabled() + { + return $this->enabled; + } + + public function enableMapping() + { + $this->enabled = true; + } + + public function disableMapping() + { + $this->enabled = false; + } + /** * @return array */ - abstract public function toArray(); + public function toArray() + { + return $this->buildArray($this->getProperties()); + } + + /** + * @return array + */ + protected function getProperties() + { + return []; + } /** * Helper function to append custom field properties to generic properties array @@ -136,11 +182,26 @@ abstract class FieldMapping * @param array $fieldProperties * @return array */ - protected function buildArray(array $fieldProperties = []) + private function buildArray(array 
$fieldProperties = []) { - return array_merge([ - 'type' => $this->getType(), - 'index' => $this->indexed ? 'yes' : 'no' - ], $fieldProperties); + $baseProperties = [ ]; + + if ($this->type !== self::TYPE_OBJECT) { + $baseProperties['type'] = $this->type; + } else { + $baseProperties['properties'] = []; + } + + if (! $this->indexed) { + $baseProperties['index'] = 'no'; + } elseif ($this->raw) { + $baseProperties['index'] = 'not_analyzed'; + } + + if (! $this->enabled) { + $baseProperties['enabled'] = false; + } + + return array_replace($baseProperties, $fieldProperties); } } diff --git a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer/RecordIndexer.php b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer/RecordIndexer.php index a938292b22..8054b29793 100644 --- a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer/RecordIndexer.php +++ b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer/RecordIndexer.php @@ -25,6 +25,7 @@ use Alchemy\Phrasea\SearchEngine\Elastic\Indexer\Record\Hydrator\SubDefinitionHy use Alchemy\Phrasea\SearchEngine\Elastic\Indexer\Record\Hydrator\ThesaurusHydrator; use Alchemy\Phrasea\SearchEngine\Elastic\Indexer\Record\Hydrator\TitleHydrator; use Alchemy\Phrasea\SearchEngine\Elastic\Mapping; +use Alchemy\Phrasea\SearchEngine\Elastic\MappingBuilder; use Alchemy\Phrasea\SearchEngine\Elastic\RecordHelper; use Alchemy\Phrasea\SearchEngine\Elastic\Structure\Field; use Alchemy\Phrasea\SearchEngine\Elastic\Structure\Structure; @@ -308,7 +309,7 @@ class RecordIndexer return array_values($databoxes); } - private function indexFromFetcher(BulkOperation $bulk, Fetcher $fetcher, array &$submited_records) + private function indexFromFetcher(BulkOperation $bulk, Fetcher $fetcher, array &$submitted_records) { $databox = $fetcher->getDatabox(); $first = true; @@ -338,9 +339,124 @@ class RecordIndexer $params['type'] = self::TYPE_NAME; $params['body'] = $record; - $submited_records[$op_identifier] = $record; + $submitted_records[$op_identifier] = $record; 
$bulk->index($params, $op_identifier); } } + + + public function getMapping() + { + $mapping = new MappingBuilder(); + + // Compound primary key + $mapping->addField('record_id', FieldMapping::TYPE_INTEGER); + $mapping->addField('databox_id', FieldMapping::TYPE_INTEGER); + + // Database name (still indexed for facets) + $mapping->addStringField('databox_name')->disableAnalysis(); + // Unique collection ID + $mapping->addField('base_id', FieldMapping::TYPE_INTEGER); + // Useless collection ID (local to databox) + $mapping->addField('collection_id', FieldMapping::TYPE_INTEGER)->disableIndexing(); + // Collection name (still indexed for facets) + $mapping->addStringField('collection_name')->disableAnalysis(); + + $mapping->addStringField('uuid')->disableIndexing(); + $mapping->addStringField('sha256')->disableIndexing(); + $mapping->addStringField('original_name')->disableIndexing(); + $mapping->addStringField('mime')->disableAnalysis(); + $mapping->addStringField('type')->disableAnalysis(); + $mapping->addStringField('record_type')->disableAnalysis(); + + $mapping->addDateField('created_on', FieldMapping::DATE_FORMAT_MYSQL_OR_CAPTION); + $mapping->addDateField('updated_on', FieldMapping::DATE_FORMAT_MYSQL_OR_CAPTION); + + $mapping->add($this->buildThesaurusPathMapping('concept_path')); + $mapping->add($this->buildMetadataTagMapping('metadata_tags')); + $mapping->add($this->buildFlagMapping('flags')); + + $mapping->addField('flags_bitfield', FieldMapping::TYPE_INTEGER)->disableIndexing(); + $mapping->addField('subdefs', FieldMapping::TYPE_OBJECT)->disableMapping(); + $mapping->addField('title', FieldMapping::TYPE_OBJECT)->disableMapping(); + + // Caption mapping + $this->buildCaptionMapping($mapping, 'caption', $this->structure->getUnrestrictedFields()); + $this->buildCaptionMapping($mapping, 'private_caption', $this->structure->getPrivateFields()); + + return $mapping->getMapping()->export(); + } + + private function 
buildCaptionMapping(MappingBuilder $parent, $name, array $fields) + { + $fieldConverter = new Mapping\FieldToFieldMappingConverter(); + $captionMapping = new Mapping\ComplexFieldMapping($name, FieldMapping::TYPE_OBJECT); + + $captionMapping->useAsPropertyContainer(); + + foreach ($fields as $field) { + $captionMapping->addChild($fieldConverter->convertField($field, $this->locales)); + } + + $parent->add($captionMapping); + + $localizedCaptionMapping = new Mapping\StringFieldMapping(sprintf('%s_all', $name)); + $localizedCaptionMapping + ->addLocalizedChildren($this->locales) + ->addChild((new Mapping\StringFieldMapping('raw'))->enableRawIndexing()); + + $parent->add($localizedCaptionMapping); + + return $captionMapping; + } + + private function buildThesaurusPathMapping($name) + { + $thesaurusMapping = new Mapping\ComplexFieldMapping($name, FieldMapping::TYPE_OBJECT); + + foreach (array_keys($this->structure->getThesaurusEnabledFields()) as $name) { + $child = new Mapping\StringFieldMapping($name); + + $child->setAnalyzer('thesaurus_path', 'indexing'); + $child->setAnalyzer('keyword', 'searching'); + $child->addChild((new Mapping\StringFieldMapping('raw'))->enableRawIndexing()); + + $thesaurusMapping->addChild($child); + } + + return $thesaurusMapping; + } + + private function buildMetadataTagMapping($name) + { + $tagConverter = new Mapping\MetadataTagToFieldMappingConverter(); + $metadataMapping = new Mapping\ComplexFieldMapping($name, FieldMapping::TYPE_OBJECT); + + $metadataMapping->useAsPropertyContainer(); + + foreach ($this->structure->getMetadataTags() as $tag) { + $metadataMapping->addChild($tagConverter->convertTag($tag)); + } + + return $metadataMapping; + } + + private function buildFlagMapping($name) + { + $index = 0; + $flagMapping = new Mapping\ComplexFieldMapping($name, FieldMapping::TYPE_OBJECT); + + $flagMapping->useAsPropertyContainer(); + + foreach ($this->structure->getAllFlags() as $childName => $_) { + if (trim($childName) == '') { 
+ $childName = 'flag_' . $index++; + } + + $flagMapping->addChild(new FieldMapping($childName, FieldMapping::TYPE_BOOLEAN)); + } + + return $flagMapping; + } } diff --git a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer/TermIndexer.php b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer/TermIndexer.php index c66f8583ba..a11695cd59 100644 --- a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer/TermIndexer.php +++ b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer/TermIndexer.php @@ -11,7 +11,9 @@ namespace Alchemy\Phrasea\SearchEngine\Elastic\Indexer; +use Alchemy\Phrasea\SearchEngine\Elastic\FieldMapping; use Alchemy\Phrasea\SearchEngine\Elastic\Mapping; +use Alchemy\Phrasea\SearchEngine\Elastic\MappingBuilder; use Alchemy\Phrasea\SearchEngine\Elastic\Thesaurus\Helper; use Alchemy\Phrasea\SearchEngine\Elastic\Thesaurus\Navigator; use Alchemy\Phrasea\SearchEngine\Elastic\Thesaurus\TermVisitor; @@ -81,25 +83,27 @@ class TermIndexer public function getMapping() { - $mapping = new Mapping(); - $mapping - ->add('raw_value', 'string')->notAnalyzed() - ->add('value', 'string') - ->analyzer('general_light') - ->addMultiField('strict', 'thesaurus_term_strict') - ->addLocalizedSubfields($this->locales) - ->add('context', 'string') - ->analyzer('general_light') - ->addMultiField('strict', 'thesaurus_term_strict') - ->addLocalizedSubfields($this->locales) - ->add('path', 'string') - ->analyzer('thesaurus_path', 'indexing') - ->analyzer('keyword', 'searching') - ->addRawVersion() - ->add('lang', 'string')->notAnalyzed() - ->add('databox_id', 'integer') - ; + $mapping = new MappingBuilder(); - return $mapping->export(); + $mapping->addStringField('raw_value')->disableAnalysis(); + $mapping->addStringField('value') + ->setAnalyzer('general_light') + ->addAnalyzedChild('strict', 'thesaurus_term_strict') + ->addLocalizedChildren($this->locales); + + $mapping->addStringField('context') + ->setAnalyzer('general_light') + ->addAnalyzedChild('strict', 'thesaurus_term_strict') + 
->addLocalizedChildren($this->locales); + + $mapping->addStringField('path') + ->setAnalyzer('thesaurus_path', 'indexing') + ->setAnalyzer('keyword', 'searching') + ->addRawChild(); + + $mapping->addStringField('lang')->disableAnalysis(); + $mapping->addField('databox_id', FieldMapping::TYPE_INTEGER); + + return $mapping->getMapping()->export(); } } diff --git a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Mapping.php b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Mapping.php index e61f6cf8b7..d25995ce96 100644 --- a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Mapping.php +++ b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Mapping.php @@ -11,113 +11,63 @@ namespace Alchemy\Phrasea\SearchEngine\Elastic; -use LogicException; -use RuntimeException; - class Mapping { - const DATE_FORMAT_MYSQL = 'yyyy-MM-dd HH:mm:ss'; - const DATE_FORMAT_CAPTION = 'yyyy/MM/dd'; // ES format - const DATE_FORMAT_MYSQL_OR_CAPTION = 'yyyy-MM-dd HH:mm:ss||yyyy/MM/dd'; - const DATE_FORMAT_CAPTION_PHP = 'Y/m/d'; // PHP format - - // Core types - const TYPE_STRING = 'string'; - const TYPE_BOOLEAN = 'boolean'; - const TYPE_DATE = 'date'; - - // Number core types - const TYPE_FLOAT = 'float'; - const TYPE_DOUBLE = 'double'; - const TYPE_INTEGER = 'integer'; - const TYPE_LONG = 'long'; - const TYPE_SHORT = 'short'; - const TYPE_BYTE = 'byte'; - const TYPE_IP = 'ip'; - - // Compound types - const TYPE_OBJECT = 'object'; - - private static $types = array( - self::TYPE_STRING, - self::TYPE_BOOLEAN, - self::TYPE_DATE, - self::TYPE_FLOAT, - self::TYPE_DOUBLE, - self::TYPE_INTEGER, - self::TYPE_LONG, - self::TYPE_SHORT, - self::TYPE_BYTE, - self::TYPE_IP, - ); - public static function disabledMapping() { return (new self())->disable(); } /** - * @var array + * @var FieldMapping[] */ private $fields = array(); - /** - * @var string - */ - private $current; - /** * @var bool */ private $enabled = true; + /** + * @param FieldMapping $fieldMapping + * @return FieldMapping + */ + public function addField(FieldMapping 
$fieldMapping) + { + return $this->fields[$fieldMapping->getName()] = $fieldMapping; + } + /** * @param string $name - * @param string|Mapping $type - * @return $this + * @return bool + * @deprecated Use hasField instead */ - public function add($name, $type) + public function has($name) { - if ($type instanceof self) { - return $this->addComplexType($name, $type); - } - - if (! in_array($type, self::$types)) { - throw new RuntimeException(sprintf( - 'Invalid field mapping type "%s", expected "%s" or Mapping instance.', - $type, - implode('", "', self::$types) - )); - } - - return $this->addFieldConfiguration($name, [ 'type' => $type ]); + return $this->hasField($name); } /** - * @param $name - * @param Mapping $typeMapping - * @return $this + * @param string $name + * @return bool */ - public function addComplexType($name, Mapping $typeMapping) + public function hasField($name) { - return $this->addFieldConfiguration($name, [ - 'type' => self::TYPE_OBJECT, - 'mapping' => $typeMapping - ]); + return isset($this->fields[$name]); } - /** - * @param $name - * @param array $configuration - * @return $this - */ - private function addFieldConfiguration($name, array $configuration) + public function removeField($name) { - $this->fields[$name] = $configuration; - $this->current = $name; + if ($this->has($name)) { + $field = $this->fields[$name]; - return $this; + unset($this->fields[$name]); + + return $field; + } + + throw new \InvalidArgumentException('Mapping does not contain field: ' . $name); } /** @@ -129,14 +79,10 @@ class Mapping $mapping['properties'] = array(); foreach ($this->fields as $name => $field) { - if ($field['type'] === self::TYPE_OBJECT) { - $field = $field['mapping']->export(); - } - - $mapping['properties'][$name] = $field; + $mapping['properties'][$name] = $field->toArray(); } - if (!$this->enabled) { + if (! 
$this->enabled) { $mapping['enabled'] = false; } @@ -154,88 +100,4 @@ class Mapping return $this; } - - public function addRawVersion() - { - $field = & $this->currentField(); - - $field['fields']['raw'] = [ - 'type' => $field['type'], - 'index' => 'not_analyzed' - ]; - - return $this; - } - - /** - * @deprecated - */ - public function addAnalyzedVersion(array $locales) - { - $this->addMultiField('light', 'general_light'); - - return $this->addLocalizedSubfields($locales); - } - - public function addLocalizedSubfields(array $locales) - { - foreach ($locales as $locale) { - $this->addMultiField($locale, sprintf('%s_full', $locale)); - } - - return $this; - } - - public function addMultiField($name, $analyzer = null) - { - $field = &$this->currentField(); - - if (isset($field['fields'][$name])) { - throw new LogicException(sprintf('There is already a "%s" multi field.', $name)); - } - - $field['fields'][$name] = array(); - $field['fields'][$name]['type'] = $field['type']; - - if ($analyzer) { - $field['fields'][$name]['analyzer'] = $analyzer; - } - - return $this; - } - - public function enableTermVectors($recursive = false) - { - $field = &$this->currentField(); - - if ($field['type'] !== self::TYPE_STRING) { - throw new LogicException('Only string fields can have term vectors'); - } - - $field['term_vector'] = 'with_positions_offsets'; - - if ($recursive) { - if (isset($field['fields'])) { - foreach ($field['fields'] as $name => &$options) { - $options['term_vector'] = 'with_positions_offsets'; - } - } - } - - return $this; - } - - public function has($name) - { - return isset($this->fields[$name]); - } - - protected function &currentField() - { - if (null === $this->current) { - throw new LogicException('You must add a field first'); - } - - return $this->fields[$this->current]; - } } diff --git a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Mapping/ComplexFieldMapping.php b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Mapping/ComplexFieldMapping.php index 
4c558765a1..1159144650 100644 --- a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Mapping/ComplexFieldMapping.php +++ b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Mapping/ComplexFieldMapping.php @@ -20,12 +20,49 @@ class ComplexFieldMapping extends FieldMapping */ private $children = []; + private $childKey = 'fields'; + + public function useAsPropertyContainer() + { + $this->childKey = 'properties'; + } + + public function useAsFieldContainer() + { + $this->childKey = 'fields'; + } + /** * @param FieldMapping $child + * @return FieldMapping */ public function addChild(FieldMapping $child) { - $this->children[] = $child; + if (isset($this->children[$child->getName()])) { + throw new \LogicException(sprintf('There is already a "%s" multi field.', $child->getName())); + } + + if ($child->getType() !== $this->getType() && $this->getType() !== self::TYPE_OBJECT) { + throw new \LogicException('Child field type must match parent type.'); + } + + return $this->children[$child->getName()] = $child; + } + + /** + * @return RawFieldMapping + */ + public function addRawChild() + { + return $this->addChild(new RawFieldMapping($this->getType())); + } + + /** + * @return bool + */ + public function hasChildren() + { + return ! empty($this->children); } /** @@ -39,8 +76,18 @@ class ComplexFieldMapping extends FieldMapping /** * @return array */ - public function toArray() + protected function getProperties() { - return $this->buildArray([ ]); + if (! 
$this->hasChildren()) { + return []; + } + + $properties = [ ]; + + foreach ($this->children as $name => $child) { + $properties[$name] = $child->toArray(); + } + + return [ $this->childKey => $properties ]; } } diff --git a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Mapping/DateFieldMapping.php b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Mapping/DateFieldMapping.php index 0a65851f1b..a96ef45f4a 100644 --- a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Mapping/DateFieldMapping.php +++ b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Mapping/DateFieldMapping.php @@ -11,13 +11,11 @@ namespace Alchemy\Phrasea\SearchEngine\Elastic\Mapping; -use Alchemy\Phrasea\SearchEngine\Elastic\FieldMapping; - /** * Class DateFieldMapping * @package Alchemy\Phrasea\SearchEngine\Elastic\Mapping */ -class DateFieldMapping extends FieldMapping +class DateFieldMapping extends ComplexFieldMapping { /** * @var string @@ -26,12 +24,11 @@ class DateFieldMapping extends FieldMapping /** * @param string $name - * @param string $type * @param string $format */ - public function __construct($name, $type, $format) + public function __construct($name, $format) { - parent::__construct($name, $type); + parent::__construct($name, self::TYPE_DATE); $this->format = $format; } @@ -58,8 +55,8 @@ class DateFieldMapping extends FieldMapping /** * @return array */ - public function toArray() + protected function getProperties() { - return $this->buildArray([ 'format' => $this->format ]); + return array_merge([ 'format' => $this->format ], parent::getProperties()); } } diff --git a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Mapping/FieldToFieldMappingConverter.php b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Mapping/FieldToFieldMappingConverter.php new file mode 100644 index 0000000000..46c28b9cce --- /dev/null +++ b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Mapping/FieldToFieldMappingConverter.php @@ -0,0 +1,42 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed 
with this source code. + */ + +namespace Alchemy\Phrasea\SearchEngine\Elastic\Mapping; + +use Alchemy\Phrasea\SearchEngine\Elastic\FieldMapping; +use Alchemy\Phrasea\SearchEngine\Elastic\Structure\Field; + +class FieldToFieldMappingConverter +{ + + public function convertField(Field $field, array $locales) + { + if ($field->getType() === FieldMapping::TYPE_DATE) { + return new DateFieldMapping($field->getName(), FieldMapping::DATE_FORMAT_CAPTION); + } + + if ($field->getType() === FieldMapping::TYPE_STRING) { + $fieldMapping = new StringFieldMapping($field->getName()); + + if (! $field->isFacet() && ! $field->isSearchable()) { + $fieldMapping->disableIndexing(); + } else { + $fieldMapping->addChild((new StringFieldMapping('raw'))->enableRawIndexing()); + $fieldMapping->addAnalyzedChildren($locales); + $fieldMapping->enableTermVectors(true); + } + + return $fieldMapping; + } + + return new FieldMapping($field->getName(), $field->getType()); + } +} diff --git a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Mapping/MetadataTagToFieldMappingConverter.php b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Mapping/MetadataTagToFieldMappingConverter.php new file mode 100644 index 0000000000..07d186b4e3 --- /dev/null +++ b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Mapping/MetadataTagToFieldMappingConverter.php @@ -0,0 +1,37 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. 
+ */ + +namespace Alchemy\Phrasea\SearchEngine\Elastic\Mapping; + +use Alchemy\Phrasea\SearchEngine\Elastic\FieldMapping; +use Alchemy\Phrasea\SearchEngine\Elastic\Structure\Tag; + +class MetadataTagToFieldMappingConverter +{ + + public function convertTag(Tag $tag) + { + if ($tag->getType() === FieldMapping::TYPE_STRING) { + $fieldMapping = new StringFieldMapping($tag->getName()); + + $fieldMapping->disableAnalysis(); + + if ($tag->isAnalyzable()) { + $fieldMapping->addChild((new StringFieldMapping('raw'))->enableRawIndexing()); + $fieldMapping->enableAnalysis(); + } + + return $fieldMapping; + } + + return new FieldMapping($tag->getName(), $tag->getType()); + } +} diff --git a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Mapping/LocalizedFieldMapping.php b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Mapping/RawFieldMapping.php similarity index 59% rename from lib/Alchemy/Phrasea/SearchEngine/Elastic/Mapping/LocalizedFieldMapping.php rename to lib/Alchemy/Phrasea/SearchEngine/Elastic/Mapping/RawFieldMapping.php index 28615fb369..1eaf377296 100644 --- a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Mapping/LocalizedFieldMapping.php +++ b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Mapping/RawFieldMapping.php @@ -13,14 +13,22 @@ namespace Alchemy\Phrasea\SearchEngine\Elastic\Mapping; use Alchemy\Phrasea\SearchEngine\Elastic\FieldMapping; -class LocalizedFieldMapping extends FieldMapping +class RawFieldMapping extends FieldMapping { + /** + * @param string $type + */ + public function __construct($type) + { + parent::__construct('raw', $type); + } + /** * @return array */ - public function toArray() + protected function getProperties() { - return $this->buildArray([]); + return [ 'index' => 'not_analyzed' ]; } } diff --git a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Mapping/StringFieldMapping.php b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Mapping/StringFieldMapping.php index cff50020d1..b9022b2238 100644 --- a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Mapping/StringFieldMapping.php 
+++ b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Mapping/StringFieldMapping.php @@ -11,9 +11,7 @@ namespace Alchemy\Phrasea\SearchEngine\Elastic\Mapping; -use Alchemy\Phrasea\SearchEngine\Elastic\FieldMapping; - -class StringFieldMapping extends FieldMapping +class StringFieldMapping extends ComplexFieldMapping { /** * @var bool @@ -30,9 +28,57 @@ class StringFieldMapping extends FieldMapping */ private $searchAnalyzer = null; + /** + * @var string|null + */ + private $termVector = null; + + /** + * @param string $name + */ + public function __construct($name) + { + parent::__construct($name, self::TYPE_STRING); + } + + public function addAnalyzedChild($name, $analyzer) + { + $child = new self($name); + + $child->setAnalyzer($analyzer); + $this->addChild($child); + + return $this; + } + + public function addAnalyzedChildren(array $locales) + { + $child = new StringFieldMapping('light'); + $child->setAnalyzer('general_light'); + + $this->addChild($child); + $this->addLocalizedChildren($locales); + + return $this; + } + + public function addLocalizedChildren(array $locales) + { + foreach ($locales as $locale) { + /** @var StringFieldMapping $child */ + $child = new StringFieldMapping($locale); + + $child->setAnalyzer(sprintf('%s_full', $locale)); + $this->addChild($child); + } + + return $this; + } + /** * @param string $analyzer * @param string|null $type + * @return $this */ public function setAnalyzer($analyzer, $type = null) { @@ -56,37 +102,63 @@ class StringFieldMapping extends FieldMapping default: throw new \LogicException(sprintf('Invalid analyzer type "%s".', $type)); } + + return $this; } public function disableAnalysis() { $this->enableAnalysis = false; + + return $this; } public function enableAnalysis() { $this->enableAnalysis = true; + + return $this; + } + + public function enableTermVectors($applyToChildren = false) + { + $this->termVector = 'with_positions_offsets'; + + if ($applyToChildren) { + /** @var self $child */ + foreach ($this->getChildren() 
as $child) { + if ($child instanceof StringFieldMapping) { + $child->enableTermVectors(false); + } + } + } + + return $this; } /** * @return array */ - public function toArray() + protected function getProperties() { - $configuration = []; + $properties = []; if ($this->analyzer) { - $configuration['analyzer'] = $this->analyzer; + $properties['analyzer'] = $this->analyzer; } if ($this->searchAnalyzer) { - $configuration['search_analyzer'] = $this->searchAnalyzer; + $properties['search_analyzer'] = $this->searchAnalyzer; } if (! $this->enableAnalysis) { - $configuration['index'] = 'not_analyzed'; + $properties['index'] = 'not_analyzed'; } - return $this->buildArray($configuration); + if ($this->termVector) { + $properties['term_vector'] = $this->termVector; + } + + return array_replace(parent::getProperties(), $properties); } } diff --git a/lib/Alchemy/Phrasea/SearchEngine/Elastic/MappingBuilder.php b/lib/Alchemy/Phrasea/SearchEngine/Elastic/MappingBuilder.php new file mode 100644 index 0000000000..9030e36e20 --- /dev/null +++ b/lib/Alchemy/Phrasea/SearchEngine/Elastic/MappingBuilder.php @@ -0,0 +1,74 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. 
+ */ + +namespace Alchemy\Phrasea\SearchEngine\Elastic; + +use Alchemy\Phrasea\SearchEngine\Elastic\Mapping\DateFieldMapping; +use Alchemy\Phrasea\SearchEngine\Elastic\Mapping\StringFieldMapping; + +class MappingBuilder +{ + /** + * @var Mapping + */ + private $mapping; + + public function __construct() + { + $this->mapping = new Mapping(); + } + + /** + * @param string $name + * @return StringFieldMapping + */ + public function addStringField($name) + { + return $this->mapping->addField(new StringFieldMapping($name)); + } + + /** + * @param string $name + * @param string $format + * @return DateFieldMapping + */ + public function addDateField($name, $format) + { + return $this->mapping->addField(new DateFieldMapping($name, $format)); + } + + /** + * @param string $name + * @param string $type + * @return FieldMapping + */ + public function addField($name, $type) + { + return $this->mapping->addField(new FieldMapping($name, $type)); + } + + /** + * @param FieldMapping $fieldMapping + * @return FieldMapping + */ + public function add(FieldMapping $fieldMapping) + { + return $this->mapping->addField($fieldMapping); + } + + /** + * @return Mapping + */ + public function getMapping() + { + return $this->mapping; + } +} diff --git a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Structure/Field.php b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Structure/Field.php index 5211c0318e..9c364a8843 100644 --- a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Structure/Field.php +++ b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Structure/Field.php @@ -3,6 +3,7 @@ namespace Alchemy\Phrasea\SearchEngine\Elastic\Structure; use Alchemy\Phrasea\SearchEngine\Elastic\Exception\MergeException; +use Alchemy\Phrasea\SearchEngine\Elastic\FieldMapping; use Alchemy\Phrasea\SearchEngine\Elastic\Mapping; use Alchemy\Phrasea\SearchEngine\Elastic\Thesaurus\Concept; use Alchemy\Phrasea\SearchEngine\Elastic\Thesaurus\Helper as ThesaurusHelper; @@ -51,7 +52,7 @@ class Field implements Typed // Thesaurus concept 
inference $xpath = $field->get_tbranch(); - if ($type === Mapping::TYPE_STRING && !empty($xpath)) { + if ($type === FieldMapping::TYPE_STRING && !empty($xpath)) { $roots = ThesaurusHelper::findConceptsByXPath($databox, $xpath); } else { $roots = null; @@ -77,14 +78,15 @@ class Field implements Typed private static function getTypeFromLegacy(databox_field $field) { $type = $field->get_type(); + switch ($type) { case databox_field::TYPE_DATE: - return Mapping::TYPE_DATE; + return FieldMapping::TYPE_DATE; case databox_field::TYPE_NUMBER: - return Mapping::TYPE_DOUBLE; + return FieldMapping::TYPE_DOUBLE; case databox_field::TYPE_STRING: case databox_field::TYPE_TEXT: - return Mapping::TYPE_STRING; + return FieldMapping::TYPE_STRING; } throw new \InvalidArgumentException(sprintf('Invalid field type "%s", expected "date", "number" or "string".', $type)); @@ -136,7 +138,7 @@ class Field implements Typed '%scaption.%s%s', $this->is_private ? 'private_' : '', $this->name, - $raw && $this->type === Mapping::TYPE_STRING ? '.raw' : '' + $raw && $this->type === FieldMapping::TYPE_STRING ? '.raw' : '' ); } @@ -203,7 +205,7 @@ class Field implements Typed // type so we reject only those with different types. 
if (($type = $other->getType()) !== $this->type) { - throw new MergeException(sprintf("Field %s can't be merged, incompatible types (%s vs %s)", $name, $type, $this->type)); + throw new MergeException(sprintf("Field %s can't be merged, incompatible types (%s vs %s)", $name, $type, $this->type)); } if ($other->isPrivate() !== $this->is_private) { diff --git a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Structure/GlobalStructure.php b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Structure/GlobalStructure.php index 6fcf0e2028..239072f1b5 100644 --- a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Structure/GlobalStructure.php +++ b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Structure/GlobalStructure.php @@ -2,6 +2,7 @@ namespace Alchemy\Phrasea\SearchEngine\Elastic\Structure; +use Alchemy\Phrasea\SearchEngine\Elastic\FieldMapping; use Alchemy\Phrasea\SearchEngine\Elastic\Mapping; use Assert\Assertion; use DomainException; @@ -76,12 +77,15 @@ final class GlobalStructure implements Structure Assertion::allIsInstanceOf($fields, Field::class); Assertion::allIsInstanceOf($flags, Flag::class); Assertion::allIsInstanceOf($metadata_tags, Tag::class); + foreach ($fields as $field) { $this->add($field); } + foreach ($flags as $flag) { $this->flags[$flag->getName()] = $flag; } + foreach ($metadata_tags as $tag) { $this->metadata_tags[$tag->getName()] = $tag; } @@ -97,7 +101,7 @@ final class GlobalStructure implements Structure $this->fields[$name] = $field; - if ($field->getType() === Mapping::TYPE_DATE) { + if ($field->getType() === FieldMapping::TYPE_DATE) { $this->date_fields[$name] = $field; }