mirror of
https://github.com/alchemy-fr/Phraseanet.git
synced 2025-10-17 15:03:25 +00:00
Record caption & EXIF mapping
This commit is contained in:
@@ -12,7 +12,9 @@
|
||||
namespace Alchemy\Phrasea\SearchEngine\Elastic;
|
||||
|
||||
use Elasticsearch\Client;
|
||||
use Exception;
|
||||
use Psr\Log\LoggerInterface;
|
||||
use media_subdef;
|
||||
use igorw;
|
||||
|
||||
class Indexer
|
||||
@@ -164,23 +166,35 @@ class Indexer
|
||||
->add('mime', 'string')->notAnalyzed()
|
||||
->add('type', 'string')->notAnalyzed()
|
||||
// Dates
|
||||
->add('created_at', 'date')->format('yyyy-MM-dd HH:mm:ss')
|
||||
->add('updated_at', 'date')->format('yyyy-MM-dd HH:mm:ss')
|
||||
->add('created_at', 'date')->format(Mapping::DATE_FORMAT_MYSQL)
|
||||
->add('updated_at', 'date')->format(Mapping::DATE_FORMAT_MYSQL)
|
||||
;
|
||||
|
||||
// Caption mapping
|
||||
$captionMapping = new Mapping();
|
||||
$mapping->add('caption', $captionMapping);
|
||||
$privateCaptionMapping = new Mapping();
|
||||
$mapping->add('private_caption', $privateCaptionMapping);
|
||||
foreach ($this->getRecordFieldsStructure() as $name => $params) {
|
||||
if ($params['private']) {
|
||||
$privateCaptionMapping->add($name, $params['type']);
|
||||
// TODO "include_in_all" = false for business fields ?
|
||||
} else {
|
||||
$captionMapping->add($name, $params['type']);
|
||||
}
|
||||
}
|
||||
|
||||
// EXIF
|
||||
$mapping->add('exif', $this->getRecordExifMapping());
|
||||
|
||||
// Status
|
||||
$mapping->add('flags', $this->getRecordFlagsMapping());
|
||||
|
||||
return $mapping->export();
|
||||
|
||||
|
||||
// TODO Migrate code below this line
|
||||
|
||||
$status = [];
|
||||
for ($i = 0; $i <= 32; $i ++) {
|
||||
$status['status-'.$i] = [
|
||||
'type' => 'integer',
|
||||
];
|
||||
}
|
||||
|
||||
$recordTypeMapping = [
|
||||
[
|
||||
'_source' => [
|
||||
'enabled' => true
|
||||
],
|
||||
@@ -206,146 +220,121 @@ class Indexer
|
||||
]
|
||||
],
|
||||
]
|
||||
],
|
||||
'properties' => [
|
||||
'record_id' => [
|
||||
'type' => 'integer',
|
||||
'index' => 'not_analyzed',
|
||||
],
|
||||
'databox_id' => [
|
||||
'type' => 'integer',
|
||||
'index' => 'not_analyzed',
|
||||
],
|
||||
'base_id' => [
|
||||
'type' => 'integer',
|
||||
'index' => 'not_analyzed',
|
||||
],
|
||||
'mime_type' => [
|
||||
'type' => 'string',
|
||||
'index' => 'not_analyzed',
|
||||
],
|
||||
'title' => [
|
||||
'type' => 'string',
|
||||
'index' => 'not_analyzed',
|
||||
],
|
||||
'original_name' => [
|
||||
'type' => 'string',
|
||||
'index' => 'not_analyzed',
|
||||
],
|
||||
'updated_on' => [
|
||||
'type' => 'date',
|
||||
'index' => 'not_analyzed',
|
||||
],
|
||||
'created_on' => [
|
||||
'type' => 'date',
|
||||
'index' => 'not_analyzed',
|
||||
],
|
||||
'collection_id' => [
|
||||
'type' => 'integer',
|
||||
'index' => 'not_analyzed',
|
||||
],
|
||||
'sha256' => [
|
||||
'type' => 'string',
|
||||
'index' => 'not_analyzed',
|
||||
],
|
||||
'type' => [
|
||||
'type' => 'string',
|
||||
'index' => 'not_analyzed',
|
||||
],
|
||||
'phrasea_type' => [
|
||||
'type' => 'string',
|
||||
'index' => 'not_analyzed',
|
||||
],
|
||||
'uuid' => [
|
||||
'type' => 'string',
|
||||
'index' => 'not_analyzed',
|
||||
],
|
||||
'status' => [
|
||||
'properties' => $status
|
||||
],
|
||||
"technical_informations" => [
|
||||
'properties' => [
|
||||
\media_subdef::TC_DATA_WIDTH => [
|
||||
'type' => 'integer'
|
||||
],
|
||||
\media_subdef::TC_DATA_HEIGHT => [
|
||||
'type' => 'integer'
|
||||
],
|
||||
\media_subdef::TC_DATA_COLORSPACE => [
|
||||
'type' => 'string'
|
||||
],
|
||||
\media_subdef::TC_DATA_CHANNELS => [
|
||||
'type' => 'integer'
|
||||
],
|
||||
\media_subdef::TC_DATA_ORIENTATION => [
|
||||
'type' => 'integer'
|
||||
],
|
||||
\media_subdef::TC_DATA_COLORDEPTH => [
|
||||
'type' => 'integer'
|
||||
],
|
||||
\media_subdef::TC_DATA_DURATION => [
|
||||
'type' => 'integer'
|
||||
],
|
||||
\media_subdef::TC_DATA_AUDIOCODEC => [
|
||||
'type' => 'string'
|
||||
],
|
||||
\media_subdef::TC_DATA_AUDIOSAMPLERATE => [
|
||||
'type' => 'integer'
|
||||
],
|
||||
\media_subdef::TC_DATA_VIDEOCODEC => [
|
||||
'type' => 'string'
|
||||
],
|
||||
\media_subdef::TC_DATA_FRAMERATE => [
|
||||
'type' => 'float'
|
||||
],
|
||||
\media_subdef::TC_DATA_MIMETYPE => [
|
||||
'type' => 'string'
|
||||
],
|
||||
\media_subdef::TC_DATA_FILESIZE => [
|
||||
'type' => 'long'
|
||||
],
|
||||
\media_subdef::TC_DATA_LONGITUDE => [
|
||||
'type' => 'float'
|
||||
],
|
||||
\media_subdef::TC_DATA_LATITUDE => [
|
||||
'type' => 'float'
|
||||
],
|
||||
\media_subdef::TC_DATA_FOCALLENGTH => [
|
||||
'type' => 'float'
|
||||
],
|
||||
\media_subdef::TC_DATA_CAMERAMODEL => [
|
||||
'type' => 'string'
|
||||
],
|
||||
\media_subdef::TC_DATA_FLASHFIRED => [
|
||||
'type' => 'boolean'
|
||||
],
|
||||
\media_subdef::TC_DATA_APERTURE => [
|
||||
'type' => 'float'
|
||||
],
|
||||
\media_subdef::TC_DATA_SHUTTERSPEED => [
|
||||
'type' => 'float'
|
||||
],
|
||||
\media_subdef::TC_DATA_HYPERFOCALDISTANCE => [
|
||||
'type' => 'float'
|
||||
],
|
||||
\media_subdef::TC_DATA_ISO => [
|
||||
'type' => 'integer'
|
||||
],
|
||||
\media_subdef::TC_DATA_LIGHTVALUE => [
|
||||
'type' => 'float'
|
||||
],
|
||||
]
|
||||
],
|
||||
"caption" => [
|
||||
'properties' => $captionFields
|
||||
],
|
||||
]
|
||||
];
|
||||
}
|
||||
|
||||
if (0 < count ($businessFields)) {
|
||||
$recordTypeMapping['properties']['caption-business'] = [
|
||||
'properties' => $businessFields
|
||||
];
|
||||
/**
 * Builds the caption field structure merged across every databox.
 *
 * Walks the meta structure of each databox returned by the appbox and
 * translates every field into an index type plus a "private" flag (the
 * value of the field's isBusiness() call). Fields sharing a name across
 * databoxes are merged; the definition seen last is the one kept.
 *
 * Progress information is written to standard output with printf().
 *
 * @return array Field definitions indexed by field name, each one shaped
 *               as array('type' => string, 'private' => isBusiness() value)
 *
 * @throws Exception When a field has an unsupported type, or when two
 *                   fields with the same name map to different index types
 */
private function getRecordFieldsStructure()
{
    $fields = array();

    foreach ($this->appbox->get_databoxes() as $databox) {
        printf("Databox %d\n", $databox->get_sbas_id());
        foreach ($databox->get_meta_structure() as $fieldStructure) {
            $field = array();

            // Field type
            switch ($fieldStructure->get_type()) {
                case \databox_field::TYPE_DATE:
                    $field['type'] = 'date';
                    break;
                case \databox_field::TYPE_NUMBER:
                    $field['type'] = 'string'; // TODO integer, float, double ?
                    break;
                case \databox_field::TYPE_STRING:
                case \databox_field::TYPE_TEXT:
                    $field['type'] = 'string';
                    break;
                default:
                    // throw already leaves the switch, no trailing "break" needed
                    throw new Exception(sprintf('Invalid field type "%s", expected "date", "number" or "string".', $fieldStructure->get_type()));
            }

            // Business rules
            $field['private'] = $fieldStructure->isBusiness();

            $name = $fieldStructure->get_name();

            printf("Field \"%s\" <%s> (private: %b)\n", $name, $field['type'], $field['private']);

            // Since mapping is merged between databoxes, two fields may
            // have conflicting names. Indexing is the same for a given
            // type so we reject only those with different types.
            if (isset($fields[$name])) {
                if ($fields[$name]['type'] !== $field['type']) {
                    // Name the offending field and both types so the
                    // conflicting databox structures can be located
                    throw new Exception(sprintf(
                        'Databox mapping can not be merged, incompatible field types for field "%s" ("%s" and "%s")',
                        $name,
                        $fields[$name]['type'],
                        $field['type']
                    ));
                }
                // TODO other structure incompatibilities

                printf("Merged with previous \"%s\" field\n", $name);
            }

            $fields[$name] = $field;
        }
    }

    return $fields;
}
|
||||
|
||||
/**
 * Builds the mapping of the technical data fields of a record.
 *
 * Each field is named after a media_subdef::TC_DATA_* constant. String
 * fields flagged below are additionally marked as not analyzed.
 *
 * @return Mapping
 */
private function getRecordExifMapping()
{
    // Each entry: field name, index type, whether notAnalyzed() is applied
    $definitions = array(
        array(media_subdef::TC_DATA_WIDTH, 'integer', false),
        array(media_subdef::TC_DATA_HEIGHT, 'integer', false),
        array(media_subdef::TC_DATA_COLORSPACE, 'string', true),
        array(media_subdef::TC_DATA_CHANNELS, 'integer', false),
        array(media_subdef::TC_DATA_ORIENTATION, 'integer', false),
        array(media_subdef::TC_DATA_COLORDEPTH, 'integer', false),
        array(media_subdef::TC_DATA_DURATION, 'integer', false),
        array(media_subdef::TC_DATA_AUDIOCODEC, 'string', true),
        array(media_subdef::TC_DATA_AUDIOSAMPLERATE, 'integer', false),
        array(media_subdef::TC_DATA_VIDEOCODEC, 'string', true),
        array(media_subdef::TC_DATA_FRAMERATE, 'float', false),
        array(media_subdef::TC_DATA_MIMETYPE, 'string', true),
        array(media_subdef::TC_DATA_FILESIZE, 'long', false),
        // TODO use geo point type for lat/long
        array(media_subdef::TC_DATA_LONGITUDE, 'float', false),
        array(media_subdef::TC_DATA_LATITUDE, 'float', false),
        array(media_subdef::TC_DATA_FOCALLENGTH, 'float', false),
        array(media_subdef::TC_DATA_CAMERAMODEL, 'string', false),
        array(media_subdef::TC_DATA_FLASHFIRED, 'boolean', false),
        array(media_subdef::TC_DATA_APERTURE, 'float', false),
        array(media_subdef::TC_DATA_SHUTTERSPEED, 'float', false),
        array(media_subdef::TC_DATA_HYPERFOCALDISTANCE, 'float', false),
        array(media_subdef::TC_DATA_ISO, 'integer', false),
        array(media_subdef::TC_DATA_LIGHTVALUE, 'float', false),
    );

    $exifMapping = new Mapping();

    foreach ($definitions as $definition) {
        list($fieldName, $fieldType, $keepRaw) = $definition;

        $exifMapping->add($fieldName, $fieldType);
        if ($keepRaw) {
            // Applies to the field that was just added
            $exifMapping->notAnalyzed();
        }
    }

    return $exifMapping;
}
|
||||
|
||||
/**
 * Builds the mapping of record status flags, merged across databoxes.
 *
 * Every status bit of every databox contributes one boolean field whose
 * name is the normalized form of the bit's 'labelon' entry. A key that
 * was already mapped is not added again.
 *
 * @return Mapping
 */
private function getRecordFlagsMapping()
{
    $mapping = new Mapping();
    // Keys already mapped, stored as array keys so that membership is an
    // exact match. A loose in_array() would treat numeric-looking keys
    // such as "100" and "1e2" as equal and silently drop one of them.
    $seen = array();

    foreach ($this->appbox->get_databoxes() as $databox) {
        foreach ($databox->get_statusbits() as $status) {
            $key = self::normalizeFlagKey($status['labelon']);
            // We only add to mapping new statuses
            if (!isset($seen[$key])) {
                $mapping->add($key, 'boolean');
                $seen[$key] = true;
            }
        }
    }

    return $mapping;
}
|
||||
|
||||
/**
 * Turns a status label into a key usable as a mapping field name.
 *
 * The label is Unicode-normalized, every run of characters other than
 * ASCII letters and digits becomes a single underscore, the result is
 * lowercased and leading/trailing underscores are removed.
 *
 * @param string $key Raw label
 *
 * @return string Normalized key (may be empty when the label holds no
 *                ASCII letter or digit)
 */
private static function normalizeFlagKey($key)
{
    // normalizer_normalize() does not return a string on failure; keep
    // the raw label in that case instead of collapsing it to ''
    $normalized = normalizer_normalize($key);
    if (is_string($normalized)) {
        $key = $normalized;
    }

    // The class must include "0": with "1-9" a label such as "Top 10"
    // became "top_1" and collided with "Top 1". Matching whole runs also
    // collapses consecutive separators into one underscore.
    $key = preg_replace('/[^A-Za-z0-9]+/', '_', $key);
    $key = strtolower($key);

    return trim($key, '_');
}
|
||||
}
|
||||
|
@@ -11,15 +11,60 @@
|
||||
|
||||
namespace Alchemy\Phrasea\SearchEngine\Elastic;
|
||||
|
||||
use LogicException;
|
||||
use RuntimeException;
|
||||
|
||||
class Mapping
|
||||
{
|
||||
private $fields = array();
|
||||
private $current;
|
||||
|
||||
const DATE_FORMAT_MYSQL = 'yyyy-MM-dd HH:mm:ss';
|
||||
|
||||
// Core types
|
||||
const TYPE_STRING = 'string';
|
||||
const TYPE_BOOLEAN = 'boolean';
|
||||
const TYPE_DATE = 'date';
|
||||
// Number core types
|
||||
const TYPE_FLOAT = 'float';
|
||||
const TYPE_DOUBLE = 'double';
|
||||
const TYPE_INTEGER = 'integer';
|
||||
const TYPE_LONG = 'long';
|
||||
const TYPE_SHORT = 'short';
|
||||
const TYPE_BYTE = 'byte';
|
||||
// Compound types
|
||||
const TYPE_OBJECT = 'object';
|
||||
|
||||
private static $types = array(
|
||||
self::TYPE_STRING,
|
||||
self::TYPE_BOOLEAN,
|
||||
self::TYPE_DATE,
|
||||
self::TYPE_FLOAT,
|
||||
self::TYPE_DOUBLE,
|
||||
self::TYPE_INTEGER,
|
||||
self::TYPE_LONG,
|
||||
self::TYPE_SHORT,
|
||||
self::TYPE_BYTE,
|
||||
);
|
||||
|
||||
public function add($name, $type)
|
||||
{
|
||||
// TODO Check input
|
||||
$this->fields[$name] = array('type' => $type);
|
||||
$field = array();
|
||||
if ($type instanceof self) {
|
||||
$field['type'] = self::TYPE_OBJECT;
|
||||
$field['properties'] = $type;
|
||||
}
|
||||
elseif (in_array($type, self::$types)) {
|
||||
$field['type'] = $type;
|
||||
} else {
|
||||
throw new RuntimeException(sprintf(
|
||||
'Invalid field mapping type "%s", expected "%s" or Mapping instance.',
|
||||
$type,
|
||||
implode('", "', self::$types)
|
||||
));
|
||||
}
|
||||
|
||||
$this->fields[$name] = $field;
|
||||
$this->current = $name;
|
||||
|
||||
return $this;
|
||||
@@ -27,14 +72,27 @@ class Mapping
|
||||
|
||||
/**
 * Exports the mapping as an array holding a single 'properties' entry.
 *
 * The entry is built by exportProperties() rather than taken from the
 * raw field list, which would expose nested Mapping objects instead of
 * their exported arrays.
 *
 * @return array
 */
public function export()
{
    return ['properties' => $this->exportProperties()];
}
|
||||
|
||||
/**
 * Exports the field definitions of this mapping, indexed by field name.
 *
 * For a field of type object, the 'properties' entry is replaced by the
 * result of calling exportProperties() on the value it holds.
 *
 * @return array
 */
public function exportProperties()
{
    $exported = array();

    foreach ($this->fields as $fieldName => $definition) {
        if (self::TYPE_OBJECT === $definition['type']) {
            // $definition is a copy, the stored field is left untouched
            $definition['properties'] = $definition['properties']->exportProperties();
        }

        $exported[$fieldName] = $definition;
    }

    return $exported;
}
|
||||
|
||||
public function notAnalyzed()
|
||||
{
|
||||
$field =& $this->currentField();
|
||||
if ($field['type'] !== 'string') {
|
||||
throw new \LogicException('Only string fields can be not analyzed');
|
||||
$field = &$this->currentField();
|
||||
if ($field['type'] !== self::TYPE_STRING) {
|
||||
throw new LogicException('Only string fields can be not analyzed');
|
||||
}
|
||||
$field['index'] = 'not_analyzed';
|
||||
|
||||
@@ -43,9 +101,9 @@ class Mapping
|
||||
|
||||
public function format($format)
|
||||
{
|
||||
$field =& $this->currentField();
|
||||
if ($field['type'] !== 'date') {
|
||||
throw new \LogicException('Only date fields can have a format');
|
||||
$field = &$this->currentField();
|
||||
if ($field['type'] !== self::TYPE_DATE) {
|
||||
throw new LogicException('Only date fields can have a format');
|
||||
}
|
||||
$field['format'] = $format;
|
||||
|
||||
@@ -55,7 +113,7 @@ class Mapping
|
||||
protected function &currentField()
|
||||
{
|
||||
if (null === $this->current) {
|
||||
throw new \LogicException('You must add a field first');
|
||||
throw new LogicException('You must add a field first');
|
||||
}
|
||||
|
||||
return $this->fields[$this->current];
|
||||
|
Reference in New Issue
Block a user