From 8ffeb58289fa5c3ec601f837a33205de1ae92f34 Mon Sep 17 00:00:00 2001 From: Jean-Yves Gaulier Date: Sun, 29 Mar 2020 20:11:03 +0200 Subject: [PATCH 1/6] PHRAS-3001_es-geopoint-sources_4.1 WIP - fix : gps position indexed by es now comes from the metadata fields defined in conf; (not from exif anymore). - todo/wip : decide conf place, since today we only have an array of geo providers - todo/wip : does exif apply as fallback if nothing in conf ? or if metadata not set in record ? --- .../Provider/SearchEngineServiceProvider.php | 2 + .../Elastic/DataboxFetcherFactory.php | 13 +++- .../Indexer/Record/Hydrator/GpsPosition.php | 18 +++-- .../Record/Hydrator/MetadataHydrator.php | 75 ++++++++++++------- 4 files changed, 71 insertions(+), 37 deletions(-) diff --git a/lib/Alchemy/Phrasea/Core/Provider/SearchEngineServiceProvider.php b/lib/Alchemy/Phrasea/Core/Provider/SearchEngineServiceProvider.php index e342bf8380..8968ef5f0a 100644 --- a/lib/Alchemy/Phrasea/Core/Provider/SearchEngineServiceProvider.php +++ b/lib/Alchemy/Phrasea/Core/Provider/SearchEngineServiceProvider.php @@ -43,6 +43,7 @@ use Silex\ServiceProviderInterface; use Symfony\Component\EventDispatcher\EventDispatcherInterface; use Symfony\Component\HttpKernel\KernelEvents; + class SearchEngineServiceProvider implements ServiceProviderInterface { public function register(Application $app) @@ -145,6 +146,7 @@ class SearchEngineServiceProvider implements ServiceProviderInterface $app['elasticsearch.indexer.databox_fetcher_factory'] = $app->share(function ($app) { return new DataboxFetcherFactory( + $app['conf'], $app['elasticsearch.record_helper'], $app['elasticsearch.options'], $app, diff --git a/lib/Alchemy/Phrasea/SearchEngine/Elastic/DataboxFetcherFactory.php b/lib/Alchemy/Phrasea/SearchEngine/Elastic/DataboxFetcherFactory.php index b4eb070896..469ef65fc9 100644 --- a/lib/Alchemy/Phrasea/SearchEngine/Elastic/DataboxFetcherFactory.php +++ b/lib/Alchemy/Phrasea/SearchEngine/Elastic/DataboxFetcherFactory.php @@ -2,6 +2,7 @@ namespace Alchemy\Phrasea\SearchEngine\Elastic; +use Alchemy\Phrasea\Core\Configuration\PropertyAccess; use Alchemy\Phrasea\SearchEngine\Elastic\Indexer\Record\Delegate\FetcherDelegateInterface; use Alchemy\Phrasea\SearchEngine\Elastic\Indexer\Record\Fetcher; use Alchemy\Phrasea\SearchEngine\Elastic\Indexer\Record\Hydrator\CoreHydrator; @@ -13,8 +14,14 @@ use Alchemy\Phrasea\SearchEngine\Elastic\Indexer\Record\Hydrator\TitleHydrator; use Alchemy\Phrasea\SearchEngine\Elastic\Structure\Structure; use Alchemy\Phrasea\SearchEngine\Elastic\Thesaurus\CandidateTerms; + class DataboxFetcherFactory { + /** + * @var PropertyAccess phraseanet configuration + */ + private $conf; + /** * @var \ArrayAccess */ @@ -39,14 +46,16 @@ class DataboxFetcherFactory private $options; /** + * @param PropertyAccess $conf * @param RecordHelper $recordHelper * @param ElasticsearchOptions $options * @param \ArrayAccess $container * @param string $structureKey * @param string $thesaurusKey */ - public function __construct(RecordHelper $recordHelper, ElasticsearchOptions $options, \ArrayAccess $container, $structureKey, $thesaurusKey) + public function __construct(PropertyAccess $conf, RecordHelper $recordHelper, ElasticsearchOptions $options, \ArrayAccess $container, $structureKey, $thesaurusKey) { + $this->conf = $conf; $this->recordHelper = $recordHelper; $this->options = $options; $this->container = $container; @@ -70,7 +79,7 @@ class DataboxFetcherFactory [ new CoreHydrator($databox->get_sbas_id(), $databox->get_viewname(), $this->recordHelper), new TitleHydrator($connection, $this->recordHelper), - new MetadataHydrator($connection, $this->getStructure(), $this->recordHelper), + new MetadataHydrator($this->conf, $connection, $this->getStructure(), $this->recordHelper), new FlagHydrator($this->getStructure(), $databox), new ThesaurusHydrator($this->getStructure(), $this->getThesaurus(), $candidateTerms), new SubDefinitionHydrator($connection) diff --git a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer/Record/Hydrator/GpsPosition.php b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer/Record/Hydrator/GpsPosition.php index ba0dcf573f..96c5ed146f 100644 --- a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer/Record/Hydrator/GpsPosition.php +++ b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer/Record/Hydrator/GpsPosition.php @@ -29,6 +29,16 @@ class GpsPosition private $latitude; private $latitude_ref; + public function __construct() + { + $this->clear(); + } + + public function clear() + { + $this->longitude = $this->longitude_ref = $this->latitude = $this->latitude_ref = null; + } + public function set($tag_name, $value) { switch ($tag_name) { @@ -95,19 +105,11 @@ class GpsPosition public function getCompositeLongitude() { - if ($this->longitude === null) { - return null; - } - return $this->longitude ; } public function getCompositeLatitude() { - if ($this->latitude === null) { - return null; - } - return $this->latitude; } diff --git a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer/Record/Hydrator/MetadataHydrator.php b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer/Record/Hydrator/MetadataHydrator.php index 5dc2471cb9..2bad805a09 100644 --- a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer/Record/Hydrator/MetadataHydrator.php +++ b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer/Record/Hydrator/MetadataHydrator.php @@ -11,6 +11,7 @@ namespace Alchemy\Phrasea\SearchEngine\Elastic\Indexer\Record\Hydrator; +use Alchemy\Phrasea\Core\Configuration\PropertyAccess; use Alchemy\Phrasea\SearchEngine\Elastic\Exception\Exception; use Alchemy\Phrasea\SearchEngine\Elastic\FieldMapping; use Alchemy\Phrasea\SearchEngine\Elastic\Mapping; @@ -24,28 +25,42 @@ use InvalidArgumentException; class MetadataHydrator implements HydratorInterface { + private $conf; private $connection; private $structure; private $helper; - private $gps_position_buffer = []; + private $lat_fieldname; // get from conf + private $lon_fieldname; // get from conf + private $caption_gps_position; + private $exif_gps_position; - public function __construct(DriverConnection $connection, Structure $structure, RecordHelper $helper) + public function __construct(PropertyAccess $conf, DriverConnection $connection, Structure $structure, RecordHelper $helper) { + $this->conf = $conf; $this->connection = $connection; $this->structure = $structure; $this->helper = $helper; + + // get the fieldnames of source of lat / lon geo fields (defined in instance conf) + $this->lat_fieldname = $conf->get(['geocoding', 'lat_fieldname']); + $this->lon_fieldname = $conf->get(['geocoding', 'lon_fieldname']); + + $this->caption_gps_position = new GpsPosition(); + $this->exif_gps_position = new GpsPosition(); } public function hydrateRecords(array &$records) { - $sql = "(SELECT record_id, ms.name AS `key`, m.value AS value, 'caption' AS type, ms.business AS private\n" + $sql = "SELECT * FROM (" + . "(SELECT record_id, ms.name AS `key`, m.value AS value, 'caption' AS type, ms.business AS private\n" . " FROM metadatas AS m INNER JOIN metadatas_structure AS ms ON (ms.id = m.meta_struct_id)\n" . " WHERE record_id IN (?))\n" . "UNION\n" . "(SELECT record_id, t.name AS `key`, t.value AS value, 'exif' AS type, 0 AS private\n" . " FROM technical_datas AS t\n" - . " WHERE record_id IN (?))\n"; + . " WHERE record_id IN (?))\n" + . ") AS t ORDER BY record_id"; $ids = array_keys($records); $statement = $this->connection->executeQuery( @@ -54,7 +69,17 @@ class MetadataHydrator implements HydratorInterface array(Connection::PARAM_INT_ARRAY, Connection::PARAM_INT_ARRAY) ); + $record_id = -1; while ($metadata = $statement->fetch()) { + + if($metadata['record_id'] !== $record_id) { + // record has changed, don't mix with previous one + $this->caption_gps_position->clear(); + $this->exif_gps_position->clear(); + + $record_id = $metadata['record_id']; + } + // Store metadata value $key = $metadata['key']; $value = trim($metadata['value']); @@ -64,10 +89,10 @@ class MetadataHydrator implements HydratorInterface continue; } - $id = $metadata['record_id']; - if (isset($records[$id])) { - $record =& $records[$id]; - } else { + if (isset($records[$record_id])) { + $record =& $records[$record_id]; + } + else { throw new Exception('Received metadata from unexpected record'); } @@ -89,13 +114,22 @@ class MetadataHydrator implements HydratorInterface $record[$field] = array(); } $record[$field][] = $value; + + if($key === $this->lat_fieldname) { + $this->handleGpsPosition($this->caption_gps_position, $record, GpsPosition::LATITUDE_TAG_NAME, $value); + } + elseif($key === $this->lon_fieldname) { + $this->handleGpsPosition($this->caption_gps_position, $record, GpsPosition::LONGITUDE_TAG_NAME, $value); + } break; case 'exif': + /* gps position only comes from caption field define in conf if (GpsPosition::isSupportedTagName($key)) { - $this->handleGpsPosition($records, $id, $key, $value); + $this->handleGpsPosition($this->exif_gps_position, $record, $key, $value); break; } + */ $tag = $this->structure->getMetadataTagByName($key); if ($tag) { $value = $this->helper->sanitizeValue($value, $tag->getType()); @@ -109,38 +143,25 @@ class MetadataHydrator implements HydratorInterface break; } } - - $this->clearGpsPositionBuffer(); } - private function handleGpsPosition(&$records, $id, $tag_name, $value) + private function handleGpsPosition(GpsPosition &$position, &$record, $tag_name, $value) { - // Get position object - if (!isset($this->gps_position_buffer[$id])) { - $this->gps_position_buffer[$id] = new GpsPosition(); - } - $position = $this->gps_position_buffer[$id]; // Push this tag into object $position->set($tag_name, $value); + // Try to output complete position if ($position->isCompleteComposite()) { $lon = $position->getCompositeLongitude(); $lat = $position->getCompositeLatitude(); - $records[$id]['metadata_tags']['Longitude'] = $lon; - $records[$id]['metadata_tags']['Latitude'] = $lat; + $record['metadata_tags']['Longitude'] = $lon; + $record['metadata_tags']['Latitude'] = $lat; - $records[$id]["location"] = [ + $record["location"] = [ "lat" => $lat, "lon" => $lon ]; - - unset($this->gps_position_buffer[$id]); } } - - private function clearGpsPositionBuffer() - { - $this->gps_position_buffer = []; - } } From 9f216813b7db9e33fd8ee3ffd6c0c7c2d3363130 Mon Sep 17 00:00:00 2001 From: Jean-Yves Gaulier Date: Tue, 31 Mar 2020 17:16:57 +0200 Subject: [PATCH 2/6] PHRAS-3001_es-geopoint-sources_4.1 - fix : follow the original conf structure (position-fields into provider) - add : handles "latlng" format --- .../Indexer/Record/Hydrator/GpsPosition.php | 48 +++++++++++++++++++ .../Record/Hydrator/MetadataHydrator.php | 35 ++++++++++---- 2 files changed, 74 insertions(+), 9 deletions(-) diff --git a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer/Record/Hydrator/GpsPosition.php b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer/Record/Hydrator/GpsPosition.php index 96c5ed146f..9d4f1aabf7 100644 --- a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer/Record/Hydrator/GpsPosition.php +++ b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer/Record/Hydrator/GpsPosition.php @@ -15,6 +15,7 @@ use Assert\Assertion; class GpsPosition { + const FULL_GEO_NOTATION = 'FullNotation'; const LONGITUDE_TAG_NAME = 'Longitude'; const LONGITUDE_REF_TAG_NAME = 'LongitudeRef'; const LONGITUDE_REF_WEST = 'W'; @@ -74,6 +75,53 @@ class GpsPosition $this->latitude_ref = $normalized; break; + case self::FULL_GEO_NOTATION: + $re = '/(-?\d+(?:\.\d+)?°?)\s*(\d+(?:\.\d+)?\')?\s*(\d+(?:\.\d+)?")?\s*(N|S|E|W)?/um'; + $normalized = trim(strtoupper($value)); + $matches = null; + preg_match_all($re, $normalized, $matches, PREG_SET_ORDER, 0); + if(count($matches) === 2) { // we need lat and lon + $lat = $lon = null; + foreach ($matches as $imatch => $match) { + if(count($match) != 5) { + continue; + } + $v = 0.0; + for($part=1, $div=1.0; $part<=3; $part++, $div*=60.0) { + $v += floatval($match[$part]) / $div; + } + switch($match[4]) { // N S E W + case 'N': + $lat = $v; + break; + case 'S': + $lat = -$v; + break; + case 'E': + $lon = $v; + break; + case 'W': + $lon = -$v; + break; + case '': // no ref -> lat lon (first=lat, second=lon) + if($imatch === 0) { + $lat = $v; + } + else { + $lon = $v; + } + break; + default: + throw new \InvalidArgumentException(sprintf('Unsupported reference "%s", should be N|S|E|W.', $match[4])); + } + } + if($lat !== null && $lon != null) { + $this->set(self::LATITUDE_TAG_NAME, $lat); + $this->set(self::LONGITUDE_TAG_NAME, $lon); + } + } + break; + default: throw new \InvalidArgumentException(sprintf('Unsupported tag name "%s".', $tag_name)); } diff --git a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer/Record/Hydrator/MetadataHydrator.php b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer/Record/Hydrator/MetadataHydrator.php index 2bad805a09..4bb9ea308c 100644 --- a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer/Record/Hydrator/MetadataHydrator.php +++ b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer/Record/Hydrator/MetadataHydrator.php @@ -30,8 +30,8 @@ class MetadataHydrator implements HydratorInterface private $structure; private $helper; - private $lat_fieldname; // get from conf - private $lon_fieldname; // get from conf + private $position_fields_mapping; // get from conf + private $caption_gps_position; private $exif_gps_position; @@ -43,8 +43,14 @@ class MetadataHydrator implements HydratorInterface $this->helper = $helper; // get the fieldnames of source of lat / lon geo fields (defined in instance conf) - $this->lat_fieldname = $conf->get(['geocoding', 'lat_fieldname']); - $this->lon_fieldname = $conf->get(['geocoding', 'lon_fieldname']); + $this->position_fields_mapping = []; + foreach($conf->get(['geocoding-providers'], []) as $provider) { + if($provider['enabled'] && array_key_exists('position-fields', $provider)) { + foreach ($provider['position-fields'] as $position_field) { + $this->position_fields_mapping[$position_field['name']] = $position_field['type']; + } + } + } $this->caption_gps_position = new GpsPosition(); $this->exif_gps_position = new GpsPosition(); @@ -115,12 +121,23 @@ class MetadataHydrator implements HydratorInterface } $record[$field][] = $value; - if($key === $this->lat_fieldname) { - $this->handleGpsPosition($this->caption_gps_position, $record, GpsPosition::LATITUDE_TAG_NAME, $value); - } - elseif($key === $this->lon_fieldname) { - $this->handleGpsPosition($this->caption_gps_position, $record, GpsPosition::LONGITUDE_TAG_NAME, $value); + if(array_key_exists($key, $this->position_fields_mapping)) { + // this field is mapped as a position part (lat, lon, latlon), push it + switch($this->position_fields_mapping[$key]) { + case 'lat': + $this->handleGpsPosition($this->caption_gps_position, $record, GpsPosition::LATITUDE_TAG_NAME, $value); + break; + case 'lng': + case 'lon': + $this->handleGpsPosition($this->caption_gps_position, $record, GpsPosition::LONGITUDE_TAG_NAME, $value); + break; + case 'latlng': + case 'latlon': + $this->handleGpsPosition($this->caption_gps_position, $record, GpsPosition::FULL_GEO_NOTATION, $value); + break; + } } + break; case 'exif': From 027f7e1571c7b14602cbe6a656eb0f341a4601c3 Mon Sep 17 00:00:00 2001 From: Jean-Yves Gaulier Date: Tue, 31 Mar 2020 17:54:22 +0200 Subject: [PATCH 3/6] PHRAS-3001_es-geopoint-sources_4.1 - fix : exif source is a fallback if no good values are found in the configured source field (or if conf is not set) --- .../Elastic/Indexer/Record/Hydrator/MetadataHydrator.php | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer/Record/Hydrator/MetadataHydrator.php b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer/Record/Hydrator/MetadataHydrator.php index 4bb9ea308c..f930fce740 100644 --- a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer/Record/Hydrator/MetadataHydrator.php +++ b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer/Record/Hydrator/MetadataHydrator.php @@ -141,12 +141,12 @@ class MetadataHydrator implements HydratorInterface break; case 'exif': - /* gps position only comes from caption field define in conf - if (GpsPosition::isSupportedTagName($key)) { + // exif gps is a first-chance if caption is not yet set + // anyway if caption is set later, it will override the exif values + if (GpsPosition::isSupportedTagName($key) && !$this->caption_gps_position->isCompleteComposite()) { $this->handleGpsPosition($this->exif_gps_position, $record, $key, $value); break; } - */ $tag = $this->structure->getMetadataTagByName($key); if ($tag) { $value = $this->helper->sanitizeValue($value, $tag->getType()); From ed0c2bbf2e5f956b5b8b0bcf309fab8d9d56be3e Mon Sep 17 00:00:00 2001 From: Harrys Ravalomanana Date: Wed, 1 Apr 2020 17:27:54 +0400 Subject: [PATCH 4/6] PHRAS-2992 fix date translation --- lib/classes/phraseadate.php | 41 +++++++++++++++++++++++++++++-------- 1 file changed, 32 insertions(+), 9 deletions(-) diff --git a/lib/classes/phraseadate.php b/lib/classes/phraseadate.php index a9732e7822..b4a53e11b9 100644 --- a/lib/classes/phraseadate.php +++ b/lib/classes/phraseadate.php @@ -149,17 +149,24 @@ class phraseadate */ private function formatDate(DateTime $date, $locale, $format) { - switch ($locale) { default: case 'fr': switch ($format) { default: case 'DAY_MONTH': - $date_formated = strftime("%e %B", $date->format('U')); + $formatM = new IntlDateFormatter( + $locale, + NULL, NULL, NULL, NULL, 'dd MMMM' + ); + $date_formated = $formatM->format($date); break; case 'DAY_MONTH_YEAR': - $date_formated = strftime("%e %B %Y", $date->format('U')); + $formatY = new IntlDateFormatter( + $locale, + NULL, NULL, NULL, NULL, 'dd MMMM yyyy' + ); + $date_formated = $formatY->format($date); break; } break; @@ -167,10 +174,18 @@ class phraseadate switch ($format) { default: case 'DAY_MONTH': - $date_formated = strftime("%B %e", $date->format('U')); + $formatM = new IntlDateFormatter( + $locale, + NULL, NULL, NULL, NULL, 'MMMM dd' + ); + $date_formated = $formatM->format($date); break; case 'DAY_MONTH_YEAR': - $date_formated = strftime("%B %e %Y", $date->format('U')); + $formatY = new IntlDateFormatter( + $locale, + NULL, NULL, NULL, NULL, 'MMMM dd yyyy' + ); + $date_formated = $formatY->format($date); break; } break; @@ -178,18 +193,26 @@ class phraseadate switch ($format) { default: case 'DAY_MONTH': - $date_formated = strftime("%e. %B", $date->format('U')); + $formatM = new IntlDateFormatter( + $locale, + NULL, NULL, NULL, NULL, 'dd MMMM' + ); + $date_formated = $formatM->format($date); break; case 'DAY_MONTH_YEAR': - $date_formated = strftime("%e. %B %Y", $date->format('U')); + $formatY = new IntlDateFormatter( + $locale, + NULL, NULL, NULL, NULL, 'dd MMMM yyyy' + ); + $date_formated = $formatY->format($date); break; } - break; } - return $date_formated; } + + /** * * @param string $isodelimdate From eddf95e71b6e6d1275e6a811a58e397ab419170e Mon Sep 17 00:00:00 2001 From: Harrys Ravalomanana Date: Wed, 1 Apr 2020 17:40:01 +0400 Subject: [PATCH 5/6] PHRAS-2992 --- lib/classes/phraseadate.php | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/lib/classes/phraseadate.php b/lib/classes/phraseadate.php index b4a53e11b9..16990a65d4 100644 --- a/lib/classes/phraseadate.php +++ b/lib/classes/phraseadate.php @@ -206,13 +206,12 @@ class phraseadate ); $date_formated = $formatY->format($date); break; - } - } + }; + break; + } return $date_formated; } - - /** * * @param string $isodelimdate From 1b752fde1cd19501bcfd56559aede9fc0783b6b2 Mon Sep 17 00:00:00 2001 From: Harrys Ravalomanana Date: Wed, 1 Apr 2020 17:42:36 +0400 Subject: [PATCH 6/6] PHRAS-2992 code factoring --- lib/classes/phraseadate.php | 22 ++-------------------- 1 file changed, 2 insertions(+), 20 deletions(-) diff --git a/lib/classes/phraseadate.php b/lib/classes/phraseadate.php index 16990a65d4..e6599283cc 100644 --- a/lib/classes/phraseadate.php +++ b/lib/classes/phraseadate.php @@ -151,6 +151,7 @@ class phraseadate { switch ($locale) { default: + case 'de': case 'fr': switch ($format) { default: @@ -189,26 +190,7 @@ class phraseadate break; } break; - case 'de': - switch ($format) { - default: - case 'DAY_MONTH': - $formatM = new IntlDateFormatter( - $locale, - NULL, NULL, NULL, NULL, 'dd MMMM' - ); - $date_formated = $formatM->format($date); - break; - case 'DAY_MONTH_YEAR': - $formatY = new IntlDateFormatter( - $locale, - NULL, NULL, NULL, NULL, 'dd MMMM yyyy' - ); - $date_formated = $formatY->format($date); - break; - }; - break; - } + } return $date_formated; }