Merge pull request #3416 from alchemy-fr/PHRAS-3001_es-geopoint-sources_4.1

PHRAS-3001 #comment  merge es geopoint sources 4.1
This commit is contained in:
Nicolas Maillat
2020-04-02 16:37:19 +02:00
committed by GitHub
4 changed files with 137 additions and 38 deletions

View File

@@ -43,6 +43,7 @@ use Silex\ServiceProviderInterface;
use Symfony\Component\EventDispatcher\EventDispatcherInterface;
use Symfony\Component\HttpKernel\KernelEvents;
class SearchEngineServiceProvider implements ServiceProviderInterface
{
public function register(Application $app)
@@ -145,6 +146,7 @@ class SearchEngineServiceProvider implements ServiceProviderInterface
$app['elasticsearch.indexer.databox_fetcher_factory'] = $app->share(function ($app) {
return new DataboxFetcherFactory(
$app['conf'],
$app['elasticsearch.record_helper'],
$app['elasticsearch.options'],
$app,

View File

@@ -2,6 +2,7 @@
namespace Alchemy\Phrasea\SearchEngine\Elastic;
use Alchemy\Phrasea\Core\Configuration\PropertyAccess;
use Alchemy\Phrasea\SearchEngine\Elastic\Indexer\Record\Delegate\FetcherDelegateInterface;
use Alchemy\Phrasea\SearchEngine\Elastic\Indexer\Record\Fetcher;
use Alchemy\Phrasea\SearchEngine\Elastic\Indexer\Record\Hydrator\CoreHydrator;
@@ -13,8 +14,14 @@ use Alchemy\Phrasea\SearchEngine\Elastic\Indexer\Record\Hydrator\TitleHydrator;
use Alchemy\Phrasea\SearchEngine\Elastic\Structure\Structure;
use Alchemy\Phrasea\SearchEngine\Elastic\Thesaurus\CandidateTerms;
class DataboxFetcherFactory
{
/**
* @var PropertyAccess phraseanet configuration
*/
private $conf;
/**
* @var \ArrayAccess
*/
@@ -39,14 +46,16 @@ class DataboxFetcherFactory
private $options;
/**
* @param PropertyAccess $conf
* @param RecordHelper $recordHelper
* @param ElasticsearchOptions $options
* @param \ArrayAccess $container
* @param string $structureKey
* @param string $thesaurusKey
*/
public function __construct(RecordHelper $recordHelper, ElasticsearchOptions $options, \ArrayAccess $container, $structureKey, $thesaurusKey)
public function __construct(PropertyAccess $conf, RecordHelper $recordHelper, ElasticsearchOptions $options, \ArrayAccess $container, $structureKey, $thesaurusKey)
{
$this->conf = $conf;
$this->recordHelper = $recordHelper;
$this->options = $options;
$this->container = $container;
@@ -70,7 +79,7 @@ class DataboxFetcherFactory
[
new CoreHydrator($databox->get_sbas_id(), $databox->get_viewname(), $this->recordHelper),
new TitleHydrator($connection, $this->recordHelper),
new MetadataHydrator($connection, $this->getStructure(), $this->recordHelper),
new MetadataHydrator($this->conf, $connection, $this->getStructure(), $this->recordHelper),
new FlagHydrator($this->getStructure(), $databox),
new ThesaurusHydrator($this->getStructure(), $this->getThesaurus(), $candidateTerms),
new SubDefinitionHydrator($connection)

View File

@@ -15,6 +15,7 @@ use Assert\Assertion;
class GpsPosition
{
const FULL_GEO_NOTATION = 'FullNotation';
const LONGITUDE_TAG_NAME = 'Longitude';
const LONGITUDE_REF_TAG_NAME = 'LongitudeRef';
const LONGITUDE_REF_WEST = 'W';
@@ -29,6 +30,16 @@ class GpsPosition
private $latitude;
private $latitude_ref;
/**
 * Creates a position with nothing set yet: initialization is delegated to
 * clear(), which resets the coordinates and their references to null.
 */
public function __construct()
{
$this->clear();
}
/**
 * Resets the position: latitude, longitude and both of their references
 * are set back to null so the object can be reused for another record.
 */
public function clear()
{
    $this->latitude_ref = null;
    $this->latitude = null;
    $this->longitude_ref = null;
    $this->longitude = null;
}
public function set($tag_name, $value)
{
switch ($tag_name) {
@@ -64,6 +75,53 @@ class GpsPosition
$this->latitude_ref = $normalized;
break;
case self::FULL_GEO_NOTATION:
    // Parse a combined "latitude longitude" string. Each coordinate is a degrees
    // number, optionally followed by minutes (') and seconds ("), and optionally
    // by a N|S|E|W reference. Exactly two coordinates must be found.
    $re = '/(-?\d+(?:\.\d+)?°?)\s*(\d+(?:\.\d+)?\')?\s*(\d+(?:\.\d+)?")?\s*(N|S|E|W)?/um';
    $normalized = trim(strtoupper($value));
    $matches = null;
    preg_match_all($re, $normalized, $matches, PREG_SET_ORDER, 0);
    if (count($matches) === 2) { // we need lat and lon
        $lat = $lon = null;
        foreach ($matches as $imatch => $match) {
            // With PREG_SET_ORDER, trailing groups that did not participate in the
            // match may be missing from $match (e.g. "48.85 2.35" has no minutes,
            // seconds or reference). Pad so indexes 1..4 always exist; otherwise
            // the "no reference" branch below could never be reached.
            $match = array_pad($match, 5, '');

            // degrees + minutes/60 + seconds/3600 (floatval() ignores the trailing ° ' ")
            $v = 0.0;
            for ($part = 1, $div = 1.0; $part <= 3; $part++, $div *= 60.0) {
                $v += floatval($match[$part]) / $div;
            }

            switch ($match[4]) { // N S E W
                case 'N':
                    $lat = $v;
                    break;
                case 'S':
                    $lat = -$v;
                    break;
                case 'E':
                    $lon = $v;
                    break;
                case 'W':
                    $lon = -$v;
                    break;
                case '': // no ref -> lat lon (first=lat, second=lon)
                    if ($imatch === 0) {
                        $lat = $v;
                    } else {
                        $lon = $v;
                    }
                    break;
                default:
                    throw new \InvalidArgumentException(sprintf('Unsupported reference "%s", should be N|S|E|W.', $match[4]));
            }
        }
        // Strict comparison on both sides: a coordinate of exactly 0.0 (equator or
        // Greenwich meridian) is a valid value and must not be mistaken for "unset".
        if ($lat !== null && $lon !== null) {
            $this->set(self::LATITUDE_TAG_NAME, $lat);
            $this->set(self::LONGITUDE_TAG_NAME, $lon);
        }
    }
    break;
default:
throw new \InvalidArgumentException(sprintf('Unsupported tag name "%s".', $tag_name));
}
@@ -95,19 +153,11 @@ class GpsPosition
/**
 * Returns the stored longitude, or null when no longitude has been set.
 */
public function getCompositeLongitude()
{
    // The property is already null when unset, so it can be returned as is.
    return $this->longitude;
}
/**
 * Returns the stored latitude, or null when no latitude has been set.
 */
public function getCompositeLatitude()
{
    // The property is already null when unset, so it can be returned as is.
    return $this->latitude;
}

View File

@@ -11,6 +11,7 @@
namespace Alchemy\Phrasea\SearchEngine\Elastic\Indexer\Record\Hydrator;
use Alchemy\Phrasea\Core\Configuration\PropertyAccess;
use Alchemy\Phrasea\SearchEngine\Elastic\Exception\Exception;
use Alchemy\Phrasea\SearchEngine\Elastic\FieldMapping;
use Alchemy\Phrasea\SearchEngine\Elastic\Mapping;
@@ -24,28 +25,48 @@ use InvalidArgumentException;
class MetadataHydrator implements HydratorInterface
{
private $conf;
private $connection;
private $structure;
private $helper;
private $gps_position_buffer = [];
private $position_fields_mapping; // get from conf
public function __construct(DriverConnection $connection, Structure $structure, RecordHelper $helper)
private $caption_gps_position;
private $exif_gps_position;
/**
 * Stores the collaborators and builds the map of caption fields that carry
 * a geographic position, as declared by the enabled geocoding providers.
 *
 * @param PropertyAccess   $conf       configuration, read here for the 'geocoding-providers' entries
 * @param DriverConnection $connection connection kept for the metadata queries
 * @param Structure        $structure  structure kept for later field/tag lookups
 * @param RecordHelper     $helper     helper kept for later use
 */
public function __construct(PropertyAccess $conf, DriverConnection $connection, Structure $structure, RecordHelper $helper)
{
    $this->conf = $conf;
    $this->connection = $connection;
    $this->structure = $structure;
    $this->helper = $helper;

    // Map "caption field name" => "position part type" for every position field
    // declared by an enabled geocoding provider in the instance configuration.
    $this->position_fields_mapping = [];
    foreach ($conf->get(['geocoding-providers'], []) as $provider) {
        // A provider entry may omit 'enabled' or 'position-fields': treat it as
        // having nothing to map instead of raising an undefined-index notice.
        if (empty($provider['enabled'])
            || !isset($provider['position-fields'])
            || !is_array($provider['position-fields'])
        ) {
            continue;
        }
        foreach ($provider['position-fields'] as $position_field) {
            // Skip incomplete entries: both the field name and its type are needed.
            if (!isset($position_field['name'], $position_field['type'])) {
                continue;
            }
            $this->position_fields_mapping[$position_field['name']] = $position_field['type'];
        }
    }

    // One reusable position accumulator per source (caption fields and exif tags).
    $this->caption_gps_position = new GpsPosition();
    $this->exif_gps_position = new GpsPosition();
}
public function hydrateRecords(array &$records)
{
$sql = "(SELECT record_id, ms.name AS `key`, m.value AS value, 'caption' AS type, ms.business AS private\n"
$sql = "SELECT * FROM ("
. "(SELECT record_id, ms.name AS `key`, m.value AS value, 'caption' AS type, ms.business AS private\n"
. " FROM metadatas AS m INNER JOIN metadatas_structure AS ms ON (ms.id = m.meta_struct_id)\n"
. " WHERE record_id IN (?))\n"
. "UNION\n"
. "(SELECT record_id, t.name AS `key`, t.value AS value, 'exif' AS type, 0 AS private\n"
. " FROM technical_datas AS t\n"
. " WHERE record_id IN (?))\n";
. " WHERE record_id IN (?))\n"
. ") AS t ORDER BY record_id";
$ids = array_keys($records);
$statement = $this->connection->executeQuery(
@@ -54,7 +75,17 @@ class MetadataHydrator implements HydratorInterface
array(Connection::PARAM_INT_ARRAY, Connection::PARAM_INT_ARRAY)
);
$record_id = -1;
while ($metadata = $statement->fetch()) {
if($metadata['record_id'] !== $record_id) {
// record has changed, don't mix with previous one
$this->caption_gps_position->clear();
$this->exif_gps_position->clear();
$record_id = $metadata['record_id'];
}
// Store metadata value
$key = $metadata['key'];
$value = trim($metadata['value']);
@@ -64,10 +95,10 @@ class MetadataHydrator implements HydratorInterface
continue;
}
$id = $metadata['record_id'];
if (isset($records[$id])) {
$record =& $records[$id];
} else {
if (isset($records[$record_id])) {
$record =& $records[$record_id];
}
else {
throw new Exception('Received metadata from unexpected record');
}
@@ -89,11 +120,31 @@ class MetadataHydrator implements HydratorInterface
$record[$field] = array();
}
$record[$field][] = $value;
if(array_key_exists($key, $this->position_fields_mapping)) {
// this field is mapped as a position part (lat, lon, latlon), push it
switch($this->position_fields_mapping[$key]) {
case 'lat':
$this->handleGpsPosition($this->caption_gps_position, $record, GpsPosition::LATITUDE_TAG_NAME, $value);
break;
case 'lng':
case 'lon':
$this->handleGpsPosition($this->caption_gps_position, $record, GpsPosition::LONGITUDE_TAG_NAME, $value);
break;
case 'latlng':
case 'latlon':
$this->handleGpsPosition($this->caption_gps_position, $record, GpsPosition::FULL_GEO_NOTATION, $value);
break;
}
}
break;
case 'exif':
if (GpsPosition::isSupportedTagName($key)) {
$this->handleGpsPosition($records, $id, $key, $value);
// EXIF GPS tags are only a fallback, used while the caption position is still incomplete.
// If the caption position is completed later, it overrides the EXIF-derived values.
if (GpsPosition::isSupportedTagName($key) && !$this->caption_gps_position->isCompleteComposite()) {
$this->handleGpsPosition($this->exif_gps_position, $record, $key, $value);
break;
}
$tag = $this->structure->getMetadataTagByName($key);
@@ -109,38 +160,25 @@ class MetadataHydrator implements HydratorInterface
break;
}
}
$this->clearGpsPositionBuffer();
}
private function handleGpsPosition(&$records, $id, $tag_name, $value)
private function handleGpsPosition(GpsPosition &$position, &$record, $tag_name, $value)
{
// Get position object
if (!isset($this->gps_position_buffer[$id])) {
$this->gps_position_buffer[$id] = new GpsPosition();
}
$position = $this->gps_position_buffer[$id];
// Push this tag into object
$position->set($tag_name, $value);
// Try to output complete position
if ($position->isCompleteComposite()) {
$lon = $position->getCompositeLongitude();
$lat = $position->getCompositeLatitude();
$records[$id]['metadata_tags']['Longitude'] = $lon;
$records[$id]['metadata_tags']['Latitude'] = $lat;
$record['metadata_tags']['Longitude'] = $lon;
$record['metadata_tags']['Latitude'] = $lat;
$records[$id]["location"] = [
$record["location"] = [
"lat" => $lat,
"lon" => $lon
];
unset($this->gps_position_buffer[$id]);
}
}
private function clearGpsPositionBuffer()
{
$this->gps_position_buffer = [];
}
}