From bd53e98b0d8b1529197adb90fb1e55e3dc749829 Mon Sep 17 00:00:00 2001 From: aynsix Date: Thu, 15 Aug 2019 16:09:35 +0400 Subject: [PATCH] port to 4.1 fix ES index NUL --- .../Elastic/Indexer/BulkOperation.php | 9 ++-- .../Record/Hydrator/MetadataHydrator.php | 52 ++++--------------- .../Indexer/Record/Hydrator/TitleHydrator.php | 45 +++++++++------- .../SearchEngine/Elastic/RecordHelper.php | 27 ++++++++++ 4 files changed, 67 insertions(+), 66 deletions(-) diff --git a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer/BulkOperation.php b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer/BulkOperation.php index 6ea641492d..b8c834d9f0 100644 --- a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer/BulkOperation.php +++ b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer/BulkOperation.php @@ -155,15 +155,16 @@ class BulkOperation // nb: results (items) are returned IN THE SAME ORDER as commands were pushed in the stack // so the items[X] match the operationIdentifiers[X] foreach ($response['items'] as $key => $item) { - foreach($item as $command=>$result) { // command may be "index" or "delete" - if($response['errors'] && $result['status'] >= 400) { // 4xx or 5xx error - throw new Exception(sprintf('%d: %s', $key, var_export($result, true))); + foreach ($item as $command=>$result) { // command may be "index" or "delete" + if ($response['errors'] && $result['status'] >= 400) { // 4xx or 5xx + $err = array_key_exists('error', $result) ? var_export($result['error'], true) : ($command . " error " . $result['status']); + throw new Exception(sprintf('%d: %s', $key, $err)); } } $operationIdentifier = $this->operationIdentifiers[$key]; - if(is_string($operationIdentifier) || is_int($operationIdentifier)) { // dont include null keys + if (is_string($operationIdentifier) || is_int($operationIdentifier)) { // dont include null keys $callbackData[$operationIdentifier] = $response['items'][$key]; } } diff --git a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer/Record/Hydrator/MetadataHydrator.php b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer/Record/Hydrator/MetadataHydrator.php index 32b93502d4..5dc2471cb9 100644 --- a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer/Record/Hydrator/MetadataHydrator.php +++ b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer/Record/Hydrator/MetadataHydrator.php @@ -39,18 +39,13 @@ class MetadataHydrator implements HydratorInterface public function hydrateRecords(array &$records) { - $sql = <<connection->executeQuery( @@ -62,7 +57,7 @@ SQL; while ($metadata = $statement->fetch()) { // Store metadata value $key = $metadata['key']; - $value = $metadata['value']; + $value = trim($metadata['value']); // Do not keep empty values if ($key === '' || $value === '') { @@ -80,7 +75,7 @@ SQL; case 'caption': // Sanitize fields $value = StringHelper::crlfNormalize($value); - $value = $this->sanitizeValue($value, $this->structure->typeOf($key)); + $value = $this->helper->sanitizeValue($value, $this->structure->typeOf($key)); // Private caption fields are kept apart $type = $metadata['private'] ? 'private_caption' : 'caption'; // Caption are multi-valued @@ -103,7 +98,7 @@ SQL; } $tag = $this->structure->getMetadataTagByName($key); if ($tag) { - $value = $this->sanitizeValue($value, $tag->getType()); + $value = $this->helper->sanitizeValue($value, $tag->getType()); } // EXIF data is single-valued $record['metadata_tags'][$key] = $value; @@ -118,33 +113,6 @@ SQL; $this->clearGpsPositionBuffer(); } - private function sanitizeValue($value, $type) - { - switch ($type) { - case FieldMapping::TYPE_STRING: - return str_replace("\0", "", $value); - - case FieldMapping::TYPE_DATE: - return $this->helper->sanitizeDate($value); - - case FieldMapping::TYPE_FLOAT: - case FieldMapping::TYPE_DOUBLE: - return (float) $value; - - case FieldMapping::TYPE_INTEGER: - case FieldMapping::TYPE_LONG: - case FieldMapping::TYPE_SHORT: - case FieldMapping::TYPE_BYTE: - return (int) $value; - - case FieldMapping::TYPE_BOOLEAN: - return (bool) $value; - - default: - return $value; - } - } - private function handleGpsPosition(&$records, $id, $tag_name, $value) { // Get position object diff --git a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer/Record/Hydrator/TitleHydrator.php b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer/Record/Hydrator/TitleHydrator.php index 88382aa7a7..76dc4704b4 100644 --- a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer/Record/Hydrator/TitleHydrator.php +++ b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer/Record/Hydrator/TitleHydrator.php @@ -11,6 +11,8 @@ namespace Alchemy\Phrasea\SearchEngine\Elastic\Indexer\Record\Hydrator; +use Alchemy\Phrasea\SearchEngine\Elastic\FieldMapping; +use Alchemy\Phrasea\SearchEngine\Elastic\RecordHelper; use Doctrine\DBAL\Connection; use Doctrine\DBAL\Driver\Connection as DriverConnection; @@ -18,31 +20,34 @@ class TitleHydrator implements HydratorInterface { private $connection; - public function __construct(DriverConnection $connection) + /** @var RecordHelper */ + private $helper; + + public function __construct(DriverConnection $connection, RecordHelper $helper) { $this->connection = $connection; + $this->helper = $helper; } public function hydrateRecords(array &$records) { - $sql = <<connection->executeQuery( $sql, array(array_keys($records)), @@ -50,7 +55,7 @@ SQL; ); while ($row = $statement->fetch()) { - $records[$row['record_id']]['title'][$row['locale']] = $row['title']; + $records[$row['record_id']]['title'][$row['locale']] = $this->helper->sanitizeValue($row['title'], FieldMapping::TYPE_STRING); } } } diff --git a/lib/Alchemy/Phrasea/SearchEngine/Elastic/RecordHelper.php b/lib/Alchemy/Phrasea/SearchEngine/Elastic/RecordHelper.php index ffb0c71adf..5debeff82e 100644 --- a/lib/Alchemy/Phrasea/SearchEngine/Elastic/RecordHelper.php +++ b/lib/Alchemy/Phrasea/SearchEngine/Elastic/RecordHelper.php @@ -116,4 +116,31 @@ class RecordHelper return null; } } + + public function sanitizeValue($value, $type) + { + switch ($type) { + case FieldMapping::TYPE_DATE: + return self::sanitizeDate($value); + + case FieldMapping::TYPE_FLOAT: + case FieldMapping::TYPE_DOUBLE: + return (float) $value; + + case FieldMapping::TYPE_INTEGER: + case FieldMapping::TYPE_LONG: + case FieldMapping::TYPE_SHORT: + case FieldMapping::TYPE_BYTE: + return (int) $value; + + case FieldMapping::TYPE_BOOLEAN: + return (bool) $value; + + case FieldMapping::TYPE_STRING: + return str_replace("\0", '', $value); + + default: + return $value; + } + } }