From d0bbea71b9092663c3956826d4936172bc90dbcd Mon Sep 17 00:00:00 2001 From: aynsix Date: Wed, 14 Aug 2019 18:24:22 +0400 Subject: [PATCH 1/4] use configuration pdftotext binary --- lib/Alchemy/Phrasea/Application.php | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/lib/Alchemy/Phrasea/Application.php b/lib/Alchemy/Phrasea/Application.php index f586a4f93a..f25e906f57 100644 --- a/lib/Alchemy/Phrasea/Application.php +++ b/lib/Alchemy/Phrasea/Application.php @@ -115,6 +115,7 @@ use Symfony\Component\Form\FormBuilderInterface; use Symfony\Component\Form\FormInterface; use Symfony\Component\Form\FormTypeInterface; use Symfony\Component\HttpFoundation\RedirectResponse; +use Symfony\Component\Process\ExecutableFinder; use Unoconv\UnoconvServiceProvider; use XPDF\PdfToText; use XPDF\XPDFServiceProvider; @@ -237,8 +238,19 @@ class Application extends SilexApplication $this->register(new UnicodeServiceProvider()); $this->register(new ValidatorServiceProvider()); - $this->register(new XPDFServiceProvider()); - $this->setupXpdf(); + + if ($this['configuration.store']->isSetup()) { + $binariesConfig = $this['conf']->get(['main', 'binaries']); + $executableFinder = new ExecutableFinder(); + $this->register(new XPDFServiceProvider(), [ + 'xpdf.configuration' => [ + 'pdftotext.binaries' => isset($binariesConfig['pdftotext_binary']) ? $binariesConfig['pdftotext_binary'] : $executableFinder->find('pdftotext'), + ] + ]); + + $this->setupXpdf(); + } + $this->register(new FileServeServiceProvider()); $this->register(new ManipulatorServiceProvider()); $this->register(new PluginServiceProvider()); From eb938ee9ea19011e424066854b4a0a35dc6848c7 Mon Sep 17 00:00:00 2001 From: aynsix Date: Thu, 15 Aug 2019 11:15:26 +0400 Subject: [PATCH 2/4] formate date from metadata to Y/m/d H:i:s --- .../Phrasea/Metadata/PhraseanetMetadataSetter.php | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/lib/Alchemy/Phrasea/Metadata/PhraseanetMetadataSetter.php b/lib/Alchemy/Phrasea/Metadata/PhraseanetMetadataSetter.php index 45d098bd11..c72c884203 100644 --- a/lib/Alchemy/Phrasea/Metadata/PhraseanetMetadataSetter.php +++ b/lib/Alchemy/Phrasea/Metadata/PhraseanetMetadataSetter.php @@ -14,6 +14,7 @@ namespace Alchemy\Phrasea\Metadata; use Alchemy\Phrasea\Border\File; use Alchemy\Phrasea\Databox\DataboxRepository; use Alchemy\Phrasea\Metadata\Tag\NoSource; +use DateTime; use PHPExiftool\Driver\Metadata\Metadata; class PhraseanetMetadataSetter @@ -66,8 +67,16 @@ class PhraseanetMetadataSetter continue; } - $data['value'] = $value; + if ($field->get_type() == 'date') { + try { + $dateTime = new DateTime($value); + $value = $dateTime->format('Y/m/d H:i:s'); + } catch (\Exception $e) { + // $value unchanged + } + } + $data['value'] = $value; $metadataInRecordFormat[] = $data; } } From 20076df3f8cc5d06fdafae52d615e35a6fb8ddfb Mon Sep 17 00:00:00 2001 From: aynsix Date: Thu, 15 Aug 2019 15:23:03 +0400 Subject: [PATCH 3/4] port to 4.1 date with time on phraseanet --- .../Elastic/AST/KeyValue/FieldKey.php | 5 + .../SearchEngine/Elastic/AST/KeyValue/Key.php | 1 + .../Elastic/AST/KeyValue/MetadataKey.php | 5 + .../Elastic/AST/KeyValue/NativeKey.php | 5 + .../Elastic/AST/KeyValue/RangeExpression.php | 44 +++++--- .../Elastic/AST/KeyValue/TimestampKey.php | 5 + .../Elastic/ElasticSearchEngine.php | 4 +- .../SearchEngine/Elastic/FieldMapping.php | 3 +- .../Elastic/Mapping/DateFieldMapping.php | 5 +- .../SearchEngine/Elastic/RecordHelper.php | 48 ++++++--- .../Elastic/Search/QueryHelper.php | 65 ++++++----- .../Elastic/Search/QueryVisitor.php | 22 ++-- .../Elastic/Structure/ValueChecker.php | 3 +- .../TaskManager/Job/WriteMetadataJob.php | 62 +++++++++-- .../SearchEngine/AST/RangeExpressionTest.php | 2 + .../SearchEngine/resources/queries.csv | 101 +++++++++++++++--- 16 files changed, 287 insertions(+), 93 deletions(-) diff --git a/lib/Alchemy/Phrasea/SearchEngine/Elastic/AST/KeyValue/FieldKey.php b/lib/Alchemy/Phrasea/SearchEngine/Elastic/AST/KeyValue/FieldKey.php index f4f8553738..293e8c69dd 100644 --- a/lib/Alchemy/Phrasea/SearchEngine/Elastic/AST/KeyValue/FieldKey.php +++ b/lib/Alchemy/Phrasea/SearchEngine/Elastic/AST/KeyValue/FieldKey.php @@ -30,6 +30,11 @@ class FieldKey implements Key, QueryPostProcessor return $this->getField($context)->getIndexField($raw); } + public function getFieldType(QueryContext $context) + { + return $this->getField($context)->getType(); + } + public function isValueCompatible($value, QueryContext $context) { return ValueChecker::isValueCompatible($this->getField($context), $value); diff --git a/lib/Alchemy/Phrasea/SearchEngine/Elastic/AST/KeyValue/Key.php b/lib/Alchemy/Phrasea/SearchEngine/Elastic/AST/KeyValue/Key.php index 20ee045a59..d664fccb94 100644 --- a/lib/Alchemy/Phrasea/SearchEngine/Elastic/AST/KeyValue/Key.php +++ b/lib/Alchemy/Phrasea/SearchEngine/Elastic/AST/KeyValue/Key.php @@ -6,6 +6,7 @@ use Alchemy\Phrasea\SearchEngine\Elastic\Search\QueryContext; interface Key { + public function getFieldType(QueryContext $context); public function getIndexField(QueryContext $context, $raw = false); public function isValueCompatible($value, QueryContext $context); public function __toString(); diff --git a/lib/Alchemy/Phrasea/SearchEngine/Elastic/AST/KeyValue/MetadataKey.php b/lib/Alchemy/Phrasea/SearchEngine/Elastic/AST/KeyValue/MetadataKey.php index 292b6e7237..61f9303fe0 100644 --- a/lib/Alchemy/Phrasea/SearchEngine/Elastic/AST/KeyValue/MetadataKey.php +++ b/lib/Alchemy/Phrasea/SearchEngine/Elastic/AST/KeyValue/MetadataKey.php @@ -23,6 +23,11 @@ class MetadataKey implements Key return $this->getTag($context)->getIndexField($raw); } + public function getFieldType(QueryContext $context) + { + return $this->getTag($context)->getType(); + } + public function isValueCompatible($value, QueryContext $context) { return ValueChecker::isValueCompatible($this->getTag($context), $value); diff --git a/lib/Alchemy/Phrasea/SearchEngine/Elastic/AST/KeyValue/NativeKey.php b/lib/Alchemy/Phrasea/SearchEngine/Elastic/AST/KeyValue/NativeKey.php index e486483b71..3c2d334f48 100644 --- a/lib/Alchemy/Phrasea/SearchEngine/Elastic/AST/KeyValue/NativeKey.php +++ b/lib/Alchemy/Phrasea/SearchEngine/Elastic/AST/KeyValue/NativeKey.php @@ -52,6 +52,11 @@ class NativeKey implements Key $this->key = $key; } + public function getFieldType(QueryContext $context) + { + return $this->type; + } + public function getIndexField(QueryContext $context, $raw = false) { return $this->key; diff --git a/lib/Alchemy/Phrasea/SearchEngine/Elastic/AST/KeyValue/RangeExpression.php b/lib/Alchemy/Phrasea/SearchEngine/Elastic/AST/KeyValue/RangeExpression.php index ec785354ae..17f901a77c 100644 --- a/lib/Alchemy/Phrasea/SearchEngine/Elastic/AST/KeyValue/RangeExpression.php +++ b/lib/Alchemy/Phrasea/SearchEngine/Elastic/AST/KeyValue/RangeExpression.php @@ -2,18 +2,20 @@ namespace Alchemy\Phrasea\SearchEngine\Elastic\AST\KeyValue; +use Alchemy\Phrasea\SearchEngine\Elastic\FieldMapping; +use Alchemy\Phrasea\SearchEngine\Elastic\RecordHelper; +use Alchemy\Phrasea\SearchEngine\Elastic\Structure\Field as StructureField; use Assert\Assertion; -use Alchemy\Phrasea\SearchEngine\Elastic\AST\KeyValue\FieldKey; -use Alchemy\Phrasea\SearchEngine\Elastic\AST\KeyValue\Key; use Alchemy\Phrasea\SearchEngine\Elastic\AST\Node; use Alchemy\Phrasea\SearchEngine\Elastic\Exception\QueryException; use Alchemy\Phrasea\SearchEngine\Elastic\Search\QueryContext; -use Alchemy\Phrasea\SearchEngine\Elastic\Search\QueryHelper; use Alchemy\Phrasea\SearchEngine\Elastic\Search\QueryPostProcessor; class RangeExpression extends Node { + /** @var FieldKey */ private $key; + private $lower_bound; private $lower_inclusive; private $higher_bound; @@ -55,20 +57,34 @@ class RangeExpression extends Node public function buildQuery(QueryContext $context) { $params = array(); - if ($this->lower_bound !== null) { - $this->assertValueCompatible($this->lower_bound, $context); - if ($this->lower_inclusive) { - $params['gte'] = $this->lower_bound; - } else { - $params['gt'] = $this->lower_bound; + /** @var StructureField $field */ + // $field = $this->key->getField($context); + $lower_bound = $this->lower_bound; + $higher_bound = $this->higher_bound; + + if($this->key->getFieldType($context) === FieldMapping::TYPE_DATE) { + if($lower_bound !== null) { + $lower_bound = RecordHelper::sanitizeDate($lower_bound); + } + if($higher_bound !== null) { + $higher_bound = RecordHelper::sanitizeDate($higher_bound); } } - if ($this->higher_bound !== null) { - $this->assertValueCompatible($this->higher_bound, $context); - if ($this->higher_inclusive) { - $params['lte'] = $this->higher_bound; + + if ($lower_bound !== null) { + $this->assertValueCompatible($lower_bound, $context); + if ($this->lower_inclusive) { + $params['gte'] = $lower_bound; } else { - $params['lt'] = $this->higher_bound; + $params['gt'] = $lower_bound; + } + } + if ($higher_bound !== null) { + $this->assertValueCompatible($higher_bound, $context); + if ($this->higher_inclusive) { + $params['lte'] = $higher_bound; + } else { + $params['lt'] = $higher_bound; } } diff --git a/lib/Alchemy/Phrasea/SearchEngine/Elastic/AST/KeyValue/TimestampKey.php b/lib/Alchemy/Phrasea/SearchEngine/Elastic/AST/KeyValue/TimestampKey.php index b879945982..a5d34909d7 100644 --- a/lib/Alchemy/Phrasea/SearchEngine/Elastic/AST/KeyValue/TimestampKey.php +++ b/lib/Alchemy/Phrasea/SearchEngine/Elastic/AST/KeyValue/TimestampKey.php @@ -34,6 +34,11 @@ class TimestampKey implements Key, Typed return FieldMapping::TYPE_DATE; } + public function getFieldType(QueryContext $context) + { + return FieldMapping::TYPE_DATE; + } + public function getIndexField(QueryContext $context, $raw = false) { return $this->index_field; diff --git a/lib/Alchemy/Phrasea/SearchEngine/Elastic/ElasticSearchEngine.php b/lib/Alchemy/Phrasea/SearchEngine/Elastic/ElasticSearchEngine.php index ed9d3ceeed..63d9ec108e 100644 --- a/lib/Alchemy/Phrasea/SearchEngine/Elastic/ElasticSearchEngine.php +++ b/lib/Alchemy/Phrasea/SearchEngine/Elastic/ElasticSearchEngine.php @@ -396,10 +396,10 @@ class ElasticSearchEngine implements SearchEngineInterface if ($options->getDateFields() && ($options->getMaxDate() || $options->getMinDate())) { $range = []; if ($options->getMaxDate()) { - $range['lte'] = $options->getMaxDate()->format(FieldMapping::DATE_FORMAT_CAPTION_PHP); + $range['lte'] = $options->getMaxDate()->format('Y-m-d'); } if ($options->getMinDate()) { - $range['gte'] = $options->getMinDate()->format(FieldMapping::DATE_FORMAT_CAPTION_PHP); + $range['gte'] = $options->getMinDate()->format('Y-m-d'); } foreach ($options->getDateFields() as $dateField) { diff --git a/lib/Alchemy/Phrasea/SearchEngine/Elastic/FieldMapping.php b/lib/Alchemy/Phrasea/SearchEngine/Elastic/FieldMapping.php index 076034b7b1..7a60b066e6 100644 --- a/lib/Alchemy/Phrasea/SearchEngine/Elastic/FieldMapping.php +++ b/lib/Alchemy/Phrasea/SearchEngine/Elastic/FieldMapping.php @@ -16,8 +16,7 @@ class FieldMapping const DATE_FORMAT_MYSQL = 'yyyy-MM-dd HH:mm:ss'; const DATE_FORMAT_CAPTION = 'yyyy/MM/dd'; // ES format - const DATE_FORMAT_MYSQL_OR_CAPTION = 'yyyy-MM-dd HH:mm:ss||yyyy/MM/dd'; - const DATE_FORMAT_CAPTION_PHP = 'Y/m/d'; // PHP format + const DATE_FORMAT_MYSQL_OR_CAPTION = 'yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||yyyy-MM||yyyy'; // Core types const TYPE_STRING = 'string'; diff --git a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Mapping/DateFieldMapping.php b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Mapping/DateFieldMapping.php index a96ef45f4a..128a16467f 100644 --- a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Mapping/DateFieldMapping.php +++ b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Mapping/DateFieldMapping.php @@ -57,6 +57,9 @@ class DateFieldMapping extends ComplexFieldMapping */ protected function getProperties() { - return array_merge([ 'format' => $this->format ], parent::getProperties()); + return array_merge([ + 'format' => $this->format, + 'ignore_malformed' => true + ], parent::getProperties()); } } diff --git a/lib/Alchemy/Phrasea/SearchEngine/Elastic/RecordHelper.php b/lib/Alchemy/Phrasea/SearchEngine/Elastic/RecordHelper.php index ffb0c71adf..83ca222593 100644 --- a/lib/Alchemy/Phrasea/SearchEngine/Elastic/RecordHelper.php +++ b/lib/Alchemy/Phrasea/SearchEngine/Elastic/RecordHelper.php @@ -89,31 +89,45 @@ class RecordHelper return $this->collectionMap; } - /** - * @param string $date - * @return bool - */ - public static function validateDate($date) - { - $d = DateTime::createFromFormat(FieldMapping::DATE_FORMAT_CAPTION_PHP, $date); - - return $d && $d->format(FieldMapping::DATE_FORMAT_CAPTION_PHP) == $date; - } - /** * @param string $value * @return null|string */ public static function sanitizeDate($value) { - // introduced in https://github.com/alchemy-fr/Phraseanet/commit/775ce804e0257d3a06e4e068bd17330a79eb8370#diff-bee690ed259e0cf73a31dee5295d2edcR286 - // not sure if it's really needed + $v_fix = null; try { - $date = new \DateTime($value); - - return $date->format(FieldMapping::DATE_FORMAT_CAPTION_PHP); + $a = explode(';', preg_replace('/\D+/', ';', trim($value))); + switch (count($a)) { + case 1: // yyyy + $date = new \DateTime($a[0] . '-01-01'); // will throw if date is not valid + $v_fix = $date->format('Y'); + break; + case 2: // yyyy;mm + $date = new \DateTime( $a[0] . '-' . $a[1] . '-01'); + $v_fix = $date->format('Y-m'); + break; + case 3: // yyyy;mm;dd + $date = new \DateTime($a[0] . '-' . $a[1] . '-' . $a[2]); + $v_fix = $date->format('Y-m-d'); + break; + case 4: + $date = new \DateTime($a[0] . '-' . $a[1] . '-' . $a[2] . ' ' . $a[3] . ':00:00'); + $v_fix = $date->format('Y-m-d H:i:s'); + break; + case 5: + $date = new \DateTime($a[0] . '-' . $a[1] . '-' . $a[2] . ' ' . $a[3] . ':' . $a[4] . ':00'); + $v_fix = $date->format('Y-m-d H:i:s'); + break; + case 6: + $date = new \DateTime($a[0] . '-' . $a[1] . '-' . $a[2] . ' ' . $a[3] . ':' . $a[4] . ':' . $a[5]); + $v_fix = $date->format('Y-m-d H:i:s'); + break; + } } catch (\Exception $e) { - return null; + // no-op, v_fix = null } + + return $v_fix; } } diff --git a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Search/QueryHelper.php b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Search/QueryHelper.php index 1acdd5935c..071ac94b1c 100644 --- a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Search/QueryHelper.php +++ b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Search/QueryHelper.php @@ -110,41 +110,50 @@ class QueryHelper } } - public static function getRangeFromDateString($string) + public static function getRangeFromDateString($value) { - $formats = ['Y/m/d', 'Y/m', 'Y']; - $deltas = ['+1 day', '+1 month', '+1 year']; - $to = null; - while ($format = array_pop($formats)) { - $delta = array_pop($deltas); - $from = date_create_from_format($format, $string); - if ($from !== false) { - // Rewind to start of range - $month = 1; - $day = 1; - switch ($format) { - case 'Y/m/d': - $day = (int) $from->format('d'); - case 'Y/m': - $month = (int) $from->format('m'); - case 'Y': - $year = (int) $from->format('Y'); - } - date_date_set($from, $year, $month, $day); - date_time_set($from, 0, 0, 0); - // Create end of the the range - $to = date_modify(clone $from, $delta); - break; + $date_from = null; + $date_to = null; + try { + $a = explode(';', preg_replace('/\D+/', ';', trim($value))); + switch (count($a)) { + case 1: // yyyy + $date_to = clone($date_from = new \DateTime($a[0] . '-01-01 00:00:00')); // will throw if date is not valid + $date_to->add(new \DateInterval('P1Y')); + break; + case 2: // yyyy;mm + $date_to = clone($date_from = new \DateTime($a[0] . '-' . $a[1] . '-01 00:00:00')); // will throw if date is not valid + $date_to->add(new \DateInterval('P1M')); + break; + case 3: // yyyy;mm;dd + $date_to = clone($date_from = new \DateTime($a[0] . '-' . $a[1] . '-' . $a[2] . ' 00:00:00')); // will throw if date is not valid + $date_to->add(new \DateInterval('P1D')); + break; + case 4: + $date_to = clone($date_from = new \DateTime($a[0] . '-' . $a[1] . '-' . $a[2] . ' ' . $a[3] . ':00:00')); + $date_to->add(new \DateInterval('PT1H')); + break; + case 5: + $date_to = clone($date_from = new \DateTime($a[0] . '-' . $a[1] . '-' . $a[2] . ' ' . $a[3] . ':' . $a[4] . ':00')); + $date_to->add(new \DateInterval('PT1M')); + break; + case 6: + $date_to = clone($date_from = new \DateTime($a[0] . '-' . $a[1] . '-' . $a[2] . ' ' . $a[3] . ':' . $a[4] . ':' . $a[5])); + // $date_to->add(new \DateInterval('PT1S')); // no need since precision is 1 sec, a "equal" will be generated when from==to + break; } } + catch (\Exception $e) { + // no-op + } - if (!$from || !$to) { - throw new \InvalidArgumentException(sprintf('Invalid date "%s".', $string)); + if ($date_from === null || $date_to === null) { + throw new \InvalidArgumentException(sprintf('Invalid date "%s".', $value)); } return [ - 'from' => $from->format(FieldMapping::DATE_FORMAT_CAPTION_PHP), - 'to' => $to->format(FieldMapping::DATE_FORMAT_CAPTION_PHP) + 'from' => $date_from->format('Y-m-d H:i:s'), + 'to' => $date_to->format('Y-m-d H:i:s') ]; } } diff --git a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Search/QueryVisitor.php b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Search/QueryVisitor.php index 522666bcaa..52c0fa54a6 100644 --- a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Search/QueryVisitor.php +++ b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Search/QueryVisitor.php @@ -5,7 +5,7 @@ namespace Alchemy\Phrasea\SearchEngine\Elastic\Search; use Alchemy\Phrasea\SearchEngine\Elastic\AST; use Alchemy\Phrasea\SearchEngine\Elastic\Exception\Exception; use Alchemy\Phrasea\SearchEngine\Elastic\FieldMapping; -use Alchemy\Phrasea\SearchEngine\Elastic\Mapping; +use Alchemy\Phrasea\SearchEngine\Elastic\RecordHelper; use Alchemy\Phrasea\SearchEngine\Elastic\Structure\Structure; use Hoa\Compiler\Llk\TreeNode; use Hoa\Visitor\Element; @@ -166,6 +166,12 @@ class QueryVisitor implements Visit $key = $node->getChild(0)->accept($this); $boundary = $node->getChild(1)->accept($this); + if ($this->isDateKey($key)) { + if(($v = RecordHelper::sanitizeDate($boundary)) !== null) { + $boundary = $v; + } + } + switch ($node->getId()) { case NodeTypes::LT_EXPR: return AST\KeyValue\RangeExpression::lessThan($key, $boundary); @@ -195,11 +201,15 @@ class QueryVisitor implements Visit try { // Try to create a range for incomplete dates $range = QueryHelper::getRangeFromDateString($right); - return new AST\KeyValue\RangeExpression( - $left, - $range['from'], true, - $range['to'], false - ); + if ($range['from'] === $range['to']) { + return new AST\KeyValue\EqualExpression($left, $range['from']); + } else { + return new AST\KeyValue\RangeExpression( + $left, + $range['from'], true, + $range['to'], false + ); + } } catch (\InvalidArgumentException $e) { // Fall back to equal expression } diff --git a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Structure/ValueChecker.php b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Structure/ValueChecker.php index 31f65c580e..f27defa2bb 100644 --- a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Structure/ValueChecker.php +++ b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Structure/ValueChecker.php @@ -3,7 +3,6 @@ namespace Alchemy\Phrasea\SearchEngine\Elastic\Structure; use Alchemy\Phrasea\SearchEngine\Elastic\FieldMapping; -use Alchemy\Phrasea\SearchEngine\Elastic\Mapping; use Alchemy\Phrasea\SearchEngine\Elastic\RecordHelper; use Assert\Assertion; @@ -20,7 +19,7 @@ class ValueChecker { Assertion::allIsInstanceOf($list, Typed::class); $is_numeric = is_numeric($value); - $is_valid_date = RecordHelper::validateDate($value); + $is_valid_date = (RecordHelper::sanitizeDate($value) !== null); $filtered = []; foreach ($list as $item) { switch ($item->getType()) { diff --git a/lib/Alchemy/Phrasea/TaskManager/Job/WriteMetadataJob.php b/lib/Alchemy/Phrasea/TaskManager/Job/WriteMetadataJob.php index f35b8e806e..795b0b2a83 100644 --- a/lib/Alchemy/Phrasea/TaskManager/Job/WriteMetadataJob.php +++ b/lib/Alchemy/Phrasea/TaskManager/Job/WriteMetadataJob.php @@ -127,7 +127,10 @@ class WriteMetadataJob extends AbstractJob // check exiftool known tags to skip Phraseanet:tf-* try { - TagFactory::getFromRDFTagname($tagName); + $tag = TagFactory::getFromRDFTagname($tagName); + if(!$tag->isWritable()) { + continue; + } } catch (TagUnknown $e) { continue; } @@ -147,21 +150,34 @@ class WriteMetadataJob extends AbstractJob $fieldValue = array_pop($fieldValues); $value = $this->removeNulChar($fieldValue->getValue()); - $value = new Value\Mono($value); + // fix the dates edited into phraseanet + if($fieldStructure->get_type() === $fieldStructure::TYPE_DATE) { + try { + $value = self::fixDate($value); // will return NULL if the date is not valid + } + catch (\Exception $e) { + $value = null; // do NOT write back to iptc + } + } + + if($value !== null) { // do not write invalid dates + $value = new Value\Mono($value); + } } - } catch(\Exception $e) { + } catch (\Exception $e) { // the field is not set in the record, erase it if ($fieldStructure->is_multi()) { $value = new Value\Multi(array('')); - } - else { + } else { $value = new Value\Mono(''); } } - $metadata->add( - new Metadata\Metadata($fieldStructure->get_tag(), $value) - ); + if($value !== null) { // do not write invalid data + $metadata->add( + new Metadata\Metadata($fieldStructure->get_tag(), $value) + ); + } } $writer = $this->getMetadataWriter($jobData->getApplication()); @@ -220,4 +236,34 @@ class WriteMetadataJob extends AbstractJob { return str_replace("\0", "", $value); } + + /** + * re-format a phraseanet date for iptc writing + * return NULL if the date is not valid + * + * @param string $value + * @return string|null + */ + private static function fixDate($value) + { + $date = null; + try { + $a = explode(';', preg_replace('/\D+/', ';', trim($value))); + switch (count($a)) { + case 3: // yyyy;mm;dd + $date = new \DateTime($a[0] . '-' . $a[1] . '-' . $a[2]); + $date = $date->format('Y-m-d H:i:s'); + break; + case 6: // yyyy;mm;dd;hh;mm;ss + $date = new \DateTime($a[0] . '-' . $a[1] . '-' . $a[2] . ' ' . $a[3] . ':' . $a[4] . ':' . $a[5]); + $date = $date->format('Y-m-d H:i:s'); + break; + } + } + catch (\Exception $e) { + $date = null; + } + + return $date; + } } diff --git a/tests/Alchemy/Tests/Phrasea/SearchEngine/AST/RangeExpressionTest.php b/tests/Alchemy/Tests/Phrasea/SearchEngine/AST/RangeExpressionTest.php index 88a850fe03..d1d376e37b 100644 --- a/tests/Alchemy/Tests/Phrasea/SearchEngine/AST/RangeExpressionTest.php +++ b/tests/Alchemy/Tests/Phrasea/SearchEngine/AST/RangeExpressionTest.php @@ -58,6 +58,7 @@ class RangeExpressionTest extends \PHPUnit_Framework_TestCase { $query_context = $this->prophesize(QueryContext::class)->reveal(); $key_prophecy = $this->prophesize(Key::class); + $key_prophecy->getFieldType($query_context)->willReturn('text'); $key_prophecy->getIndexField($query_context)->willReturn('foo'); $key_prophecy->isValueCompatible('bar', $query_context)->willReturn(true); $key = $key_prophecy->reveal(); @@ -73,6 +74,7 @@ class RangeExpressionTest extends \PHPUnit_Framework_TestCase { $query_context = $this->prophesize(QueryContext::class)->reveal(); $key = $this->prophesize(FieldKey::class); + $key->getFieldType($query_context)->willReturn('text'); $key->getIndexField($query_context)->willReturn('baz'); $key->isValueCompatible('bar', $query_context)->willReturn(true); $key->postProcessQuery(Argument::any(), $query_context)->willReturnArgument(0); diff --git a/tests/Alchemy/Tests/Phrasea/SearchEngine/resources/queries.csv b/tests/Alchemy/Tests/Phrasea/SearchEngine/resources/queries.csv index 9f7d079880..5252788eaa 100644 --- a/tests/Alchemy/Tests/Phrasea/SearchEngine/resources/queries.csv +++ b/tests/Alchemy/Tests/Phrasea/SearchEngine/resources/queries.csv @@ -53,6 +53,7 @@ foo < 42| foo ≤ 42| foo > 42| foo ≥ 42| +foo = 2015/01/01|( == ) foo < 2015/01/01| foo ≤ 2015/01/01| foo > 2015/01/01| @@ -93,19 +94,93 @@ id:90 AND foo|( AND ) id:90 foo|( AND ) recordid:90| -# Timestamps -created_on < "2015/01/01"| -created_on ≤ "2015/01/01"| -created_on = "2015/01/01"| -created_on ≥ "2015/01/01"| -created_on > "2015/01/01"| -updated_on < "2015/01/01"| -updated_on ≤ "2015/01/01"| -updated_on = "2015/01/01"| -updated_on ≥ "2015/01/01"| -updated_on > "2015/01/01"| -created_at > "2015/01/01"| -updated_at > "2015/01/01"| +# Timestamps yyyy +created_on < "2015"| +created_on ≤ "2015"| +created_on = "2015"| +created_on ≥ "2015"| +created_on > "2015"| +updated_on < "2015"| +updated_on ≤ "2015"| +updated_on = "2015"| +updated_on ≥ "2015"| +updated_on > "2015"| +created_at > "2015"| +updated_at > "2015"| + +# Timestamps yyyy/mm +created_on < "2015/01"| +created_on ≤ "2015/01"| +created_on = "2015/01"| +created_on ≥ "2015/01"| +created_on > "2015/01"| +updated_on < "2015/01"| +updated_on ≤ "2015/01"| +updated_on = "2015/01"| +updated_on ≥ "2015/01"| +updated_on > "2015/01"| +created_at > "2015/01"| +updated_at > "2015/01"| + +# Timestamps yyyy/mm/dd +created_on < "2015/01/01"| +created_on ≤ "2015/01/01"| +created_on = "2015/01/01"| +created_on ≥ "2015/01/01"| +created_on > "2015/01/01"| +updated_on < "2015/01/01"| +updated_on ≤ "2015/01/01"| +updated_on = "2015/01/01"| +updated_on ≥ "2015/01/01"| +updated_on > "2015/01/01"| +created_at > "2015/01/01"| +updated_at > "2015/01/01"| + +# Timestamps yyyy/mm/dd hh +created_on < "2015/01/01 12"| +created_on ≤ "2015/01/01 12"| +created_on = "2015/01/01 12"| +created_on ≥ "2015/01/01 12"| +created_on > "2015/01/01 12"| +updated_on < "2015/01/01 12"| +updated_on ≤ "2015/01/01 12"| +updated_on = "2015/01/01 12"| +updated_on ≥ "2015/01/01 12"| +updated_on > "2015/01/01 12"| +created_at > "2015/01/01 12"| +updated_at > "2015/01/01 12"| + +# Timestamps yyyy/mm/dd hh:mm +created_on < "2015/01/01 12.34"| +created_on ≤ "2015/01/01 12.34"| +created_on = "2015/01/01 12.34"| +created_on ≥ "2015/01/01 12.34"| +created_on > "2015/01/01 12.34"| +updated_on < "2015/01/01 12.34"| +updated_on ≤ "2015/01/01 12.34"| +updated_on = "2015/01/01 12.34"| +updated_on ≥ "2015/01/01 12.34"| +updated_on > "2015/01/01 12.34"| +created_at > "2015/01/01 12.34"| +updated_at > "2015/01/01 12.34"| + +# Timestamps yyyy/mm/dd hh.mm.ss +created_on < "2015/01/01 12.34.56"| +created_on ≤ "2015/01/01 12.34.56"| +created_on = "2015/01/01 12.34.56"|( == ) +created_on ≥ "2015/01/01 12.34.56"| +created_on > "2015/01/01 12.34.56"| +updated_on < "2015/01/01 12.34.56"| +updated_on ≤ "2015/01/01 12.34.56"| +updated_on = "2015/01/01 12.34.56"|( == ) +updated_on ≥ "2015/01/01 12.34.56"| +updated_on > "2015/01/01 12.34.56"| +created_at > "2015/01/01 12.34.56"| +updated_at > "2015/01/01 12.34.56"| + +# timestamps missing zeros +created_on = "2015/1/2 1.3.5"|( == ) + # Flag matcher flag.foo:true| From bd53e98b0d8b1529197adb90fb1e55e3dc749829 Mon Sep 17 00:00:00 2001 From: aynsix Date: Thu, 15 Aug 2019 16:09:35 +0400 Subject: [PATCH 4/4] port to 4.1 fix ES index NUL --- .../Elastic/Indexer/BulkOperation.php | 9 ++-- .../Record/Hydrator/MetadataHydrator.php | 52 ++++--------------- .../Indexer/Record/Hydrator/TitleHydrator.php | 45 +++++++++------- .../SearchEngine/Elastic/RecordHelper.php | 27 ++++++++++ 4 files changed, 67 insertions(+), 66 deletions(-) diff --git a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer/BulkOperation.php b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer/BulkOperation.php index 6ea641492d..b8c834d9f0 100644 --- a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer/BulkOperation.php +++ b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer/BulkOperation.php @@ -155,15 +155,16 @@ class BulkOperation // nb: results (items) are returned IN THE SAME ORDER as commands were pushed in the stack // so the items[X] match the operationIdentifiers[X] foreach ($response['items'] as $key => $item) { - foreach($item as $command=>$result) { // command may be "index" or "delete" - if($response['errors'] && $result['status'] >= 400) { // 4xx or 5xx error - throw new Exception(sprintf('%d: %s', $key, var_export($result, true))); + foreach ($item as $command=>$result) { // command may be "index" or "delete" + if ($response['errors'] && $result['status'] >= 400) { // 4xx or 5xx + $err = array_key_exists('error', $result) ? var_export($result['error'], true) : ($command . " error " . $result['status']); + throw new Exception(sprintf('%d: %s', $key, $err)); } } $operationIdentifier = $this->operationIdentifiers[$key]; - if(is_string($operationIdentifier) || is_int($operationIdentifier)) { // dont include null keys + if (is_string($operationIdentifier) || is_int($operationIdentifier)) { // dont include null keys $callbackData[$operationIdentifier] = $response['items'][$key]; } } diff --git a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer/Record/Hydrator/MetadataHydrator.php b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer/Record/Hydrator/MetadataHydrator.php index 32b93502d4..5dc2471cb9 100644 --- a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer/Record/Hydrator/MetadataHydrator.php +++ b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer/Record/Hydrator/MetadataHydrator.php @@ -39,18 +39,13 @@ class MetadataHydrator implements HydratorInterface public function hydrateRecords(array &$records) { - $sql = <<connection->executeQuery( @@ -62,7 +57,7 @@ SQL; while ($metadata = $statement->fetch()) { // Store metadata value $key = $metadata['key']; - $value = $metadata['value']; + $value = trim($metadata['value']); // Do not keep empty values if ($key === '' || $value === '') { @@ -80,7 +75,7 @@ SQL; case 'caption': // Sanitize fields $value = StringHelper::crlfNormalize($value); - $value = $this->sanitizeValue($value, $this->structure->typeOf($key)); + $value = $this->helper->sanitizeValue($value, $this->structure->typeOf($key)); // Private caption fields are kept apart $type = $metadata['private'] ? 'private_caption' : 'caption'; // Caption are multi-valued @@ -103,7 +98,7 @@ SQL; } $tag = $this->structure->getMetadataTagByName($key); if ($tag) { - $value = $this->sanitizeValue($value, $tag->getType()); + $value = $this->helper->sanitizeValue($value, $tag->getType()); } // EXIF data is single-valued $record['metadata_tags'][$key] = $value; @@ -118,33 +113,6 @@ SQL; $this->clearGpsPositionBuffer(); } - private function sanitizeValue($value, $type) - { - switch ($type) { - case FieldMapping::TYPE_STRING: - return str_replace("\0", "", $value); - - case FieldMapping::TYPE_DATE: - return $this->helper->sanitizeDate($value); - - case FieldMapping::TYPE_FLOAT: - case FieldMapping::TYPE_DOUBLE: - return (float) $value; - - case FieldMapping::TYPE_INTEGER: - case FieldMapping::TYPE_LONG: - case FieldMapping::TYPE_SHORT: - case FieldMapping::TYPE_BYTE: - return (int) $value; - - case FieldMapping::TYPE_BOOLEAN: - return (bool) $value; - - default: - return $value; - } - } - private function handleGpsPosition(&$records, $id, $tag_name, $value) { // Get position object diff --git a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer/Record/Hydrator/TitleHydrator.php b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer/Record/Hydrator/TitleHydrator.php index 88382aa7a7..76dc4704b4 100644 --- a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer/Record/Hydrator/TitleHydrator.php +++ b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer/Record/Hydrator/TitleHydrator.php @@ -11,6 +11,8 @@ namespace Alchemy\Phrasea\SearchEngine\Elastic\Indexer\Record\Hydrator; +use Alchemy\Phrasea\SearchEngine\Elastic\FieldMapping; +use Alchemy\Phrasea\SearchEngine\Elastic\RecordHelper; use Doctrine\DBAL\Connection; use Doctrine\DBAL\Driver\Connection as DriverConnection; @@ -18,31 +20,34 @@ class TitleHydrator implements HydratorInterface { private $connection; - public function __construct(DriverConnection $connection) + /** @var RecordHelper */ + private $helper; + + public function __construct(DriverConnection $connection, RecordHelper $helper) { $this->connection = $connection; + $this->helper = $helper; } public function hydrateRecords(array &$records) { - $sql = <<connection->executeQuery( $sql, array(array_keys($records)), @@ -50,7 +55,7 @@ SQL; ); while ($row = $statement->fetch()) { - $records[$row['record_id']]['title'][$row['locale']] = $row['title']; + $records[$row['record_id']]['title'][$row['locale']] = $this->helper->sanitizeValue($row['title'], FieldMapping::TYPE_STRING); } } } diff --git a/lib/Alchemy/Phrasea/SearchEngine/Elastic/RecordHelper.php b/lib/Alchemy/Phrasea/SearchEngine/Elastic/RecordHelper.php index ffb0c71adf..5debeff82e 100644 --- a/lib/Alchemy/Phrasea/SearchEngine/Elastic/RecordHelper.php +++ b/lib/Alchemy/Phrasea/SearchEngine/Elastic/RecordHelper.php @@ -116,4 +116,31 @@ class RecordHelper return null; } } + + public function sanitizeValue($value, $type) + { + switch ($type) { + case FieldMapping::TYPE_DATE: + return self::sanitizeDate($value); + + case FieldMapping::TYPE_FLOAT: + case FieldMapping::TYPE_DOUBLE: + return (float) $value; + + case FieldMapping::TYPE_INTEGER: + case FieldMapping::TYPE_LONG: + case FieldMapping::TYPE_SHORT: + case FieldMapping::TYPE_BYTE: + return (int) $value; + + case FieldMapping::TYPE_BOOLEAN: + return (bool) $value; + + case FieldMapping::TYPE_STRING: + return str_replace("\0", '', $value); + + default: + return $value; + } + } }