diff --git a/lib/Alchemy/Phrasea/Border/Manager.php b/lib/Alchemy/Phrasea/Border/Manager.php index 34de0ffa2b..df18c404d8 100644 --- a/lib/Alchemy/Phrasea/Border/Manager.php +++ b/lib/Alchemy/Phrasea/Border/Manager.php @@ -13,20 +13,9 @@ namespace Alchemy\Phrasea\Border; use Alchemy\Phrasea\Border\Checker\CheckerInterface; use Alchemy\Phrasea\Border\Attribute\AttributeInterface; -use Alchemy\Phrasea\Metadata\Tag\PdfText; use Alchemy\Phrasea\Metadata\Tag\TfArchivedate; -use Alchemy\Phrasea\Metadata\Tag\TfBasename; -use Alchemy\Phrasea\Metadata\Tag\TfBits; -use Alchemy\Phrasea\Metadata\Tag\TfChannels; -use Alchemy\Phrasea\Metadata\Tag\TfDuration; -use Alchemy\Phrasea\Metadata\Tag\TfExtension; -use Alchemy\Phrasea\Metadata\Tag\TfFilename; -use Alchemy\Phrasea\Metadata\Tag\TfHeight; -use Alchemy\Phrasea\Metadata\Tag\TfMimetype; use Alchemy\Phrasea\Metadata\Tag\TfQuarantine; use Alchemy\Phrasea\Metadata\Tag\TfRecordid; -use Alchemy\Phrasea\Metadata\Tag\TfSize; -use Alchemy\Phrasea\Metadata\Tag\TfWidth; use Alchemy\Phrasea\Border\Attribute\Metadata as MetadataAttr; use Entities\LazaretAttribute; use Entities\LazaretFile; @@ -35,9 +24,9 @@ use MediaAlchemyst\Exception\ExceptionInterface as MediaAlchemystException; use MediaAlchemyst\Specification\Image as ImageSpec; use PHPExiftool\Driver\Metadata\Metadata; use PHPExiftool\Driver\Value\Mono as MonoValue; +use PHPExiftool\Driver\Value\Multi; use Silex\Application; use Symfony\Component\Filesystem\Exception\IOException; -use XPDF\PdfToText; /** * Phraseanet Border Manager @@ -51,7 +40,6 @@ class Manager protected $checkers = array(); protected $app; protected $filesystem; - protected $pdfToText; const RECORD_CREATED = 1; const LAZARET_CREATED = 2; @@ -77,41 +65,17 @@ class Manager $this->app = null; } - /** - * Sets a PdfToText driver for extracting PDF content. - * - * @param PdfTotext $pdfToText The PdfToText Object - * - * @return Manager - */ - public function setPdfToText(PdfToText $pdfToText) - { - $this->pdfToText = $pdfToText; - - return $this; - } - - /** - * Gets the PdfToText driver. - * - * @return PdfTotext - */ - public function getPdfToText() - { - return $this->pdfToText; - } - /** * Add a file to Phraseanet after having checked it * - * @param LazaretSession $session The current Lazaret Session - * @param File $file A File package object - * @param type $callable A callback to execute after process - * (arguments are $element (LazaretFile or \record_adapter), - * $visa (Visa) - * and $code (self::RECORD_CREATED or self::LAZARET_CREATED)) - * @param type $forceBehavior Force a behavior, one of the self::FORCE_* constant - * @return int One of the self::RECORD_CREATED or self::LAZARET_CREATED constants + * @param LazaretSession $session The current Lazaret Session + * @param File $file A File package object + * @param type $callable A callback to execute after process + * (arguments are $element (LazaretFile or \record_adapter), + * $visa (Visa) + * and $code (self::RECORD_CREATED or self::LAZARET_CREATED)) + * @param type $forceBehavior Force a behavior, one of the self::FORCE_* constant + * @return int One of the self::RECORD_CREATED or self::LAZARET_CREATED constants */ public function process(LazaretSession $session, File $file, $callable = null, $forceBehavior = null) { @@ -278,42 +242,7 @@ class Manager ) ); - $metadatas = array(); - - /** - * @todo $key is not tagname but fieldname - */ - $fieldToKeyMap = array(); - - if (! $fieldToKeyMap) { - foreach ($file->getCollection()->get_databox()->get_meta_structure() as $databox_field) { - - $tagname = $databox_field->get_tag()->getTagname(); - - if ( ! isset($fieldToKeyMap[$tagname])) { - $fieldToKeyMap[$tagname] = array(); - } - - $fieldToKeyMap[$tagname][] = $databox_field->get_name(); - } - } - - foreach ($file->getMedia()->getMetadatas() as $metadata) { - - $key = $metadata->getTag()->getTagname(); - - if ( ! isset($fieldToKeyMap[$key])) { - continue; - } - - foreach ($fieldToKeyMap[$key] as $k) { - if ( ! isset($metadatas[$k])) { - $metadatas[$k] = array(); - } - - $metadatas[$k] = array_merge($metadatas[$k], $metadata->getValue()->asArray()); - } - } + $newMetadata = $file->getMedia()->getMetadatas()->toArray(); foreach ($file->getAttributes() as $attribute) { switch ($attribute->getName()) { @@ -323,31 +252,13 @@ class Manager * current metadata is metadata by source. */ case AttributeInterface::NAME_METAFIELD: - - $key = $attribute->getField()->get_name(); - - if ( ! isset($metadatas[$key])) { - $metadatas[$key] = array(); - } - - $metadatas[$key] = array_merge($metadatas[$key], $attribute->getValue()); + $values = $attribute->getValue(); + $value = $attribute->getField()->is_multi() ? new Multi($values) : new MonoValue(array_pop($values)); + $newMetadata[] = new Metadata($attribute->getField()->get_tag(), $value); break; case AttributeInterface::NAME_METADATA: - - $key = $attribute->getValue()->getTag()->getTagname(); - - if ( ! isset($fieldToKeyMap[$key])) { - continue; - } - - foreach ($fieldToKeyMap[$key] as $k) { - if ( ! isset($metadatas[$k])) { - $metadatas[$k] = array(); - } - - $metadatas[$k] = array_merge($metadatas[$k], $attribute->getValue()->getValue()->asArray()); - } + $newMetadata[] = $attribute->getValue(); break; case AttributeInterface::NAME_STATUS: $element->set_binary_status(decbin(bindec($element->get_status()) | bindec($attribute->getValue()))); @@ -365,55 +276,7 @@ class Manager } } - $databox = $element->get_databox(); - - $metas = array(); - - foreach ($metadatas as $fieldname => $values) { - foreach ($databox->get_meta_structure()->get_elements() as $databox_field) { - - if ($databox_field->get_name() == $fieldname) { - - if ($databox_field->is_multi()) { - - $tmpValues = array(); - foreach ($values as $value) { - $tmpValues = array_merge($tmpValues, \caption_field::get_multi_values($value, $databox_field->get_separator())); - } - - $values = array_unique($tmpValues); - - foreach ($values as $value) { - if ( ! trim($value)) { - continue; - } - $metas[] = array( - 'meta_struct_id' => $databox_field->get_id(), - 'meta_id' => null, - 'value' => $value, - ); - } - } else { - - $value = array_pop($values); - - if ( ! trim($value)) { - continue; - } - - $metas[] = array( - 'meta_struct_id' => $databox_field->get_id(), - 'meta_id' => null, - 'value' => $value, - ); - } - } - } - } - - if ($metas) { - $element->set_metadatas($metas, true); - } + $this->app['phraseanet.metadata-setter']->replaceMetadata($newMetadata, $element); $element->rebuild_subdefs(); $element->reindex(); @@ -505,106 +368,15 @@ class Manager * Add technical Metadata attribute to a package file by reference to add it * to Phraseanet * - * @param File $file The file - * @return \Doctrine\ORM\EntityManager + * @param File $file The file */ protected function addMediaAttributes(File $file) { + $metadataCollection = $this->app['phraseanet.metadata-reader']->read($file->getMedia()); - if (method_exists($file->getMedia(), 'getWidth')) { - $file->addAttribute( - new MetadataAttr( - new Metadata( - new TfWidth(), new MonoValue($file->getMedia()->getWidth()) - ) - ) - ); - } - if (method_exists($file->getMedia(), 'getHeight')) { - $file->addAttribute( - new MetadataAttr( - new Metadata( - new TfHeight(), new MonoValue($file->getMedia()->getHeight()) - ) - ) - ); - } - if (method_exists($file->getMedia(), 'getChannels')) { - $file->addAttribute( - new MetadataAttr( - new Metadata( - new TfChannels(), new MonoValue($file->getMedia()->getChannels()) - ) - ) - ); - } - if (method_exists($file->getMedia(), 'getColorDepth')) { - $file->addAttribute( - new MetadataAttr( - new Metadata( - new TfBits(), new MonoValue($file->getMedia()->getColorDepth()) - ) - ) - ); - } - if (method_exists($file->getMedia(), 'getDuration')) { - $file->addAttribute( - new MetadataAttr( - new Metadata( - new TfDuration(), new MonoValue($file->getMedia()->getDuration()) - ) - ) - ); - } - - if ($file->getFile()->getMimeType() == 'application/pdf' && null !== $this->pdfToText) { - - try { - $text = $this->pdfToText->getText($file->getFile()->getRealPath()); - - if (trim($text)) { - $file->addAttribute( - new MetadataAttr( - new Metadata( - new PdfText(), new MonoValue($text) - ) - ) - ); - } - } catch (\XPDF\Exception\Exception $e) { - - } - } - - $file->addAttribute( - new MetadataAttr( - new Metadata( - new TfMimetype(), new MonoValue($file->getFile()->getMimeType())))); - $file->addAttribute( - new MetadataAttr( - new Metadata( - new TfSize(), new MonoValue($file->getFile()->getSize())))); - $file->addAttribute( - new MetadataAttr( - new Metadata( - new TfBasename(), new MonoValue(pathinfo($file->getOriginalName(), PATHINFO_BASENAME)) - ) - ) - ); - $file->addAttribute( - new MetadataAttr( - new Metadata( - new TfFilename(), new MonoValue(pathinfo($file->getOriginalName(), PATHINFO_FILENAME)) - ) - ) - ); - $file->addAttribute( - new MetadataAttr( - new Metadata( - new TfExtension(), new MonoValue(pathinfo($file->getOriginalName(), PATHINFO_EXTENSION)) - ) - ) - ); + array_walk($metadataCollection, function (Metadata $metadata) use ($file) { + $file->addAttribute(new MetadataAttr($metadata)); + }); return $this; } diff --git a/lib/Alchemy/Phrasea/Controller/Prod/Tools.php b/lib/Alchemy/Phrasea/Controller/Prod/Tools.php index b7919f9b62..aff0dd025a 100644 --- a/lib/Alchemy/Phrasea/Controller/Prod/Tools.php +++ b/lib/Alchemy/Phrasea/Controller/Prod/Tools.php @@ -143,6 +143,9 @@ class Tools implements ControllerProviderInterface $media = $app['mediavorus']->guess($tempoFile); $record->substitute_subdef('document', $media, $app); + $record->insertTechnicalDatas($app['mediavorus']); + $app['phraseanet.metadata-setter']->replaceMetadata($app['phraseanet.metadata-reader']->read($media), $record); + $app['phraseanet.logger']($record->get_databox())->log( $record, \Session_Logger::EVENT_SUBSTITUTE, diff --git a/lib/Alchemy/Phrasea/Core/Provider/BorderManagerServiceProvider.php b/lib/Alchemy/Phrasea/Core/Provider/BorderManagerServiceProvider.php index 37791357fd..626e2b9a22 100644 --- a/lib/Alchemy/Phrasea/Core/Provider/BorderManagerServiceProvider.php +++ b/lib/Alchemy/Phrasea/Core/Provider/BorderManagerServiceProvider.php @@ -15,7 +15,6 @@ use Alchemy\Phrasea\Border\Manager; use Alchemy\Phrasea\Border\MimeGuesserConfiguration; use Silex\Application; use Silex\ServiceProviderInterface; -use XPDF\Exception\BinaryNotFoundException; class BorderManagerServiceProvider implements ServiceProviderInterface { @@ -25,12 +24,6 @@ class BorderManagerServiceProvider implements ServiceProviderInterface $app['border-manager'] = $app->share(function (Application $app) { $borderManager = new Manager($app); - try { - $borderManager->setPdfToText($app['xpdf.pdftotext']); - } catch (BinaryNotFoundException $e) { - - } - $options = $app['phraseanet.configuration']['border-manager']; $registeredCheckers = array(); diff --git a/lib/Alchemy/Phrasea/Core/Provider/PhraseanetServiceProvider.php b/lib/Alchemy/Phrasea/Core/Provider/PhraseanetServiceProvider.php index 24c5c6dd9c..1719745b3a 100644 --- a/lib/Alchemy/Phrasea/Core/Provider/PhraseanetServiceProvider.php +++ b/lib/Alchemy/Phrasea/Core/Provider/PhraseanetServiceProvider.php @@ -11,9 +11,12 @@ namespace Alchemy\Phrasea\Core\Provider; +use Alchemy\Phrasea\Metadata\PhraseanetMetadataReader; +use Alchemy\Phrasea\Metadata\PhraseanetMetadataSetter; use Alchemy\Phrasea\Security\Firewall; use Silex\Application as SilexApplication; use Silex\ServiceProviderInterface; +use XPDF\Exception\BinaryNotFoundException; class PhraseanetServiceProvider implements ServiceProviderInterface { @@ -37,6 +40,22 @@ class PhraseanetServiceProvider implements ServiceProviderInterface return $events; }); + + $app['phraseanet.metadata-reader'] = $app->share(function (SilexApplication $app) { + $reader = new PhraseanetMetadataReader(); + + try { + $reader->setPdfToText($app['xpdf.pdftotext']); + } catch (BinaryNotFoundException $e) { + + } + + return $reader; + }); + + $app['phraseanet.metadata-setter'] = $app->share(function (SilexApplication $app) { + return new PhraseanetMetadataSetter(); + }); } public function boot(SilexApplication $app) diff --git a/lib/Alchemy/Phrasea/Metadata/PhraseanetMetadataReader.php b/lib/Alchemy/Phrasea/Metadata/PhraseanetMetadataReader.php new file mode 100644 index 0000000000..e7d5926c31 --- /dev/null +++ b/lib/Alchemy/Phrasea/Metadata/PhraseanetMetadataReader.php @@ -0,0 +1,90 @@ +pdfToText = $pdfToText; + + return $this; + } + + /** + * Gets the PdfToText driver. + * + * @return PdfTotext + */ + public function getPdfToText() + { + return $this->pdfToText; + } + + public function read(MediaInterface $media) + { + $ret = array(); + $mimeType = $media->getFile()->getMimeType(); + + foreach (array( + 'getWidth' => 'TfWidth', + 'getHeight' => 'TfHeight', + 'getChannels' => 'TfChannels', + 'getColorDepth' => 'TfBits', + 'getDuration' => 'TfDuration', + ) as $method => $tag) { + $classname = 'Alchemy\\Phrasea\\Metadata\\Tag\\'.$tag; + if (method_exists($media, $method)) { + $ret[] = new Metadata(new $classname(), new MonoValue(call_user_func(array($media, $method)))); + } + } + + if ($mimeType == 'application/pdf' && null !== $this->pdfToText) { + try { + $text = $this->pdfToText->getText($media->getFile()->getRealPath()); + if (trim($text)) { + $ret[] = new Metadata(new PdfText(), new MonoValue($text)); + } + } catch (XPDFException $e) { + } + } + + $ret[] = new Metadata(new TfMimetype(), new MonoValue($mimeType)); + $ret[] = new Metadata(new TfSize(), new MonoValue($media->getFile()->getSize())); + $ret[] = new Metadata(new TfBasename(), new MonoValue(pathinfo($media->getFile()->getFileName(), PATHINFO_BASENAME))); + $ret[] = new Metadata(new TfFilename(), new MonoValue(pathinfo($media->getFile()->getFileName(), PATHINFO_FILENAME))); + $ret[] = new Metadata(new TfExtension(), new MonoValue(pathinfo($media->getFile()->getFileName(), PATHINFO_EXTENSION))); + + return $ret; + } +} diff --git a/lib/Alchemy/Phrasea/Metadata/PhraseanetMetadataSetter.php b/lib/Alchemy/Phrasea/Metadata/PhraseanetMetadataSetter.php new file mode 100644 index 0000000000..840fe93acb --- /dev/null +++ b/lib/Alchemy/Phrasea/Metadata/PhraseanetMetadataSetter.php @@ -0,0 +1,98 @@ +get_databox()->get_meta_structure()); + + array_walk($arrayStructure, function ($databoxField) use (&$tagnameToFieldnameMapping) { + $tagname = $databoxField->get_tag()->getTagname(); + $tagnameToFieldnameMapping[$tagname][] = $databoxField->get_name(); + }); + + array_walk($metadataCollection, function (Metadata $metadata) use (&$metadatas, $tagnameToFieldnameMapping) { + $tagname = $metadata->getTag()->getTagname(); + + if (!isset($tagnameToFieldnameMapping[$tagname])) { + return; + } + + foreach ($tagnameToFieldnameMapping[$tagname] as $fieldname) { + if ( ! isset($metadatas[$fieldname])) { + $metadatas[$fieldname] = array(); + } + $metadatas[$fieldname] = array_merge($metadatas[$fieldname], $metadata->getValue()->asArray()); + } + }); + + $metas = array(); + + array_walk($arrayStructure, function (\databox_field $field) use (&$metas, $metadatas, $record) { + $fieldname = $field->get_name(); + + if (!isset($metadatas[$fieldname])) { + return; + } + + $values = $metadatas[$fieldname]; + + if ($record->get_caption()->has_field($fieldname)) { + foreach ($record->get_caption()->get_field($fieldname)->get_values() as $value) { + $value->delete(); + } + } + + if ($field->is_multi()) { + $tmpValues = array(); + foreach ($values as $value) { + $tmpValues = array_merge($tmpValues, \caption_field::get_multi_values($value, $field->get_separator())); + } + + $values = array_unique($tmpValues); + + foreach ($values as $value) { + if (trim($value) === '') { + continue; + } + $metas[] = array( + 'meta_struct_id' => $field->get_id(), + 'meta_id' => null, + 'value' => $value, + ); + } + } else { + $value = array_pop($values); + if (trim($value) === '') { + return; + } + + $metas[] = array( + 'meta_struct_id' => $field->get_id(), + 'meta_id' => null, + 'value' => $value, + ); + } + }); + + if (count($metas) > 0) { + $record->set_metadatas($metas, true); + } + } +} diff --git a/lib/classes/caption/field.php b/lib/classes/caption/field.php index 82d9e4a3b1..f816137f87 100644 --- a/lib/classes/caption/field.php +++ b/lib/classes/caption/field.php @@ -180,7 +180,7 @@ class caption_field implements cache_cacheableInterface /** * - * @return array + * @return \caption_Field_Value[] */ public function get_values() { diff --git a/lib/classes/caption/record.php b/lib/classes/caption/record.php index 422245605c..1c1b8d21ab 100644 --- a/lib/classes/caption/record.php +++ b/lib/classes/caption/record.php @@ -254,7 +254,7 @@ class caption_record implements caption_interface, cache_cacheableInterface */ public function get_field($fieldname) { - foreach ($this->get_fields(null, true) as $meta_struct_id => $field) { + foreach ($this->get_fields(null, true) as $field) { if ($field->get_name() == $fieldname) { return $field; } @@ -263,6 +263,17 @@ class caption_record implements caption_interface, cache_cacheableInterface throw new \Exception('Field not found'); } + public function has_field($fieldname) + { + foreach ($this->get_fields(null, true) as $field) { + if ($field->get_name() == $fieldname) { + return true; + } + } + + return false; + } + /** * * @param type $label diff --git a/lib/classes/record/adapter.php b/lib/classes/record/adapter.php index 2563c29e98..2be51f8cd1 100644 --- a/lib/classes/record/adapter.php +++ b/lib/classes/record/adapter.php @@ -1371,7 +1371,12 @@ class record_adapter implements record_Interface, cache_cacheableInterface return $this; } - $sql = 'REPLACE INTO technical_datas (id, record_id, name, value) + $sql = 'DELETE FROM technical_datas WHERE record_id = :record_id'; + $stmt = $this->get_databox()->get_connection()->prepare($sql); + $stmt->execute(array(':record_id' => $this->get_record_id())); + $stmt->closeCursor(); + + $sql = 'INSERT INTO technical_datas (id, record_id, name, value) VALUES (null, :record_id, :name, :value)'; $stmt = $this->get_databox()->get_connection()->prepare($sql); diff --git a/tests/Alchemy/Tests/Phrasea/Border/ManagerTest.php b/tests/Alchemy/Tests/Phrasea/Border/ManagerTest.php index f37ac57c5e..d6c9c09f27 100644 --- a/tests/Alchemy/Tests/Phrasea/Border/ManagerTest.php +++ b/tests/Alchemy/Tests/Phrasea/Border/ManagerTest.php @@ -408,7 +408,7 @@ class ManagerTest extends \PhraseanetWebTestCaseAuthenticatedAbstract ->getMock(); $manager = new ManagerTester(self::$DI['app']); - $manager->setPdfToText($pdfToText); + self::$DI['app']['phraseanet.metadata-reader']->setPdfToText($pdfToText); $pdfToText->expects($this->once()) ->method('getText') diff --git a/tests/Alchemy/Tests/Phrasea/Core/Provider/BorderManagerServiceProviderTest.php b/tests/Alchemy/Tests/Phrasea/Core/Provider/BorderManagerServiceProviderTest.php index 307e7cce60..b141577c4f 100644 --- a/tests/Alchemy/Tests/Phrasea/Core/Provider/BorderManagerServiceProviderTest.php +++ b/tests/Alchemy/Tests/Phrasea/Core/Provider/BorderManagerServiceProviderTest.php @@ -3,6 +3,7 @@ namespace Alchemy\Tests\Phrasea\Core\Provider; use Alchemy\Phrasea\Core\Provider\BorderManagerServiceProvider; +use Alchemy\Phrasea\Core\Provider\PhraseanetServiceProvider; use Silex\Application; use Symfony\Component\Process\ExecutableFinder; use XPDF\XPDFServiceProvider; @@ -32,10 +33,11 @@ class BorderManagerServiceProvidertest extends ServiceProviderTestCase ) )); $app->register(new BorderManagerServiceProvider()); + $app->register(new PhraseanetServiceProvider()); $app['phraseanet.configuration'] = array('border-manager' => array('enabled' => false)); $this->assertInstanceOf('Alchemy\Phrasea\Border\Manager', $app['border-manager']); - $this->assertNull($app['border-manager']->getPdfToText()); + $this->assertNull($app['phraseanet.metadata-reader']->getPdfToText()); } public function testItLoadsWithXPDF() @@ -48,6 +50,7 @@ class BorderManagerServiceProvidertest extends ServiceProviderTestCase } $app = new Application(); + $app->register(new PhraseanetServiceProvider()); $app->register(new XPDFServiceProvider(), array( 'xpdf.configuration' => array( 'pdftotext.binaries' => $php, @@ -57,6 +60,6 @@ class BorderManagerServiceProvidertest extends ServiceProviderTestCase $app['phraseanet.configuration'] = array('border-manager' => array('enabled' => false)); $this->assertInstanceOf('Alchemy\Phrasea\Border\Manager', $app['border-manager']); - $this->assertInstanceOf('XPDF\PdfToText', $app['border-manager']->getPdfToText()); + $this->assertInstanceOf('XPDF\PdfToText', $app['phraseanet.metadata-reader']->getPdfToText()); } } diff --git a/tests/Alchemy/Tests/Phrasea/Core/Provider/PhraseanetServiceProviderTest.php b/tests/Alchemy/Tests/Phrasea/Core/Provider/PhraseanetServiceProviderTest.php new file mode 100644 index 0000000000..f28c5f6c3c --- /dev/null +++ b/tests/Alchemy/Tests/Phrasea/Core/Provider/PhraseanetServiceProviderTest.php @@ -0,0 +1,42 @@ +