Merge pull request #1001 from romainneutron/fix-1707

[Ready][3.8] Fix #1707 : Update technical information and metadata fields on record substitution
This commit is contained in:
Nicolas Le Goff
2014-03-02 19:14:06 +01:00
12 changed files with 297 additions and 261 deletions

View File

@@ -13,20 +13,9 @@ namespace Alchemy\Phrasea\Border;
use Alchemy\Phrasea\Border\Checker\CheckerInterface;
use Alchemy\Phrasea\Border\Attribute\AttributeInterface;
use Alchemy\Phrasea\Metadata\Tag\PdfText;
use Alchemy\Phrasea\Metadata\Tag\TfArchivedate;
use Alchemy\Phrasea\Metadata\Tag\TfBasename;
use Alchemy\Phrasea\Metadata\Tag\TfBits;
use Alchemy\Phrasea\Metadata\Tag\TfChannels;
use Alchemy\Phrasea\Metadata\Tag\TfDuration;
use Alchemy\Phrasea\Metadata\Tag\TfExtension;
use Alchemy\Phrasea\Metadata\Tag\TfFilename;
use Alchemy\Phrasea\Metadata\Tag\TfHeight;
use Alchemy\Phrasea\Metadata\Tag\TfMimetype;
use Alchemy\Phrasea\Metadata\Tag\TfQuarantine;
use Alchemy\Phrasea\Metadata\Tag\TfRecordid;
use Alchemy\Phrasea\Metadata\Tag\TfSize;
use Alchemy\Phrasea\Metadata\Tag\TfWidth;
use Alchemy\Phrasea\Border\Attribute\Metadata as MetadataAttr;
use Entities\LazaretAttribute;
use Entities\LazaretFile;
@@ -35,9 +24,9 @@ use MediaAlchemyst\Exception\ExceptionInterface as MediaAlchemystException;
use MediaAlchemyst\Specification\Image as ImageSpec;
use PHPExiftool\Driver\Metadata\Metadata;
use PHPExiftool\Driver\Value\Mono as MonoValue;
use PHPExiftool\Driver\Value\Multi;
use Silex\Application;
use Symfony\Component\Filesystem\Exception\IOException;
use XPDF\PdfToText;
/**
* Phraseanet Border Manager
@@ -51,7 +40,6 @@ class Manager
protected $checkers = array();
protected $app;
protected $filesystem;
protected $pdfToText;
const RECORD_CREATED = 1;
const LAZARET_CREATED = 2;
@@ -77,30 +65,6 @@ class Manager
$this->app = null;
}
/**
* Sets a PdfToText driver for extracting PDF content.
*
* @param PdfTotext $pdfToText The PdfToText Object
*
* @return Manager
*/
public function setPdfToText(PdfToText $pdfToText)
{
$this->pdfToText = $pdfToText;
return $this;
}
/**
* Gets the PdfToText driver.
*
* @return PdfTotext
*/
public function getPdfToText()
{
return $this->pdfToText;
}
/**
* Add a file to Phraseanet after having checked it
*
@@ -278,42 +242,7 @@ class Manager
)
);
$metadatas = array();
/**
* @todo $key is not tagname but fieldname
*/
$fieldToKeyMap = array();
if (! $fieldToKeyMap) {
foreach ($file->getCollection()->get_databox()->get_meta_structure() as $databox_field) {
$tagname = $databox_field->get_tag()->getTagname();
if ( ! isset($fieldToKeyMap[$tagname])) {
$fieldToKeyMap[$tagname] = array();
}
$fieldToKeyMap[$tagname][] = $databox_field->get_name();
}
}
foreach ($file->getMedia()->getMetadatas() as $metadata) {
$key = $metadata->getTag()->getTagname();
if ( ! isset($fieldToKeyMap[$key])) {
continue;
}
foreach ($fieldToKeyMap[$key] as $k) {
if ( ! isset($metadatas[$k])) {
$metadatas[$k] = array();
}
$metadatas[$k] = array_merge($metadatas[$k], $metadata->getValue()->asArray());
}
}
$newMetadata = $file->getMedia()->getMetadatas()->toArray();
foreach ($file->getAttributes() as $attribute) {
switch ($attribute->getName()) {
@@ -323,31 +252,13 @@ class Manager
* current metadata is metadata by source.
*/
case AttributeInterface::NAME_METAFIELD:
$key = $attribute->getField()->get_name();
if ( ! isset($metadatas[$key])) {
$metadatas[$key] = array();
}
$metadatas[$key] = array_merge($metadatas[$key], $attribute->getValue());
$values = $attribute->getValue();
$value = $attribute->getField()->is_multi() ? new Multi($values) : new MonoValue(array_pop($values));
$newMetadata[] = new Metadata($attribute->getField()->get_tag(), $value);
break;
case AttributeInterface::NAME_METADATA:
$key = $attribute->getValue()->getTag()->getTagname();
if ( ! isset($fieldToKeyMap[$key])) {
continue;
}
foreach ($fieldToKeyMap[$key] as $k) {
if ( ! isset($metadatas[$k])) {
$metadatas[$k] = array();
}
$metadatas[$k] = array_merge($metadatas[$k], $attribute->getValue()->getValue()->asArray());
}
$newMetadata[] = $attribute->getValue();
break;
case AttributeInterface::NAME_STATUS:
$element->set_binary_status(decbin(bindec($element->get_status()) | bindec($attribute->getValue())));
@@ -365,55 +276,7 @@ class Manager
}
}
$databox = $element->get_databox();
$metas = array();
foreach ($metadatas as $fieldname => $values) {
foreach ($databox->get_meta_structure()->get_elements() as $databox_field) {
if ($databox_field->get_name() == $fieldname) {
if ($databox_field->is_multi()) {
$tmpValues = array();
foreach ($values as $value) {
$tmpValues = array_merge($tmpValues, \caption_field::get_multi_values($value, $databox_field->get_separator()));
}
$values = array_unique($tmpValues);
foreach ($values as $value) {
if ( ! trim($value)) {
continue;
}
$metas[] = array(
'meta_struct_id' => $databox_field->get_id(),
'meta_id' => null,
'value' => $value,
);
}
} else {
$value = array_pop($values);
if ( ! trim($value)) {
continue;
}
$metas[] = array(
'meta_struct_id' => $databox_field->get_id(),
'meta_id' => null,
'value' => $value,
);
}
}
}
}
if ($metas) {
$element->set_metadatas($metas, true);
}
$this->app['phraseanet.metadata-setter']->replaceMetadata($newMetadata, $element);
$element->rebuild_subdefs();
$element->reindex();
@@ -506,105 +369,14 @@ class Manager
* to Phraseanet
*
* @param File $file The file
* @return \Doctrine\ORM\EntityManager
*/
protected function addMediaAttributes(File $file)
{
$metadataCollection = $this->app['phraseanet.metadata-reader']->read($file->getMedia());
if (method_exists($file->getMedia(), 'getWidth')) {
$file->addAttribute(
new MetadataAttr(
new Metadata(
new TfWidth(), new MonoValue($file->getMedia()->getWidth())
)
)
);
}
if (method_exists($file->getMedia(), 'getHeight')) {
$file->addAttribute(
new MetadataAttr(
new Metadata(
new TfHeight(), new MonoValue($file->getMedia()->getHeight())
)
)
);
}
if (method_exists($file->getMedia(), 'getChannels')) {
$file->addAttribute(
new MetadataAttr(
new Metadata(
new TfChannels(), new MonoValue($file->getMedia()->getChannels())
)
)
);
}
if (method_exists($file->getMedia(), 'getColorDepth')) {
$file->addAttribute(
new MetadataAttr(
new Metadata(
new TfBits(), new MonoValue($file->getMedia()->getColorDepth())
)
)
);
}
if (method_exists($file->getMedia(), 'getDuration')) {
$file->addAttribute(
new MetadataAttr(
new Metadata(
new TfDuration(), new MonoValue($file->getMedia()->getDuration())
)
)
);
}
if ($file->getFile()->getMimeType() == 'application/pdf' && null !== $this->pdfToText) {
try {
$text = $this->pdfToText->getText($file->getFile()->getRealPath());
if (trim($text)) {
$file->addAttribute(
new MetadataAttr(
new Metadata(
new PdfText(), new MonoValue($text)
)
)
);
}
} catch (\XPDF\Exception\Exception $e) {
}
}
$file->addAttribute(
new MetadataAttr(
new Metadata(
new TfMimetype(), new MonoValue($file->getFile()->getMimeType()))));
$file->addAttribute(
new MetadataAttr(
new Metadata(
new TfSize(), new MonoValue($file->getFile()->getSize()))));
$file->addAttribute(
new MetadataAttr(
new Metadata(
new TfBasename(), new MonoValue(pathinfo($file->getOriginalName(), PATHINFO_BASENAME))
)
)
);
$file->addAttribute(
new MetadataAttr(
new Metadata(
new TfFilename(), new MonoValue(pathinfo($file->getOriginalName(), PATHINFO_FILENAME))
)
)
);
$file->addAttribute(
new MetadataAttr(
new Metadata(
new TfExtension(), new MonoValue(pathinfo($file->getOriginalName(), PATHINFO_EXTENSION))
)
)
);
array_walk($metadataCollection, function (Metadata $metadata) use ($file) {
$file->addAttribute(new MetadataAttr($metadata));
});
return $this;
}

View File

@@ -143,6 +143,9 @@ class Tools implements ControllerProviderInterface
$media = $app['mediavorus']->guess($tempoFile);
$record->substitute_subdef('document', $media, $app);
$record->insertTechnicalDatas($app['mediavorus']);
$app['phraseanet.metadata-setter']->replaceMetadata($app['phraseanet.metadata-reader']->read($media), $record);
$app['phraseanet.logger']($record->get_databox())->log(
$record,
\Session_Logger::EVENT_SUBSTITUTE,

View File

@@ -15,7 +15,6 @@ use Alchemy\Phrasea\Border\Manager;
use Alchemy\Phrasea\Border\MimeGuesserConfiguration;
use Silex\Application;
use Silex\ServiceProviderInterface;
use XPDF\Exception\BinaryNotFoundException;
class BorderManagerServiceProvider implements ServiceProviderInterface
{
@@ -25,12 +24,6 @@ class BorderManagerServiceProvider implements ServiceProviderInterface
$app['border-manager'] = $app->share(function (Application $app) {
$borderManager = new Manager($app);
try {
$borderManager->setPdfToText($app['xpdf.pdftotext']);
} catch (BinaryNotFoundException $e) {
}
$options = $app['phraseanet.configuration']['border-manager'];
$registeredCheckers = array();

View File

@@ -11,9 +11,12 @@
namespace Alchemy\Phrasea\Core\Provider;
use Alchemy\Phrasea\Metadata\PhraseanetMetadataReader;
use Alchemy\Phrasea\Metadata\PhraseanetMetadataSetter;
use Alchemy\Phrasea\Security\Firewall;
use Silex\Application as SilexApplication;
use Silex\ServiceProviderInterface;
use XPDF\Exception\BinaryNotFoundException;
class PhraseanetServiceProvider implements ServiceProviderInterface
{
@@ -37,6 +40,22 @@ class PhraseanetServiceProvider implements ServiceProviderInterface
return $events;
});
$app['phraseanet.metadata-reader'] = $app->share(function (SilexApplication $app) {
$reader = new PhraseanetMetadataReader();
try {
$reader->setPdfToText($app['xpdf.pdftotext']);
} catch (BinaryNotFoundException $e) {
}
return $reader;
});
$app['phraseanet.metadata-setter'] = $app->share(function (SilexApplication $app) {
return new PhraseanetMetadataSetter();
});
}
public function boot(SilexApplication $app)

View File

@@ -0,0 +1,90 @@
<?php
/*
* This file is part of Phraseanet
*
* (c) 2005-2014 Alchemy
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
namespace Alchemy\Phrasea\Metadata;
use Alchemy\Phrasea\Metadata\Tag\PdfText;
use Alchemy\Phrasea\Metadata\Tag\TfBasename;
use Alchemy\Phrasea\Metadata\Tag\TfExtension;
use Alchemy\Phrasea\Metadata\Tag\TfFilename;
use Alchemy\Phrasea\Metadata\Tag\TfMimetype;
use Alchemy\Phrasea\Metadata\Tag\TfSize;
use MediaVorus\Media\MediaInterface;
use PHPExiftool\Driver\Metadata\Metadata;
use PHPExiftool\Driver\Value\Mono as MonoValue;
use XPDF\PdfToText;
use XPDF\Exception\Exception as XPDFException;
/**
 * Reads the Phraseanet-specific metadata (technical fields, file
 * information and, when a driver is available, PDF text) from a media.
 */
class PhraseanetMetadataReader
{
    /**
     * Optional PdfToText driver; stays null until setPdfToText() is called.
     */
    protected $pdfToText;

    /**
     * Sets a PdfToText driver for extracting PDF content.
     *
     * @param PdfToText $pdfToText The PdfToText object
     *
     * @return PhraseanetMetadataReader
     */
    public function setPdfToText(PdfToText $pdfToText)
    {
        $this->pdfToText = $pdfToText;

        return $this;
    }

    /**
     * Gets the PdfToText driver.
     *
     * @return PdfToText|null The driver, or null when none has been set
     */
    public function getPdfToText()
    {
        return $this->pdfToText;
    }

    /**
     * Builds the list of Phraseanet metadata describing a media.
     *
     * The result contains, in this order:
     *  - one entry per technical getter the media object actually exposes
     *    (width, height, channels, color depth, duration);
     *  - the extracted text when the file is a PDF, a PdfToText driver is
     *    set and the extracted text is not blank;
     *  - mime type, size, base name, file name and extension of the file.
     *
     * @param MediaInterface $media The media to inspect
     *
     * @return Metadata[]
     */
    public function read(MediaInterface $media)
    {
        $ret = array();
        $file = $media->getFile();
        $mimeType = $file->getMimeType();

        // Media getter => tag class (short name) holding its value.
        $technicalTags = array(
            'getWidth'      => 'TfWidth',
            'getHeight'     => 'TfHeight',
            'getChannels'   => 'TfChannels',
            'getColorDepth' => 'TfBits',
            'getDuration'   => 'TfDuration',
        );

        foreach ($technicalTags as $method => $tag) {
            // Not every media type exposes every getter.
            if (!method_exists($media, $method)) {
                continue;
            }

            $classname = 'Alchemy\\Phrasea\\Metadata\\Tag\\'.$tag;
            $ret[] = new Metadata(new $classname(), new MonoValue($media->$method()));
        }

        if ($mimeType == 'application/pdf' && null !== $this->pdfToText) {
            try {
                $text = $this->pdfToText->getText($file->getRealPath());

                // Compare against the empty string: a bare truthiness test
                // would also discard a document whose whole text is "0".
                if (trim($text) !== '') {
                    $ret[] = new Metadata(new PdfText(), new MonoValue($text));
                }
            } catch (XPDFException $e) {
                // Text extraction is best effort: on failure the PdfText
                // entry is simply left out.
            }
        }

        $filename = $file->getFileName();

        $ret[] = new Metadata(new TfMimetype(), new MonoValue($mimeType));
        $ret[] = new Metadata(new TfSize(), new MonoValue($file->getSize()));
        $ret[] = new Metadata(new TfBasename(), new MonoValue(pathinfo($filename, PATHINFO_BASENAME)));
        $ret[] = new Metadata(new TfFilename(), new MonoValue(pathinfo($filename, PATHINFO_FILENAME)));
        $ret[] = new Metadata(new TfExtension(), new MonoValue(pathinfo($filename, PATHINFO_EXTENSION)));

        return $ret;
    }
}

View File

@@ -0,0 +1,98 @@
<?php
/*
* This file is part of Phraseanet
*
* (c) 2005-2014 Alchemy
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
namespace Alchemy\Phrasea\Metadata;
use PHPExiftool\Driver\Metadata\Metadata;
/**
 * Writes a collection of metadata into the caption fields of a record,
 * using the tag declared on each field of the databox structure.
 */
class PhraseanetMetadataSetter
{
    /**
     * Replaces the caption values of a record with the given metadata.
     *
     * Each metadata is matched, by tag name, against the fields of the
     * record's databox structure; metadata whose tag is bound to no field
     * are ignored. For every field that receives at least one metadata,
     * delete() is called on each existing caption value before the new
     * values are computed, so a field whose incoming values are all blank
     * gets no new value. Fields that receive no metadata are left untouched.
     *
     * @param Metadata[]|\Traversable $metadataCollection Metadata to apply
     * @param \record_adapter         $record             Record to update
     */
    public function replaceMetadata($metadataCollection, \record_adapter $record)
    {
        $structure = iterator_to_array($record->get_databox()->get_meta_structure());

        // tag name => names of every field bound to that tag
        $tagnameToFieldnameMapping = array();
        foreach ($structure as $databoxField) {
            $tagnameToFieldnameMapping[$databoxField->get_tag()->getTagname()][] = $databoxField->get_name();
        }

        // field name => raw values gathered from the metadata collection.
        // foreach (rather than array_walk) accepts arrays and Traversable
        // collections alike.
        $metadatas = array();
        foreach ($metadataCollection as $metadata) {
            $metadatas = $this->collectValues($metadatas, $tagnameToFieldnameMapping, $metadata);
        }

        $metas = array();
        foreach ($structure as $field) {
            $fieldname = $field->get_name();

            if (!isset($metadatas[$fieldname])) {
                continue;
            }

            if ($record->get_caption()->has_field($fieldname)) {
                foreach ($record->get_caption()->get_field($fieldname)->get_values() as $value) {
                    $value->delete();
                }
            }

            foreach ($this->filterFieldValues($field, $metadatas[$fieldname]) as $value) {
                $metas[] = array(
                    'meta_struct_id' => $field->get_id(),
                    'meta_id'        => null,
                    'value'          => $value,
                );
            }
        }

        if (count($metas) > 0) {
            $record->set_metadatas($metas, true);
        }
    }

    /**
     * Appends the values of one metadata to every field bound to its tag.
     *
     * @param array    $metadatas Values gathered so far, keyed by field name
     * @param array    $mapping   Tag name => list of field names
     * @param Metadata $metadata  The metadata to dispatch
     *
     * @return array The updated field name => values map
     */
    private function collectValues(array $metadatas, array $mapping, Metadata $metadata)
    {
        $tagname = $metadata->getTag()->getTagname();

        if (!isset($mapping[$tagname])) {
            return $metadatas;
        }

        foreach ($mapping[$tagname] as $fieldname) {
            if (!isset($metadatas[$fieldname])) {
                $metadatas[$fieldname] = array();
            }
            $metadatas[$fieldname] = array_merge($metadatas[$fieldname], $metadata->getValue()->asArray());
        }

        return $metadatas;
    }

    /**
     * Reduces the raw values of a field to the values that must be stored.
     *
     * Multi-valued fields: every raw value is split with the field
     * separator, duplicates are removed and blank entries are dropped.
     * Mono-valued fields: only the last raw value is kept, unless blank.
     *
     * @param \databox_field $field  The field being filled
     * @param array          $values Raw values gathered for that field
     *
     * @return array The values to store, possibly empty
     */
    private function filterFieldValues(\databox_field $field, array $values)
    {
        if (!$field->is_multi()) {
            $value = array_pop($values);

            return trim($value) === '' ? array() : array($value);
        }

        $splitValues = array();
        foreach ($values as $value) {
            $splitValues = array_merge($splitValues, \caption_field::get_multi_values($value, $field->get_separator()));
        }

        $kept = array();
        foreach (array_unique($splitValues) as $value) {
            if (trim($value) === '') {
                continue;
            }
            $kept[] = $value;
        }

        return $kept;
    }
}

View File

@@ -180,7 +180,7 @@ class caption_field implements cache_cacheableInterface
/**
*
* @return array
* @return \caption_Field_Value[]
*/
public function get_values()
{

View File

@@ -254,7 +254,7 @@ class caption_record implements caption_interface, cache_cacheableInterface
*/
public function get_field($fieldname)
{
foreach ($this->get_fields(null, true) as $meta_struct_id => $field) {
foreach ($this->get_fields(null, true) as $field) {
if ($field->get_name() == $fieldname) {
return $field;
}
@@ -263,6 +263,17 @@ class caption_record implements caption_interface, cache_cacheableInterface
throw new \Exception('Field not found');
}
/**
 * Tells whether this caption holds a field with the given name.
 *
 * @param string $fieldname Name of the field to look for
 *
 * @return boolean True when one of the caption fields has that name
 */
public function has_field($fieldname)
{
    $found = false;

    foreach ($this->get_fields(null, true) as $candidate) {
        if ($candidate->get_name() == $fieldname) {
            $found = true;
            break;
        }
    }

    return $found;
}
/**
*
* @param type $label

View File

@@ -1371,7 +1371,12 @@ class record_adapter implements record_Interface, cache_cacheableInterface
return $this;
}
$sql = 'REPLACE INTO technical_datas (id, record_id, name, value)
$sql = 'DELETE FROM technical_datas WHERE record_id = :record_id';
$stmt = $this->get_databox()->get_connection()->prepare($sql);
$stmt->execute(array(':record_id' => $this->get_record_id()));
$stmt->closeCursor();
$sql = 'INSERT INTO technical_datas (id, record_id, name, value)
VALUES (null, :record_id, :name, :value)';
$stmt = $this->get_databox()->get_connection()->prepare($sql);

View File

@@ -408,7 +408,7 @@ class ManagerTest extends \PhraseanetWebTestCaseAuthenticatedAbstract
->getMock();
$manager = new ManagerTester(self::$DI['app']);
$manager->setPdfToText($pdfToText);
self::$DI['app']['phraseanet.metadata-reader']->setPdfToText($pdfToText);
$pdfToText->expects($this->once())
->method('getText')

View File

@@ -3,6 +3,7 @@
namespace Alchemy\Tests\Phrasea\Core\Provider;
use Alchemy\Phrasea\Core\Provider\BorderManagerServiceProvider;
use Alchemy\Phrasea\Core\Provider\PhraseanetServiceProvider;
use Silex\Application;
use Symfony\Component\Process\ExecutableFinder;
use XPDF\XPDFServiceProvider;
@@ -32,10 +33,11 @@ class BorderManagerServiceProvidertest extends ServiceProviderTestCase
)
));
$app->register(new BorderManagerServiceProvider());
$app->register(new PhraseanetServiceProvider());
$app['phraseanet.configuration'] = array('border-manager' => array('enabled' => false));
$this->assertInstanceOf('Alchemy\Phrasea\Border\Manager', $app['border-manager']);
$this->assertNull($app['border-manager']->getPdfToText());
$this->assertNull($app['phraseanet.metadata-reader']->getPdfToText());
}
public function testItLoadsWithXPDF()
@@ -48,6 +50,7 @@ class BorderManagerServiceProvidertest extends ServiceProviderTestCase
}
$app = new Application();
$app->register(new PhraseanetServiceProvider());
$app->register(new XPDFServiceProvider(), array(
'xpdf.configuration' => array(
'pdftotext.binaries' => $php,
@@ -57,6 +60,6 @@ class BorderManagerServiceProvidertest extends ServiceProviderTestCase
$app['phraseanet.configuration'] = array('border-manager' => array('enabled' => false));
$this->assertInstanceOf('Alchemy\Phrasea\Border\Manager', $app['border-manager']);
$this->assertInstanceOf('XPDF\PdfToText', $app['border-manager']->getPdfToText());
$this->assertInstanceOf('XPDF\PdfToText', $app['phraseanet.metadata-reader']->getPdfToText());
}
}

View File

@@ -0,0 +1,42 @@
<?php
namespace Alchemy\Tests\Phrasea\Core\Provider;
/**
 * Describes the services PhraseanetServiceProvider is expected to register.
 */
class PhraseanetServiceProvidertest extends ServiceProviderTestCase
{
    /**
     * Lists, for each service, the provider class, the service key and the
     * class the service is expected to be an instance of.
     *
     * NOTE(review): presumably consumed as a data provider by
     * ServiceProviderTestCase; confirm against the parent class.
     *
     * @return array[] Rows of (provider class, service key, expected class)
     */
    public function provideServiceDescription()
    {
        $provider = 'Alchemy\Phrasea\Core\Provider\PhraseanetServiceProvider';

        // service key => expected class of the service
        $expectedClasses = array(
            'phraseanet.appbox'          => 'appbox',
            'phraseanet.registry'        => 'registry',
            'firewall'                   => 'Alchemy\Phrasea\Security\Firewall',
            'events-manager'             => 'eventsmanager_broker',
            'phraseanet.metadata-reader' => 'Alchemy\Phrasea\Metadata\PhraseanetMetadataReader',
            'phraseanet.metadata-setter' => 'Alchemy\Phrasea\Metadata\PhraseanetMetadataSetter',
        );

        $descriptions = array();
        foreach ($expectedClasses as $service => $class) {
            $descriptions[] = array($provider, $service, $class);
        }

        return $descriptions;
    }
}