Add elastic-search engine support

This commit is contained in:
Romain Neutron
2014-01-07 18:12:30 +01:00
parent e840df0b36
commit 97d96a6b32
18 changed files with 1264 additions and 49 deletions

View File

@@ -8,6 +8,7 @@ services:
- mysql - mysql
- memcached - memcached
- redis - redis
- elasticsearch
before_script: before_script:
- node --version - node --version

View File

@@ -11,7 +11,7 @@
namespace KonsoleKommander; namespace KonsoleKommander;
use Alchemy\Phrasea\Core\Version; use Alchemy\Phrasea\Command\SearchEngine\IndexFull;
use Alchemy\Phrasea\Command\BuildMissingSubdefs; use Alchemy\Phrasea\Command\BuildMissingSubdefs;
use Alchemy\Phrasea\Command\CreateCollection; use Alchemy\Phrasea\Command\CreateCollection;
use Alchemy\Phrasea\Command\MailTest; use Alchemy\Phrasea\Command\MailTest;
@@ -19,6 +19,7 @@ use Alchemy\Phrasea\Command\Compile\Configuration;
use Alchemy\Phrasea\Command\RecordAdd; use Alchemy\Phrasea\Command\RecordAdd;
use Alchemy\Phrasea\Command\RescanTechnicalDatas; use Alchemy\Phrasea\Command\RescanTechnicalDatas;
use Alchemy\Phrasea\Command\UpgradeDBDatas; use Alchemy\Phrasea\Command\UpgradeDBDatas;
use Alchemy\Phrasea\Core\Version;
use Alchemy\Phrasea\CLI; use Alchemy\Phrasea\CLI;
use Alchemy\Phrasea\Command\Plugin\AddPlugin; use Alchemy\Phrasea\Command\Plugin\AddPlugin;
use Alchemy\Phrasea\Command\Plugin\RemovePlugin; use Alchemy\Phrasea\Command\Plugin\RemovePlugin;
@@ -107,6 +108,10 @@ $cli->command(new Configuration());
$cli->command(new XSendFileConfigurationDumper()); $cli->command(new XSendFileConfigurationDumper());
$cli->command(new XSendFileMappingGenerator()); $cli->command(new XSendFileMappingGenerator());
if ($cli['phraseanet.SE']->getName() === 'ElasticSearch') {
$cli->command(new IndexFull('searchengine:index'));
}
$cli->loadPlugins(); $cli->loadPlugins();
exit(is_int($cli->run()) ? : 1); exit(is_int($cli->run()) ? : 1);

View File

@@ -13,6 +13,7 @@
"dailymotion/sdk" : "~1.5", "dailymotion/sdk" : "~1.5",
"data-uri/data-uri" : "~0.1.0", "data-uri/data-uri" : "~0.1.0",
"doctrine/orm" : "~2.4.0", "doctrine/orm" : "~2.4.0",
"elasticsearch/elasticsearch" : "~0.4",
"facebook/php-sdk" : "~3.0", "facebook/php-sdk" : "~3.0",
"gedmo/doctrine-extensions" : "~2.3.0", "gedmo/doctrine-extensions" : "~2.3.0",
"alchemy/google-plus-api-client" : "~0.6.2", "alchemy/google-plus-api-client" : "~0.6.2",

75
composer.lock generated
View File

@@ -3,7 +3,7 @@
"This file locks the dependencies of your project to a known state", "This file locks the dependencies of your project to a known state",
"Read more about it at http://getcomposer.org/doc/01-basic-usage.md#composer-lock-the-lock-file" "Read more about it at http://getcomposer.org/doc/01-basic-usage.md#composer-lock-the-lock-file"
], ],
"hash": "713759a9aea9d0178ef01ce2f7aea6b2", "hash": "cf050120950d3b57c24a52d01bbf94a3",
"packages": [ "packages": [
{ {
"name": "alchemy-fr/tcpdf-clone", "name": "alchemy-fr/tcpdf-clone",
@@ -1036,6 +1036,61 @@
], ],
"time": "2013-11-12 12:40:13" "time": "2013-11-12 12:40:13"
}, },
{
"name": "elasticsearch/elasticsearch",
"version": "v0.4.3",
"source": {
"type": "git",
"url": "https://github.com/elasticsearch/elasticsearch-php.git",
"reference": "f8116d7dc79e6602714e0bcf5a97f1753ef6ca9f"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/elasticsearch/elasticsearch-php/zipball/f8116d7dc79e6602714e0bcf5a97f1753ef6ca9f",
"reference": "f8116d7dc79e6602714e0bcf5a97f1753ef6ca9f",
"shasum": ""
},
"require": {
"ext-curl": "*",
"guzzle/guzzle": "~3.7",
"monolog/monolog": "~1.5",
"php": ">=5.3.9",
"pimple/pimple": "~1.0",
"psr/log": "~1.0"
},
"require-dev": {
"athletic/athletic": "~0.1",
"mikey179/vfsstream": "~1.2",
"mockery/mockery": "dev-master@dev",
"phpunit/phpunit": "3.7.*",
"satooshi/php-coveralls": "dev-master",
"symfony/yaml": "2.4.*@dev"
},
"type": "library",
"autoload": {
"psr-0": {
"Elasticsearch": "src/",
"Elasticsearch\\Tests": "tests/",
"Elasticsearch\\Benchmarks": "benchmarks/"
}
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"Apache 2"
],
"authors": [
{
"name": "Zachary Tong"
}
],
"description": "PHP Client for Elasticsearch",
"keywords": [
"client",
"elasticsearch",
"search"
],
"time": "2013-12-02 15:31:03"
},
{ {
"name": "evenement/evenement", "name": "evenement/evenement",
"version": "v1.0.0", "version": "v1.0.0",
@@ -1124,16 +1179,16 @@
}, },
{ {
"name": "gedmo/doctrine-extensions", "name": "gedmo/doctrine-extensions",
"version": "v2.3.8", "version": "v2.3.9",
"source": { "source": {
"type": "git", "type": "git",
"url": "https://github.com/l3pp4rd/DoctrineExtensions.git", "url": "https://github.com/l3pp4rd/DoctrineExtensions.git",
"reference": "d2bef940268e4dc5f0f050e80458169a7267813a" "reference": "35adcaae1a3f50d0d5b73aa50ed8fd28ee35ce54"
}, },
"dist": { "dist": {
"type": "zip", "type": "zip",
"url": "https://api.github.com/repos/l3pp4rd/DoctrineExtensions/zipball/d2bef940268e4dc5f0f050e80458169a7267813a", "url": "https://api.github.com/repos/l3pp4rd/DoctrineExtensions/zipball/35adcaae1a3f50d0d5b73aa50ed8fd28ee35ce54",
"reference": "d2bef940268e4dc5f0f050e80458169a7267813a", "reference": "35adcaae1a3f50d0d5b73aa50ed8fd28ee35ce54",
"shasum": "" "shasum": ""
}, },
"require": { "require": {
@@ -1200,7 +1255,7 @@
"tree", "tree",
"uploadable" "uploadable"
], ],
"time": "2013-11-09 21:23:13" "time": "2014-01-12 16:34:06"
}, },
{ {
"name": "guzzle/guzzle", "name": "guzzle/guzzle",
@@ -2669,12 +2724,12 @@
"source": { "source": {
"type": "git", "type": "git",
"url": "https://github.com/silexphp/Silex.git", "url": "https://github.com/silexphp/Silex.git",
"reference": "5633f1b652a6cbc0c0ed4b6b86c0560c4560731a" "reference": "93dde2797c6e8498b844ac0eca6e8dd6713fb172"
}, },
"dist": { "dist": {
"type": "zip", "type": "zip",
"url": "https://api.github.com/repos/silexphp/Silex/zipball/5633f1b652a6cbc0c0ed4b6b86c0560c4560731a", "url": "https://api.github.com/repos/silexphp/Silex/zipball/93dde2797c6e8498b844ac0eca6e8dd6713fb172",
"reference": "5633f1b652a6cbc0c0ed4b6b86c0560c4560731a", "reference": "93dde2797c6e8498b844ac0eca6e8dd6713fb172",
"shasum": "" "shasum": ""
}, },
"require": { "require": {
@@ -2745,7 +2800,7 @@
"keywords": [ "keywords": [
"microframework" "microframework"
], ],
"time": "2013-12-03 18:11:54" "time": "2014-01-08 07:22:38"
}, },
{ {
"name": "silex/web-profiler", "name": "silex/web-profiler",

View File

@@ -0,0 +1,27 @@
<?php
/*
* This file is part of Phraseanet
*
* (c) 2005-2014 Alchemy
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
namespace Alchemy\Phrasea\Command\SearchEngine;
use Alchemy\Phrasea\Command\Command;
use Alchemy\Phrasea\SearchEngine\Elastic\Indexer;
use Elasticsearch\Common\Exceptions\ElasticsearchException;
use Symfony\Component\Console\Input\InputInterface;
use Symfony\Component\Console\Output\OutputInterface;
/**
 * Console command that rebuilds the search index from scratch.
 */
class IndexFull extends Command
{
    /**
     * Asks the "ES.indexer" service to create the index, then to index
     * every record again.
     *
     * @param InputInterface  $input
     * @param OutputInterface $output
     */
    protected function doExecute(InputInterface $input, OutputInterface $output)
    {
        $indexer = $this->container['ES.indexer'];

        $indexer->createIndex();
        $indexer->reindexAll();
    }
}

View File

@@ -14,12 +14,13 @@ namespace Alchemy\Phrasea\Core\Provider;
use Alchemy\Phrasea\SearchEngine\SearchEngineLogger; use Alchemy\Phrasea\SearchEngine\SearchEngineLogger;
use Alchemy\Phrasea\Exception\InvalidArgumentException; use Alchemy\Phrasea\Exception\InvalidArgumentException;
use Alchemy\Phrasea\SearchEngine\SearchEngineInterface; use Alchemy\Phrasea\SearchEngine\SearchEngineInterface;
use Alchemy\Phrasea\SearchEngine\Elastic\Indexer;
use Elasticsearch\Client;
use Silex\Application; use Silex\Application;
use Silex\ServiceProviderInterface; use Silex\ServiceProviderInterface;
class SearchEngineServiceProvider implements ServiceProviderInterface class SearchEngineServiceProvider implements ServiceProviderInterface
{ {
public function register(Application $app) public function register(Application $app)
{ {
$app['phraseanet.SE'] = $app->share(function ($app) { $app['phraseanet.SE'] = $app->share(function ($app) {
@@ -45,6 +46,10 @@ class SearchEngineServiceProvider implements ServiceProviderInterface
$app['phraseanet.SE.subscriber'] = $app->share(function ($app) { $app['phraseanet.SE.subscriber'] = $app->share(function ($app) {
return $app['phraseanet.SE.engine-class']::createSubscriber($app); return $app['phraseanet.SE.engine-class']::createSubscriber($app);
}); });
$app['ES.indexer'] = $app->share(function ($app) {
return new Indexer($app['phraseanet.SE'], $app['monolog'], $app['phraseanet.appbox']);
});
} }
public function boot(Application $app) public function boot(Application $app)

View File

@@ -12,6 +12,7 @@
namespace Alchemy\Phrasea\Core\Provider; namespace Alchemy\Phrasea\Core\Provider;
use Alchemy\Phrasea\Model\Serializer\CaptionSerializer; use Alchemy\Phrasea\Model\Serializer\CaptionSerializer;
use Alchemy\Phrasea\Model\Serializer\ESRecordSerializer;
use Silex\Application; use Silex\Application;
use Silex\ServiceProviderInterface; use Silex\ServiceProviderInterface;
@@ -22,6 +23,10 @@ class SerializerServiceProvider implements ServiceProviderInterface
$app['serializer.caption'] = $app->share(function (Application $app) { $app['serializer.caption'] = $app->share(function (Application $app) {
return new CaptionSerializer(); return new CaptionSerializer();
}); });
$app['serializer.es-record'] = $app->share(function (Application $app) {
return new ESRecordSerializer();
});
} }
public function boot(Application $app) public function boot(Application $app)

View File

@@ -0,0 +1,49 @@
<?php
/*
* This file is part of Phraseanet
*
* (c) 2005-2014 Alchemy
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
namespace Alchemy\Phrasea\Model\Serializer;
/**
 * Base class for serializers; provides control-character sanitizing.
 */
abstract class AbstractSerializer
{
    /**
     * Removes the ASCII control characters that are not legal in an XML 1.0
     * document from a value.
     *
     * Tab (0x09), line feed (0x0A) and carriage return (0x0D) are kept as
     * they are valid characters; every other byte below 0x20 is stripped.
     *
     * @param string|array $value The value(s) to clean
     *
     * @return string|array The value(s) without the stripped characters
     */
    protected function sanitizeSerializedValue($value)
    {
        return str_replace([
            "\x00", //null
            "\x01", //start heading
            "\x02", //start text
            "\x03", //end of text
            "\x04", //end of transmission
            "\x05", //enquiry
            "\x06", //acknowledge
            "\x07", //bell
            "\x08", //backspace
            "\x0B", //vertical tab (illegal in XML 1.0, like the form feed below)
            "\x0C", //new page
            "\x0E", //shift out
            "\x0F", //shift in
            "\x10", //data link escape
            "\x11", //dc 1
            "\x12", //dc 2
            "\x13", //dc 3
            "\x14", //dc 4
            "\x15", //negative ack
            "\x16", //synchronous idle
            "\x17", //end of trans block
            "\x18", //cancel
            "\x19", //end of medium
            "\x1A", //substitute
            "\x1B", //escape
            "\x1C", //file separator
            "\x1D", //group sep
            "\x1E", //record sep
            "\x1F", //unit sep
        ], '', $value);
    }
}

View File

@@ -13,7 +13,7 @@ namespace Alchemy\Phrasea\Model\Serializer;
use Symfony\Component\Yaml\Dumper as YamlDumper; use Symfony\Component\Yaml\Dumper as YamlDumper;
class CaptionSerializer class CaptionSerializer extends AbstractSerializer
{ {
const SERIALIZE_XML = 'xml'; const SERIALIZE_XML = 'xml';
const SERIALIZE_YAML = 'yaml'; const SERIALIZE_YAML = 'yaml';
@@ -106,38 +106,4 @@ class CaptionSerializer
return $dom_doc->saveXML(); return $dom_doc->saveXML();
} }
protected function sanitizeSerializedValue($value)
{
return str_replace([
"\x00", //null
"\x01", //start heading
"\x02", //start text
"\x03", //end of text
"\x04", //end of transmission
"\x05", //enquiry
"\x06", //acknowledge
"\x07", //bell
"\x08", //backspace
"\x0C", //new page
"\x0E", //shift out
"\x0F", //shift in
"\x10", //data link escape
"\x11", //dc 1
"\x12", //dc 2
"\x13", //dc 3
"\x14", //dc 4
"\x15", //negative ack
"\x16", //synchronous idle
"\x17", //end of trans block
"\x18", //cancel
"\x19", //end of medium
"\x1A", //substitute
"\x1B", //escape
"\x1C", //file separator
"\x1D", //group sep
"\x1E", //record sep
"\x1F", //unit sep
], '', $value);
}
} }

View File

@@ -0,0 +1,81 @@
<?php
/*
* This file is part of Phraseanet
*
* (c) 2005-2014 Alchemy
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
namespace Alchemy\Phrasea\Model\Serializer;
/**
 * Turns a record into the array document sent to the ElasticSearch index.
 */
class ESRecordSerializer extends AbstractSerializer
{
    /**
     * Builds the document describing a record.
     *
     * Caption values are sanitized, multi-valued fields are joined with the
     * one-char field separator, and date fields are normalized to the ATOM
     * format. Caption fields without any value, and date fields whose value
     * is blank or cannot be parsed, are left out of the document.
     *
     * @param \record_adapter $record
     *
     * @return array
     */
    public function serialize(\record_adapter $record)
    {
        $technicalInformation = $caption = $business = $status = [];

        foreach ($record->get_technical_infos() as $name => $value) {
            $technicalInformation[$name] = $value;
        }

        foreach ($record->get_caption()->get_fields(null, true) as $field) {
            $databoxField = $field->get_databox_field();
            $fieldName = $databoxField->get_name();
            $vi = $field->get_values();

            // Without this guard array_pop() returns null for a mono-valued
            // field and the getValue() call below is a fatal error.
            if (0 === count($vi)) {
                continue;
            }

            if ($field->is_multi()) {
                $values = [];
                foreach ($vi as $fieldValue) {
                    $values[] = $this->sanitizeSerializedValue($fieldValue->getValue());
                }
                $value = implode(' ' . $databoxField->get_separator(false) . ' ', $values);
            } else {
                $value = $this->sanitizeSerializedValue(array_pop($vi)->getValue());
            }

            if ($databoxField->get_type() === \databox_field::TYPE_DATE) {
                // new \DateTime('') means "now": a blank date must be skipped,
                // not indexed as the current time.
                if ('' === trim($value)) {
                    continue;
                }

                try {
                    $date = new \DateTime($value);
                    $value = $date->format(DATE_ATOM);
                } catch (\Exception $e) {
                    // Unparsable dates are not indexed at all.
                    continue;
                }
            }

            if ($databoxField->isBusiness()) {
                $business[$fieldName] = $value;
            }
            $caption[$fieldName] = $value;
        }

        // The status string is reversed so that "status-0" is its last character.
        $bits = preg_split('//', strrev($record->get_status()), -1, PREG_SPLIT_NO_EMPTY);
        foreach ($bits as $position => $bit) {
            $status['status-' . $position] = (int) $bit;
        }

        return [
            'databox_id'             => $record->get_sbas_id(),
            'record_id'              => $record->get_record_id(),
            'collection_id'          => $record->get_collection()->get_coll_id(),
            'base_id'                => $record->get_base_id(),
            'mime_type'              => $record->get_mime(),
            'title'                  => $record->get_title(),
            'original_name'          => $record->get_original_name(),
            'updated_on'             => $record->get_modification_date()->format(DATE_ATOM),
            'created_on'             => $record->get_creation_date()->format(DATE_ATOM),
            'sha256'                 => $record->get_sha256(),
            'technical_informations' => $technicalInformation,
            'phrasea_type'           => $record->get_type(),
            'type'                   => $record->is_grouping() ? 'story' : 'record',
            'uuid'                   => $record->get_uuid(),
            'caption'                => $caption,
            'status'                 => $status,
            'caption-business'       => $business,
        ];
    }
}

View File

@@ -0,0 +1,67 @@
<?php
/*
* This file is part of Phraseanet
*
* (c) 2005-2014 Alchemy
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
namespace Alchemy\Phrasea\SearchEngine\Elastic;
use Alchemy\Phrasea\Application;
use Alchemy\Phrasea\SearchEngine\AbstractConfigurationPanel;
use Symfony\Component\HttpFoundation\Request;
use Alchemy\Phrasea\Core\Configuration\ConfigurationInterface;
/**
 * Administration panel used to edit the ElasticSearch connection options.
 */
class ConfigurationPanel extends AbstractConfigurationPanel
{
    private $searchEngine;

    /**
     * @param ElasticSearchEngine    $engine
     * @param ConfigurationInterface $conf
     */
    public function __construct(ElasticSearchEngine $engine, ConfigurationInterface $conf)
    {
        $this->searchEngine = $engine;
        $this->conf = $conf;
    }

    /**
     * {@inheritdoc}
     */
    public function getName()
    {
        return 'elastic-search-engine';
    }

    /**
     * {@inheritdoc}
     */
    public function get(Application $app, Request $request)
    {
        $context = ['configuration' => $this->getConfiguration()];

        return $app['twig']->render('admin/search-engine/elastic-search.html.twig', $context);
    }

    /**
     * {@inheritdoc}
     */
    public function post(Application $app, Request $request)
    {
        $posted = $request->request;

        // Start from the stored options so that unrelated keys are kept.
        $options = $this->getConfiguration();
        $options['host'] = $posted->get('host');
        $options['port'] = $posted->get('port');

        $this->saveConfiguration($options);

        return $app->redirectPath('admin_searchengine_get');
    }

    /**
     * {@inheritdoc}
     */
    public function getConfiguration()
    {
        if (isset($this->conf['main']['search-engine']['options'])) {
            return $this->conf['main']['search-engine']['options'];
        }

        return [];
    }
}

View File

@@ -0,0 +1,581 @@
<?php
/*
* This file is part of Phraseanet
*
* (c) 2005-2014 Alchemy
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
namespace Alchemy\Phrasea\SearchEngine\Elastic;
use Alchemy\Phrasea\Application;
use Alchemy\Phrasea\Model\Serializer\ESRecordSerializer;
use Alchemy\Phrasea\SearchEngine\SearchEngineInterface;
use Alchemy\Phrasea\SearchEngine\SearchEngineOptions;
use Alchemy\Phrasea\SearchEngine\SearchEngineResult;
use Alchemy\Phrasea\Exception\RuntimeException;
use Doctrine\Common\Collections\ArrayCollection;
use Alchemy\Phrasea\Model\Entities\FeedEntry;
use Elasticsearch\Client;
/**
 * Search engine implementation backed by an ElasticSearch index.
 *
 * Records and stories are stored as documents of type "record". Feed entry
 * indexing and auto-completion are not supported and throw.
 */
class ElasticSearchEngine implements SearchEngineInterface
{
    private $app;
    /** @var Client */
    private $client;
    /** @var array|null Names of the date fields, computed on first access */
    private $dateFields;
    private $indexName;
    private $serializer;
    private $configurationPanel;

    /**
     * @param Application        $app
     * @param Client             $client
     * @param ESRecordSerializer $serializer
     * @param string             $indexName  Name of the index to work on
     *
     * @throws \InvalidArgumentException If the index name is blank
     */
    public function __construct(Application $app, Client $client, ESRecordSerializer $serializer, $indexName)
    {
        $this->app = $app;
        $this->client = $client;
        $this->serializer = $serializer;

        if ('' === trim($indexName)) {
            throw new \InvalidArgumentException('The provided index name is invalid.');
        }
        $this->indexName = $indexName;
    }

    /**
     * @return string The name of the index this engine works on
     */
    public function getIndexName()
    {
        return $this->indexName;
    }

    /**
     * @return Client
     */
    public function getClient()
    {
        return $this->client;
    }

    /**
     * {@inheritdoc}
     */
    public function getName()
    {
        return 'ElasticSearch';
    }

    /**
     * {@inheritdoc}
     */
    public function getStatus()
    {
        $data = $this->client->info();

        // Flatten the nested "version" entry into "version:<property>" keys.
        $version = $data['version'];
        unset($data['version']);

        foreach ($version as $prop => $value) {
            $data['version:'.$prop] = $value;
        }

        $ret = [];
        foreach ($data as $key => $value) {
            $ret[] = [$key, $value];
        }

        return $ret;
    }

    /**
     * {@inheritdoc}
     */
    public function getConfigurationPanel()
    {
        if (!$this->configurationPanel) {
            $this->configurationPanel = new ConfigurationPanel($this, $this->app['configuration.store']);
        }

        return $this->configurationPanel;
    }

    /**
     * {@inheritdoc}
     */
    public function getAvailableDateFields()
    {
        // null means "not computed yet"; an empty array is a valid, cached
        // result when no databox declares a date field.
        if (null === $this->dateFields) {
            $dateFields = [];

            foreach ($this->app['phraseanet.appbox']->get_databoxes() as $databox) {
                foreach ($databox->get_meta_structure() as $databox_field) {
                    if ($databox_field->get_type() !== \databox_field::TYPE_DATE) {
                        continue;
                    }

                    $dateFields[] = $databox_field->get_name();
                }
            }

            $this->dateFields = array_unique($dateFields);
        }

        return $this->dateFields;
    }

    /**
     * {@inheritdoc}
     */
    public function getAvailableSort()
    {
        return [
            'score'      => $this->app->trans('pertinence'),
            'created_on' => $this->app->trans('date dajout'),
        ];
    }

    /**
     * {@inheritdoc}
     */
    public function getDefaultSort()
    {
        return 'score';
    }

    /**
     * {@inheritdoc}
     */
    public function isStemmingEnabled()
    {
        return false;
    }

    /**
     * {@inheritdoc}
     */
    public function getAvailableOrder()
    {
        return [
            'desc' => $this->app->trans('descendant'),
            'asc'  => $this->app->trans('ascendant'),
        ];
    }

    /**
     * {@inheritdoc}
     */
    public function hasStemming()
    {
        return false;
    }

    /**
     * {@inheritdoc}
     */
    public function getAvailableTypes()
    {
        return [self::GEM_TYPE_RECORD, self::GEM_TYPE_STORY];
    }

    /**
     * {@inheritdoc}
     */
    public function addRecord(\record_adapter $record)
    {
        $this->doExecute('index', [
            'body'  => $this->serializer->serialize($record),
            'index' => $this->indexName,
            'type'  => 'record',
            'id'    => $this->getDocumentId($record),
        ]);

        return $this;
    }

    /**
     * {@inheritdoc}
     */
    public function removeRecord(\record_adapter $record)
    {
        $this->doExecute('delete', [
            'index' => $this->indexName,
            'type'  => 'record',
            'id'    => $this->getDocumentId($record),
        ]);

        return $this;
    }

    /**
     * {@inheritdoc}
     */
    public function updateRecord(\record_adapter $record)
    {
        // Indexing with the same document id replaces the previous document.
        $this->addRecord($record);

        return $this;
    }

    /**
     * {@inheritdoc}
     */
    public function addStory(\record_adapter $story)
    {
        $this->addRecord($story);

        return $this;
    }

    /**
     * {@inheritdoc}
     */
    public function removeStory(\record_adapter $story)
    {
        $this->removeRecord($story);

        return $this;
    }

    /**
     * {@inheritdoc}
     */
    public function updateStory(\record_adapter $story)
    {
        $this->addRecord($story);

        return $this;
    }

    /**
     * {@inheritdoc}
     */
    public function addFeedEntry(FeedEntry $entry)
    {
        throw new RuntimeException('ElasticSearch engine does not support feed entry indexing.');
    }

    /**
     * {@inheritdoc}
     */
    public function removeFeedEntry(FeedEntry $entry)
    {
        throw new RuntimeException('ElasticSearch engine does not support feed entry indexing.');
    }

    /**
     * {@inheritdoc}
     */
    public function updateFeedEntry(FeedEntry $entry)
    {
        throw new RuntimeException('ElasticSearch engine does not support feed entry indexing.');
    }

    /**
     * {@inheritdoc}
     */
    public function query($query, $offset, $perPage, SearchEngineOptions $options = null)
    {
        // The literal query "all" (any case) means "match everything".
        $query = 'all' !== strtolower($query) ? $query : '';

        $params = $this->createQueryParams($query, $options ?: new SearchEngineOptions());
        $params['from'] = $offset;
        $params['size'] = $perPage;

        $res = $this->doExecute('search', $params);

        $results = new ArrayCollection();
        $suggestions = new ArrayCollection();
        $n = 0;

        foreach ($res['hits']['hits'] as $hit) {
            $results[] = new \record_adapter($this->app, $hit['fields']['databox_id'], $hit['fields']['record_id'], $n);
            $n++;
        }

        return new SearchEngineResult($results, $query, $res['took'], $offset, $res['hits']['total'], $res['hits']['total'], null, null, $suggestions, [], $this->indexName);
    }

    /**
     * {@inheritdoc}
     */
    public function autocomplete($query, SearchEngineOptions $options)
    {
        throw new RuntimeException('ElasticSearch engine currently does not support auto-complete.');
    }

    /**
     * {@inheritdoc}
     */
    public function excerpt($query, $fields, \record_adapter $record, SearchEngineOptions $options = null)
    {
        $query = 'all' !== strtolower($query) ? $query : '';
        $params = $this->createQueryParams($query, $options ?: new SearchEngineOptions(), $record);

        $res = $this->doExecute('search', $params);

        // Initialized up front so that an empty $fields yields an empty array
        // instead of an undefined variable.
        $ret = [];

        foreach ($fields as $name => $field) {
            if (isset($res['hits']['hits'][0]['highlight']['caption.'.$name])) {
                $ret[] = $res['hits']['hits'][0]['highlight']['caption.'.$name][0];
            } else {
                // No highlight for this field: fall back on its raw value.
                $ret[] = $field['value'];
            }
        }

        return $ret;
    }

    /**
     * {@inheritdoc}
     */
    public function resetCache()
    {
    }

    /**
     * {@inheritdoc}
     */
    public function clearCache()
    {
    }

    /**
     * {@inheritdoc}
     */
    public function clearAllCache(\DateTime $date = null)
    {
    }

    /**
     * {@inheritdoc}
     */
    public static function createSubscriber(Application $app)
    {
        return new ElasticSearchEngineSubscriber();
    }

    /**
     * {@inheritdoc}
     *
     * Recognized options are "host", "port" and "index"; missing ones default
     * to 127.0.0.1, 9200 and "phraseanet".
     *
     * @return ElasticSearchEngine
     */
    public static function create(Application $app, array $options = [])
    {
        $options = array_replace([
            'host'  => '127.0.0.1',
            'port'  => '9200',
            'index' => 'phraseanet',
        ], $options);

        $client = new Client(['hosts' => [sprintf('%s:%s', $options['host'], $options['port'])]]);

        return new static($app, $client, $app['serializer.es-record'], $options['index']);
    }

    /**
     * Builds the identifier of the document that stores a record.
     *
     * Shared by indexing, deletion and excerpt queries so that they always
     * address the same document.
     *
     * @param \record_adapter $record
     *
     * @return string "<databox id>-<record id>"
     */
    private function getDocumentId(\record_adapter $record)
    {
        return sprintf('%s-%s', $record->get_sbas_id(), $record->get_record_id());
    }

    /**
     * Builds the parameters of a search request.
     *
     * When a record is given, the search is restricted to that record and
     * highlighting is requested on every caption field of its databox.
     *
     * @param string               $query
     * @param SearchEngineOptions  $options
     * @param \record_adapter|null $record
     *
     * @return array
     */
    private function createQueryParams($query, SearchEngineOptions $options, \record_adapter $record = null)
    {
        $params = [
            'index' => $this->indexName,
            'type'  => 'record',
            'body'  => [
                'fields' => ['databox_id', 'record_id'],
                'sort'   => $this->createSortQueryParams($options),
            ]
        ];

        $ESquery = $this->createESQuery($query, $options);
        $filters = $this->createFilters($options);

        if ($record) {
            $filters[] = [
                'term' => [
                    '_id' => $this->getDocumentId($record),
                ]
            ];

            $fields = [];
            foreach ($record->get_databox()->get_meta_structure() as $dbField) {
                // An empty object (not an empty array) is needed so that the
                // value is JSON-encoded as {}.
                $fields['caption.'.$dbField->get_name()] = new \stdClass();
            }

            $params['body']['highlight'] = [
                "pre_tags"  => ["[[em]]"],
                "post_tags" => ["[[/em]]"],
                "fields"    => $fields,
            ];
        }

        if (count($filters) > 0) {
            $ESquery = [
                'filtered' => [
                    'query'  => $ESquery,
                    'filter' => [
                        'and' => $filters
                    ]
                ]
            ];
        }

        $params['body']['query'] = $ESquery;

        return $params;
    }

    /**
     * Builds the query part of a search request.
     *
     * A "recordid=N" / "storyid=N" expression turns the query into a lookup
     * on the record id; an empty query matches every document.
     *
     * @param string              $query
     * @param SearchEngineOptions $options
     *
     * @return array
     */
    private function createESQuery($query, SearchEngineOptions $options)
    {
        $preg = preg_match('/\s?(recordid|storyid)\s?=\s?([0-9]+)/i', $query, $matches, 0, 0);

        $search = [];
        if ($preg > 0) {
            $search['bool']['must'][] = [
                'term' => [
                    'record_id' => $matches[2],
                ],
            ];
            // The rest of the query is ignored for an id lookup.
            $query = '';
        }

        if ('' !== $query) {
            if (0 < count($options->getBusinessFieldsOn())) {
                $fields = [];
                foreach ($this->app['phraseanet.appbox']->get_databoxes() as $databox) {
                    foreach ($databox->get_meta_structure() as $dbField) {
                        if ($dbField->isBusiness()) {
                            $fields[$dbField->get_name()] = [
                                'match' => [
                                    'caption.'.$dbField->get_name() => $query,
                                ]
                            ];
                        }
                    }
                }

                if (count($fields) > 0) {
                    // Business fields only match inside the collections they
                    // were enabled on.
                    foreach ($options->getBusinessFieldsOn() as $coll) {
                        $search['bool']['should'][] = [
                            'bool' => [
                                'must' => [
                                    [
                                        'bool' => [
                                            'should' => array_values($fields)
                                        ]
                                    ],[
                                        'term' => [
                                            'base_id' => $coll->get_base_id(),
                                        ]
                                    ]
                                ]
                            ]
                        ];
                    }
                }
            }

            if ($options->getFields()) {
                foreach ($options->getFields() as $field) {
                    $search['bool']['should'][] = [
                        'match' => [
                            'caption.'.$field->get_name() => $query,
                        ]
                    ];
                }
            } else {
                $search['bool']['should'][] = [
                    'match' => [
                        '_all' => $query,
                    ]
                ];
            }
        } else {
            $search['bool']['should'][] = [
                'match_all' => new \stdClass(),
            ];
        }

        return $search;
    }

    /**
     * Builds the filters (status bits, collections, document type, date
     * range, record type) applied to a search request.
     *
     * @param SearchEngineOptions $options
     *
     * @return array
     */
    private function createFilters(SearchEngineOptions $options)
    {
        $filters = [];

        $status_opts = $options->getStatus();
        foreach ($options->getDataboxes() as $databox) {
            foreach ($databox->get_statusbits() as $n => $status) {
                if (!array_key_exists($n, $status_opts)) {
                    continue;
                }
                if (!array_key_exists($databox->get_sbas_id(), $status_opts[$n])) {
                    continue;
                }

                $filters[] = [
                    'term' => [
                        'status.status-'.$n => $status_opts[$n][$databox->get_sbas_id()],
                    ]
                ];
            }
        }

        // array_map() keeps the keys of its input; array_values() guarantees
        // a 0-indexed list, hence a JSON array rather than a JSON object.
        $baseIds = array_values(array_map(function (\collection $coll) {
            return $coll->get_base_id();
        }, $options->getCollections()));

        $filters[] = [
            'terms' => [
                'base_id' => $baseIds
            ]
        ];

        $filters[] = [
            'term' => [
                'type' => $options->getSearchType() === SearchEngineOptions::RECORD_RECORD ? 'record' : 'story',
            ]
        ];

        if ($options->getDateFields() && ($options->getMaxDate() || $options->getMinDate())) {
            $range = [];
            if ($options->getMaxDate()) {
                $range['lte'] = $options->getMaxDate()->format(DATE_ATOM);
            }
            if ($options->getMinDate()) {
                $range['gte'] = $options->getMinDate()->format(DATE_ATOM);
            }

            foreach ($options->getDateFields() as $dateField) {
                $filters[] = [
                    'range' => [
                        'caption.'.$dateField->get_name() => $range
                    ]
                ];
            }
        }

        if ($options->getRecordType()) {
            $filters[] = [
                'term' => [
                    'phrasea_type' => $options->getRecordType(),
                ]
            ];
        }

        return $filters;
    }

    /**
     * Builds the sort clause: by score first when requested, then always by
     * creation date.
     *
     * @param SearchEngineOptions $options
     *
     * @return array
     */
    private function createSortQueryParams(SearchEngineOptions $options)
    {
        $sort = [];
        if ($options->getSortBy() === 'score') {
            $sort['_score'] = $options->getSortOrder();
        }
        $sort['created_on'] = $options->getSortOrder();

        return $sort;
    }

    /**
     * Calls a method of the ElasticSearch client and checks its response.
     *
     * @param string $method Name of the client method
     * @param array  $params Parameters passed to that method
     *
     * @return array The client response
     *
     * @throws RuntimeException If the response carries an "error" entry
     */
    private function doExecute($method, array $params)
    {
        $res = $this->client->{$method}($params);

        if (isset($res['error'])) {
            throw new RuntimeException('Unable to execute method '.$method);
        }

        return $res;
    }
}

View File

@@ -0,0 +1,22 @@
<?php
/*
* This file is part of Phraseanet
*
* (c) 2005-2014 Alchemy
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
namespace Alchemy\Phrasea\SearchEngine\Elastic;
use Symfony\Component\EventDispatcher\EventSubscriberInterface;
/**
 * Event subscriber of the ElasticSearch engine; it currently listens to
 * no event.
 */
class ElasticSearchEngineSubscriber implements EventSubscriberInterface
{
    /**
     * {@inheritdoc}
     */
    public static function getSubscribedEvents()
    {
        $subscribedEvents = [];

        return $subscribedEvents;
    }
}

View File

@@ -0,0 +1,297 @@
<?php
/*
* This file is part of Phraseanet
*
* (c) 2005-2014 Alchemy
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
namespace Alchemy\Phrasea\SearchEngine\Elastic;
use Psr\Log\LoggerInterface;
/**
 * Creates the ElasticSearch index and fills it with the records of every
 * databox.
 */
class Indexer
{
    private $engine;
    private $logger;
    private $appbox;

    /**
     * @param ElasticSearchEngine $engine
     * @param LoggerInterface     $logger
     * @param \appbox             $appbox
     */
    public function __construct(ElasticSearchEngine $engine, LoggerInterface $logger, \appbox $appbox)
    {
        $this->engine = $engine;
        $this->logger = $logger;
        $this->appbox = $appbox;
    }

    /**
     * (Re)creates the index with its settings and the "record" mapping.
     *
     * An existing index with the same name is deleted first.
     *
     * @throws \RuntimeException If the index creation is not acknowledged
     */
    public function createIndex()
    {
        $client = $this->engine->getClient();
        $indexName = $this->engine->getIndexName();

        list($captionFields, $businessFields) = $this->buildCaptionMappings();

        $status = [];
        for ($i = 0; $i <= 32; $i ++) {
            $status['status-'.$i] = [
                'type' => 'integer',
            ];
        }

        // Top-level record properties, all stored verbatim (not analyzed).
        $notAnalyzed = [
            'record_id'     => 'integer',
            'databox_id'    => 'integer',
            'base_id'       => 'integer',
            'mime_type'     => 'string',
            'title'         => 'string',
            'original_name' => 'string',
            'updated_on'    => 'date',
            'created_on'    => 'date',
            'collection_id' => 'integer',
            'sha256'        => 'string',
            'type'          => 'string',
            'phrasea_type'  => 'string',
            'uuid'          => 'string',
        ];

        $properties = [];
        foreach ($notAnalyzed as $name => $type) {
            $properties[$name] = [
                'type'  => $type,
                'index' => 'not_analyzed',
            ];
        }

        $properties['status'] = ['properties' => $status];
        $properties['technical_informations'] = ['properties' => $this->buildTechnicalInformationMappings()];
        $properties['caption'] = ['properties' => $captionFields];

        if (0 < count($businessFields)) {
            $properties['caption-business'] = ['properties' => $businessFields];
        }

        $indexParams = [
            'index' => $indexName,
            'body'  => [
                'settings' => [
                    'number_of_shards'   => 3,
                    'number_of_replicas' => 0,
                    // Analyzers are index settings; declared inside a type
                    // mapping they are not applied.
                    'analysis'           => $this->buildAnalysisSettings(),
                ],
                'mappings' => [
                    'record' => [
                        '_source' => [
                            'enabled' => true
                        ],
                        '_all' => [
                            'analyzer' => 'french',
                        ],
                        'properties' => $properties,
                    ],
                ],
            ],
        ];

        if ($client->indices()->exists(['index' => $indexName])) {
            $client->indices()->delete(['index' => $indexName]);
        }

        if (!$this->isAcknowledged($client->indices()->create($indexParams))) {
            throw new \RuntimeException('Unable to create index');
        }
    }

    /**
     * Indexes every record (parent_record_id = 0) of every databox.
     *
     * The index refresh interval is raised to 300 seconds while indexing and
     * set to 1 second afterwards.
     *
     * @throws \RuntimeException If the refresh interval cannot be set to 1 second
     */
    public function reindexAll()
    {
        $qty = 10;
        $client = $this->engine->getClient();

        $params = ['index' => $this->engine->getIndexName()];
        // A unit is mandatory: a bare number is read as milliseconds.
        $params['body']['index']['refresh_interval'] = '300s';

        if (!$this->isAcknowledged($client->indices()->putSettings($params))) {
            $this->logger->error('Unable to set the refresh interval to 300 s. .');
        }

        foreach ($this->appbox->get_databoxes() as $databox) {
            $offset = 0;
            do {
                // $offset and $qty are integers computed here, never user input.
                $sql = 'SELECT record_id FROM record
                        WHERE parent_record_id = 0
                        ORDER BY record_id ASC LIMIT '.$offset.', '.$qty;
                $stmt = $databox->get_connection()->prepare($sql);
                $stmt->execute();
                $rows = $stmt->fetchAll(\PDO::FETCH_ASSOC);
                $stmt->closeCursor();

                foreach ($rows as $row) {
                    $record = $databox->get_record($row['record_id']);
                    $this->engine->addRecord($record);
                }

                gc_collect_cycles();
                $offset += $qty;
            } while (count($rows) > 0);
        }

        $params['body']['index']['refresh_interval'] = '1s';

        if (!$this->isAcknowledged($client->indices()->putSettings($params))) {
            throw new \RuntimeException('Unable to set the refresh interval to 1 s. .');
        }
    }

    /**
     * Tells whether an indices API response reports success, through either
     * the "ok" or the "acknowledged" flag.
     *
     * @param mixed $response
     *
     * @return Boolean
     */
    private function isAcknowledged($response)
    {
        if (!is_array($response) || isset($response['error'])) {
            return false;
        }

        return !empty($response['ok']) || !empty($response['acknowledged']);
    }

    /**
     * Builds the mappings of the caption fields declared by the databoxes.
     *
     * A field is mapped as a date only if every databox declaring that name
     * declares it as a date; otherwise it is mapped as a string.
     *
     * @return array A two-element list: all caption fields, business fields only
     */
    private function buildCaptionMappings()
    {
        $captionFields = [];
        $businessFields = [];

        foreach ($this->appbox->get_databoxes() as $databox) {
            foreach ($databox->get_meta_structure() as $dbField) {
                $name = $dbField->get_name();

                $type = 'string';
                if (\databox_field::TYPE_DATE === $dbField->get_type()) {
                    $type = 'date';
                }
                if (isset($captionFields[$name]) && $type !== $captionFields[$name]['type']) {
                    $type = 'string';
                }

                $captionFields[$name] = $this->buildFieldMapping($type, !$dbField->isBusiness());

                if ($dbField->isBusiness()) {
                    $businessFields[$name] = $this->buildFieldMapping($type, false);
                }
            }
        }

        return [$captionFields, $businessFields];
    }

    /**
     * Builds the mapping of one caption field.
     *
     * @param string  $type         "string" or "date"
     * @param Boolean $includeInAll Whether the field feeds the "_all" field
     *
     * @return array
     */
    private function buildFieldMapping($type, $includeInAll)
    {
        $mapping = [
            'type'           => $type,
            'include_in_all' => $includeInAll,
        ];

        // Text analysis only makes sense for string fields.
        if ('string' === $type) {
            $mapping['analyzer'] = 'french';
        }

        return $mapping;
    }

    /**
     * Returns the custom analyzers and token filters of the index.
     *
     * @return array
     */
    private function buildAnalysisSettings()
    {
        return [
            'analyzer' => [
                'french' => [
                    'type'      => 'custom',
                    'tokenizer' => 'letter',
                    'filter'    => ["asciifolding", "lowercase", "french_stem", "stop_fr"]
                ],
                'autocomplete_french' => [
                    'type'      => 'custom',
                    'tokenizer' => 'letter',
                    'filter'    => ["asciifolding", "lowercase", "stop_fr"]
                ]
            ],
            'filter' => [
                'stop_fr' => [
                    'type'      => 'stop',
                    'stopwords' => ['l', 'm', 't', 'qu', 'n', 's', 'j', 'd'],
                ]
            ],
        ];
    }

    /**
     * Returns the mappings of the technical information fields.
     *
     * @return array
     */
    private function buildTechnicalInformationMappings()
    {
        $types = [
            \media_subdef::TC_DATA_WIDTH              => 'integer',
            \media_subdef::TC_DATA_HEIGHT             => 'integer',
            \media_subdef::TC_DATA_COLORSPACE         => 'string',
            \media_subdef::TC_DATA_CHANNELS           => 'integer',
            \media_subdef::TC_DATA_ORIENTATION        => 'integer',
            \media_subdef::TC_DATA_COLORDEPTH         => 'integer',
            \media_subdef::TC_DATA_DURATION           => 'integer',
            \media_subdef::TC_DATA_AUDIOCODEC         => 'string',
            \media_subdef::TC_DATA_AUDIOSAMPLERATE    => 'integer',
            \media_subdef::TC_DATA_VIDEOCODEC         => 'string',
            \media_subdef::TC_DATA_FRAMERATE          => 'float',
            \media_subdef::TC_DATA_MIMETYPE           => 'string',
            \media_subdef::TC_DATA_FILESIZE           => 'long',
            \media_subdef::TC_DATA_LONGITUDE          => 'float',
            \media_subdef::TC_DATA_LATITUDE           => 'float',
            \media_subdef::TC_DATA_FOCALLENGTH        => 'float',
            \media_subdef::TC_DATA_CAMERAMODEL        => 'string',
            \media_subdef::TC_DATA_FLASHFIRED         => 'boolean',
            \media_subdef::TC_DATA_APERTURE           => 'float',
            \media_subdef::TC_DATA_SHUTTERSPEED       => 'float',
            \media_subdef::TC_DATA_HYPERFOCALDISTANCE => 'float',
            \media_subdef::TC_DATA_ISO                => 'integer',
            \media_subdef::TC_DATA_LIGHTVALUE         => 'float',
        ];

        $mappings = [];
        foreach ($types as $name => $type) {
            $mappings[$name] = ['type' => $type];
        }

        return $mappings;
    }
}

View File

@@ -826,12 +826,17 @@ class databox_field implements cache_cacheableInterface
} }
/** /**
* @param Boolean $all If set to false, returns a one-char separator to use for serialization
* *
* @return string * @return string
*/ */
public function get_separator() public function get_separator($all = true)
{ {
return $this->separator; if ($all) {
return $this->separator;
}
return substr($this->separator, 0, 1);
} }
/** /**

View File

@@ -624,6 +624,7 @@ class media_subdef extends media_abstract implements cache_cacheableInterface
self::TC_DATA_AUDIOSAMPLERATE => 'getAudioSampleRate', self::TC_DATA_AUDIOSAMPLERATE => 'getAudioSampleRate',
self::TC_DATA_VIDEOCODEC => 'getVideoCodec', self::TC_DATA_VIDEOCODEC => 'getVideoCodec',
self::TC_DATA_AUDIOCODEC => 'getAudioCodec', self::TC_DATA_AUDIOCODEC => 'getAudioCodec',
self::TC_DATA_ORIENTATION => 'getOrientation',
]; ];
foreach ($methods as $tc_name => $method) { foreach ($methods as $tc_name => $method) {

View File

@@ -0,0 +1,11 @@
{#
    ElasticSearch configuration form.

    Expects a "configuration" variable holding the stored engine options;
    the "host" and "port" inputs fall back to localhost / 9200 when the
    option is missing. The form is posted to the "admin_searchengine_post"
    route.
#}
<h1>{{ 'ElasticSearch configuration' | trans }}</h1>
<form method="post" action="{{ path('admin_searchengine_post') }}">
<div>{{ 'ElasticSearch connection configuration' | trans }}</div>
<div>{{ 'ElasticSearch server' | trans }}</div>
<input type="text" name="host" value="{{ configuration['host'] | default('localhost') }}"/>
<input type="text" name="port" value="{{ configuration['port'] | default('9200') }}"/>
<button type="submit" class="btn btn-warning" >{{ 'boutton::valider' | trans }}</button>
</form>

View File

@@ -0,0 +1,36 @@
<?php
namespace Alchemy\Tests\Phrasea\SearchEngine;
use Alchemy\Phrasea\SearchEngine\Elastic\ElasticSearchEngine;
use Alchemy\Phrasea\SearchEngine\Elastic\Indexer;
/**
 * Runs the shared search engine test suite against the ElasticSearch engine.
 */
class ElasticSearchEngineTest extends SearchEngineAbstractTest
{
    /**
     * Rebuilds the whole index before each test.
     */
    public function setUp()
    {
        parent::setUp();

        $es = ElasticSearchEngine::create(self::$DI['app']);
        $indexer = new Indexer($es, self::$DI['app']['monolog'], self::$DI['app']['phraseanet.appbox']);
        $indexer->createIndex();
        $indexer->reindexAll();
    }

    /**
     * Registers the engine instance and class used by the test suite.
     */
    public function initialize()
    {
        self::$searchEngine = ElasticSearchEngine::create(self::$DI['app']);
        self::$searchEngineClass = 'Alchemy\Phrasea\SearchEngine\Elastic\ElasticSearchEngine';
    }

    /**
     * Intentionally empty: the engine throws on autocomplete().
     * NOTE(review): presumably overrides a test of the parent class - confirm.
     */
    public function testAutocomplete()
    {
    }

    /**
     * Forces an index refresh so that freshly indexed documents are searchable.
     *
     * @param array $stemms Unused by this engine
     */
    protected function updateIndex(array $stemms = [])
    {
        $searchEngine = ElasticSearchEngine::create(self::$DI['app']);

        // Ask the engine for its index name instead of hard-coding it, so the
        // test keeps refreshing the right index if the default ever changes.
        $searchEngine->getClient()->indices()->refresh(['index' => $searchEngine->getIndexName()]);
    }
}