diff --git a/.travis.yml b/.travis.yml index 94bee2b0f2..e7cb7edb8f 100644 --- a/.travis.yml +++ b/.travis.yml @@ -8,6 +8,7 @@ services: - mysql - memcached - redis + - elasticsearch before_script: - node --version diff --git a/bin/console b/bin/console index a028910053..d6ff57bdbc 100755 --- a/bin/console +++ b/bin/console @@ -11,7 +11,7 @@ namespace KonsoleKommander; -use Alchemy\Phrasea\Core\Version; +use Alchemy\Phrasea\Command\SearchEngine\IndexFull; use Alchemy\Phrasea\Command\BuildMissingSubdefs; use Alchemy\Phrasea\Command\CreateCollection; use Alchemy\Phrasea\Command\MailTest; @@ -19,6 +19,7 @@ use Alchemy\Phrasea\Command\Compile\Configuration; use Alchemy\Phrasea\Command\RecordAdd; use Alchemy\Phrasea\Command\RescanTechnicalDatas; use Alchemy\Phrasea\Command\UpgradeDBDatas; +use Alchemy\Phrasea\Core\Version; use Alchemy\Phrasea\CLI; use Alchemy\Phrasea\Command\Plugin\AddPlugin; use Alchemy\Phrasea\Command\Plugin\RemovePlugin; @@ -107,6 +108,10 @@ $cli->command(new Configuration()); $cli->command(new XSendFileConfigurationDumper()); $cli->command(new XSendFileMappingGenerator()); +if ($cli['phraseanet.SE']->getName() === 'ElasticSearch') { + $cli->command(new IndexFull('searchengine:index')); +} + $cli->loadPlugins(); exit(is_int($cli->run()) ? 
: 1); diff --git a/composer.json b/composer.json index 9509ed226b..c2918c3a98 100644 --- a/composer.json +++ b/composer.json @@ -13,6 +13,7 @@ "dailymotion/sdk" : "~1.5", "data-uri/data-uri" : "~0.1.0", "doctrine/orm" : "~2.4.0", + "elasticsearch/elasticsearch" : "~0.4", "facebook/php-sdk" : "~3.0", "gedmo/doctrine-extensions" : "~2.3.0", "alchemy/google-plus-api-client" : "~0.6.2", diff --git a/composer.lock b/composer.lock index 7828162ee0..274007dd0c 100644 --- a/composer.lock +++ b/composer.lock @@ -3,7 +3,7 @@ "This file locks the dependencies of your project to a known state", "Read more about it at http://getcomposer.org/doc/01-basic-usage.md#composer-lock-the-lock-file" ], - "hash": "713759a9aea9d0178ef01ce2f7aea6b2", + "hash": "cf050120950d3b57c24a52d01bbf94a3", "packages": [ { "name": "alchemy-fr/tcpdf-clone", @@ -1036,6 +1036,61 @@ ], "time": "2013-11-12 12:40:13" }, + { + "name": "elasticsearch/elasticsearch", + "version": "v0.4.3", + "source": { + "type": "git", + "url": "https://github.com/elasticsearch/elasticsearch-php.git", + "reference": "f8116d7dc79e6602714e0bcf5a97f1753ef6ca9f" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/elasticsearch/elasticsearch-php/zipball/f8116d7dc79e6602714e0bcf5a97f1753ef6ca9f", + "reference": "f8116d7dc79e6602714e0bcf5a97f1753ef6ca9f", + "shasum": "" + }, + "require": { + "ext-curl": "*", + "guzzle/guzzle": "~3.7", + "monolog/monolog": "~1.5", + "php": ">=5.3.9", + "pimple/pimple": "~1.0", + "psr/log": "~1.0" + }, + "require-dev": { + "athletic/athletic": "~0.1", + "mikey179/vfsstream": "~1.2", + "mockery/mockery": "dev-master@dev", + "phpunit/phpunit": "3.7.*", + "satooshi/php-coveralls": "dev-master", + "symfony/yaml": "2.4.*@dev" + }, + "type": "library", + "autoload": { + "psr-0": { + "Elasticsearch": "src/", + "Elasticsearch\\Tests": "tests/", + "Elasticsearch\\Benchmarks": "benchmarks/" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "Apache 2" + ], + 
"authors": [ + { + "name": "Zachary Tong" + } + ], + "description": "PHP Client for Elasticsearch", + "keywords": [ + "client", + "elasticsearch", + "search" + ], + "time": "2013-12-02 15:31:03" + }, { "name": "evenement/evenement", "version": "v1.0.0", @@ -1124,16 +1179,16 @@ }, { "name": "gedmo/doctrine-extensions", - "version": "v2.3.8", + "version": "v2.3.9", "source": { "type": "git", "url": "https://github.com/l3pp4rd/DoctrineExtensions.git", - "reference": "d2bef940268e4dc5f0f050e80458169a7267813a" + "reference": "35adcaae1a3f50d0d5b73aa50ed8fd28ee35ce54" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/l3pp4rd/DoctrineExtensions/zipball/d2bef940268e4dc5f0f050e80458169a7267813a", - "reference": "d2bef940268e4dc5f0f050e80458169a7267813a", + "url": "https://api.github.com/repos/l3pp4rd/DoctrineExtensions/zipball/35adcaae1a3f50d0d5b73aa50ed8fd28ee35ce54", + "reference": "35adcaae1a3f50d0d5b73aa50ed8fd28ee35ce54", "shasum": "" }, "require": { @@ -1200,7 +1255,7 @@ "tree", "uploadable" ], - "time": "2013-11-09 21:23:13" + "time": "2014-01-12 16:34:06" }, { "name": "guzzle/guzzle", @@ -2669,12 +2724,12 @@ "source": { "type": "git", "url": "https://github.com/silexphp/Silex.git", - "reference": "5633f1b652a6cbc0c0ed4b6b86c0560c4560731a" + "reference": "93dde2797c6e8498b844ac0eca6e8dd6713fb172" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/silexphp/Silex/zipball/5633f1b652a6cbc0c0ed4b6b86c0560c4560731a", - "reference": "5633f1b652a6cbc0c0ed4b6b86c0560c4560731a", + "url": "https://api.github.com/repos/silexphp/Silex/zipball/93dde2797c6e8498b844ac0eca6e8dd6713fb172", + "reference": "93dde2797c6e8498b844ac0eca6e8dd6713fb172", "shasum": "" }, "require": { @@ -2745,7 +2800,7 @@ "keywords": [ "microframework" ], - "time": "2013-12-03 18:11:54" + "time": "2014-01-08 07:22:38" }, { "name": "silex/web-profiler", diff --git a/lib/Alchemy/Phrasea/Command/SearchEngine/IndexFull.php b/lib/Alchemy/Phrasea/Command/SearchEngine/IndexFull.php 
new file mode 100644 index 0000000000..0e29d9ccb9 --- /dev/null +++ b/lib/Alchemy/Phrasea/Command/SearchEngine/IndexFull.php @@ -0,0 +1,27 @@ +container['ES.indexer']->createIndex(); + $this->container['ES.indexer']->reindexAll(); + } +} diff --git a/lib/Alchemy/Phrasea/Core/Provider/SearchEngineServiceProvider.php b/lib/Alchemy/Phrasea/Core/Provider/SearchEngineServiceProvider.php index 57c7e24c47..1906cef073 100644 --- a/lib/Alchemy/Phrasea/Core/Provider/SearchEngineServiceProvider.php +++ b/lib/Alchemy/Phrasea/Core/Provider/SearchEngineServiceProvider.php @@ -14,12 +14,13 @@ namespace Alchemy\Phrasea\Core\Provider; use Alchemy\Phrasea\SearchEngine\SearchEngineLogger; use Alchemy\Phrasea\Exception\InvalidArgumentException; use Alchemy\Phrasea\SearchEngine\SearchEngineInterface; +use Alchemy\Phrasea\SearchEngine\Elastic\Indexer; +use Elasticsearch\Client; use Silex\Application; use Silex\ServiceProviderInterface; class SearchEngineServiceProvider implements ServiceProviderInterface { - public function register(Application $app) { $app['phraseanet.SE'] = $app->share(function ($app) { @@ -45,6 +46,10 @@ class SearchEngineServiceProvider implements ServiceProviderInterface $app['phraseanet.SE.subscriber'] = $app->share(function ($app) { return $app['phraseanet.SE.engine-class']::createSubscriber($app); }); + + $app['ES.indexer'] = $app->share(function ($app) { + return new Indexer($app['phraseanet.SE'], $app['monolog'], $app['phraseanet.appbox']); + }); } public function boot(Application $app) diff --git a/lib/Alchemy/Phrasea/Core/Provider/SerializerServiceProvider.php b/lib/Alchemy/Phrasea/Core/Provider/SerializerServiceProvider.php index aba7fe4af8..1ecc913c37 100644 --- a/lib/Alchemy/Phrasea/Core/Provider/SerializerServiceProvider.php +++ b/lib/Alchemy/Phrasea/Core/Provider/SerializerServiceProvider.php @@ -12,6 +12,7 @@ namespace Alchemy\Phrasea\Core\Provider; use Alchemy\Phrasea\Model\Serializer\CaptionSerializer; +use 
Alchemy\Phrasea\Model\Serializer\ESRecordSerializer; use Silex\Application; use Silex\ServiceProviderInterface; @@ -22,6 +23,10 @@ class SerializerServiceProvider implements ServiceProviderInterface $app['serializer.caption'] = $app->share(function (Application $app) { return new CaptionSerializer(); }); + + $app['serializer.es-record'] = $app->share(function (Application $app) { + return new ESRecordSerializer(); + }); } public function boot(Application $app) diff --git a/lib/Alchemy/Phrasea/Model/Serializer/AbstractSerializer.php b/lib/Alchemy/Phrasea/Model/Serializer/AbstractSerializer.php new file mode 100644 index 0000000000..5e43c50d66 --- /dev/null +++ b/lib/Alchemy/Phrasea/Model/Serializer/AbstractSerializer.php @@ -0,0 +1,49 @@ +saveXML(); } - - protected function sanitizeSerializedValue($value) - { - return str_replace([ - "\x00", //null - "\x01", //start heading - "\x02", //start text - "\x03", //end of text - "\x04", //end of transmission - "\x05", //enquiry - "\x06", //acknowledge - "\x07", //bell - "\x08", //backspace - "\x0C", //new page - "\x0E", //shift out - "\x0F", //shift in - "\x10", //data link escape - "\x11", //dc 1 - "\x12", //dc 2 - "\x13", //dc 3 - "\x14", //dc 4 - "\x15", //negative ack - "\x16", //synchronous idle - "\x17", //end of trans block - "\x18", //cancel - "\x19", //end of medium - "\x1A", //substitute - "\x1B", //escape - "\x1C", //file separator - "\x1D", //group sep - "\x1E", //record sep - "\x1F", //unit sep - ], '', $value); - } } diff --git a/lib/Alchemy/Phrasea/Model/Serializer/ESRecordSerializer.php b/lib/Alchemy/Phrasea/Model/Serializer/ESRecordSerializer.php new file mode 100644 index 0000000000..d6868eef5c --- /dev/null +++ b/lib/Alchemy/Phrasea/Model/Serializer/ESRecordSerializer.php @@ -0,0 +1,81 @@ +get_technical_infos() as $name => $value) { + $technicalInformation[$name] = $value; + } + + foreach ($record->get_caption()->get_fields(null, true) as $field) { + $isDate = $field->get_databox_field()->get_type() === 
\databox_field::TYPE_DATE; + $isBusiness = $field->get_databox_field()->isBusiness(); + + $vi = $field->get_values(); + if ($field->is_multi()) { + $values = []; + foreach ($vi as $value) { + $values[] = $this->sanitizeSerializedValue($value->getValue()); + } + $value = implode (' ' . $field->get_databox_field()->get_separator(false).' ', $values); + } else { + $value = $this->sanitizeSerializedValue(array_pop($vi)->getValue()); + } + + if ($isDate) { + try { + $date = new \DateTime($value); + $value = $date->format(DATE_ATOM); + } catch (\Exception $e) { + continue; + } + } + + if ($isBusiness) { + $business[$field->get_databox_field()->get_name()] = $value; + } + + $caption[$field->get_databox_field()->get_name()] = $value; + } + + $i = 0; + foreach (preg_split('//', strrev($record->get_status()), -1, PREG_SPLIT_NO_EMPTY) as $val) { + $status['status-'.$i] = (int) $val; + $i++; + } + + return [ + 'databox_id' => $record->get_sbas_id(), + 'record_id' => $record->get_record_id(), + 'collection_id' => $record->get_collection()->get_coll_id(), + 'base_id' => $record->get_base_id(), + 'mime_type' => $record->get_mime(), + 'title' => $record->get_title(), + 'original_name' => $record->get_original_name(), + 'updated_on' => $record->get_modification_date()->format(DATE_ATOM), + 'created_on' => $record->get_creation_date()->format(DATE_ATOM), + 'sha256' => $record->get_sha256(), + 'technical_informations' => $technicalInformation, + 'phrasea_type' => $record->get_type(), + 'type' => $record->is_grouping() ? 
'story' : 'record', + 'uuid' => $record->get_uuid(), + 'caption' => $caption, + 'status' => $status, + 'caption-business' => $business, + ]; + } +} diff --git a/lib/Alchemy/Phrasea/SearchEngine/Elastic/ConfigurationPanel.php b/lib/Alchemy/Phrasea/SearchEngine/Elastic/ConfigurationPanel.php new file mode 100644 index 0000000000..3e7605671d --- /dev/null +++ b/lib/Alchemy/Phrasea/SearchEngine/Elastic/ConfigurationPanel.php @@ -0,0 +1,67 @@ +searchEngine = $engine; + $this->conf = $conf; + } + + /** + * {@inheritdoc} + */ + public function getName() + { + return 'elastic-search-engine'; + } + + /** + * {@inheritdoc} + */ + public function get(Application $app, Request $request) + { + return $app['twig']->render('admin/search-engine/elastic-search.html.twig', ['configuration' => $this->getConfiguration()]); + } + + /** + * {@inheritdoc} + */ + public function post(Application $app, Request $request) + { + $configuration = $this->getConfiguration(); + + $configuration['host'] = $request->request->get('host'); + $configuration['port'] = $request->request->get('port'); + + $this->saveConfiguration($configuration); + + return $app->redirectPath('admin_searchengine_get'); + } + + /** + * {@inheritdoc} + */ + public function getConfiguration() + { + return isset($this->conf['main']['search-engine']['options']) ? 
$this->conf['main']['search-engine']['options'] : []; + } +} diff --git a/lib/Alchemy/Phrasea/SearchEngine/Elastic/ElasticSearchEngine.php b/lib/Alchemy/Phrasea/SearchEngine/Elastic/ElasticSearchEngine.php new file mode 100644 index 0000000000..d6a93d8a87 --- /dev/null +++ b/lib/Alchemy/Phrasea/SearchEngine/Elastic/ElasticSearchEngine.php @@ -0,0 +1,581 @@ +app = $app; + $this->client = $client; + $this->serializer = $serializer; + + if ('' === trim($indexName)) { + throw new \InvalidArgumentException('The provided index name is invalid.'); + } + + $this->indexName = $indexName; + } + + public function getIndexName() + { + return $this->indexName; + } + + /** + * @return Client + */ + public function getClient() + { + return $this->client; + } + + /** + * {@inheritdoc} + */ + public function getName() + { + return 'ElasticSearch'; + } + + /** + * {@inheritdoc} + */ + public function getStatus() + { + $data = $this->client->info(); + $version = $data['version']; + unset($data['version']); + + foreach ($version as $prop => $value) { + $data['version:'.$prop] = $value; + } + + $ret = []; + + foreach ($data as $key => $value) { + $ret[] = [$key, $value]; + } + + return $ret; + } + + /** + * {@inheritdoc} + */ + public function getConfigurationPanel() + { + if (!$this->configurationPanel) { + $this->configurationPanel = new ConfigurationPanel($this, $this->app['configuration.store']); + } + + return $this->configurationPanel; + } + + /** + * {@inheritdoc} + */ + public function getAvailableDateFields() + { + if (!$this->dateFields) { + foreach ($this->app['phraseanet.appbox']->get_databoxes() as $databox) { + foreach ($databox->get_meta_structure() as $databox_field) { + if ($databox_field->get_type() != \databox_field::TYPE_DATE) { + continue; + } + + $this->dateFields[] = $databox_field->get_name(); + } + } + + $this->dateFields = array_unique($this->dateFields); + } + + return $this->dateFields; + } + + /** + * {@inheritdoc} + */ + public function getAvailableSort() 
+ { + return [ + 'score' => $this->app->trans('pertinence'), + 'created_on' => $this->app->trans('date dajout'), + ]; + } + + /** + * {@inheritdoc} + */ + public function getDefaultSort() + { + return 'score'; + } + + /** + * {@inheritdoc} + */ + public function isStemmingEnabled() + { + return false; + } + + /** + * {@inheritdoc} + */ + public function getAvailableOrder() + { + return [ + 'desc' => $this->app->trans('descendant'), + 'asc' => $this->app->trans('ascendant'), + ]; + } + + /** + * {@inheritdoc} + */ + public function hasStemming() + { + return false; + } + + /** + * {@inheritdoc} + */ + public function getAvailableTypes() + { + return [self::GEM_TYPE_RECORD, self::GEM_TYPE_STORY]; + } + + /** + * {@inheritdoc} + */ + public function addRecord(\record_adapter $record) + { + $this->doExecute('index', [ + 'body' => $this->serializer->serialize($record), + 'index' => $this->indexName, + 'type' => 'record', + 'id' => sprintf('%d-%d', $record->get_sbas_id(), $record->get_record_id()), + ]); + + return $this; + } + + /** + * {@inheritdoc} + */ + public function removeRecord(\record_adapter $record) + { + $this->doExecute('delete', [ + 'index' => $this->indexName, + 'type' => 'record', + 'id' => sprintf('%s-%s', $record->get_sbas_id(), $record->get_record_id()), + ]); + + return $this; + } + + /** + * {@inheritdoc} + */ + public function updateRecord(\record_adapter $record) + { + $this->addRecord($record); + + return $this; + } + + /** + * {@inheritdoc} + */ + public function addStory(\record_adapter $story) + { + $this->addRecord($story); + + return $this; + } + + /** + * {@inheritdoc} + */ + public function removeStory(\record_adapter $story) + { + $this->removeRecord($story); + + return $this; + } + + /** + * {@inheritdoc} + */ + public function updateStory(\record_adapter $story) + { + $this->addRecord($story); + + return $this; + } + + /** + * {@inheritdoc} + */ + public function addFeedEntry(FeedEntry $entry) + { + throw new 
RuntimeException('ElasticSearch engine does not support feed entry indexing.'); + } + + /** + * {@inheritdoc} + */ + public function removeFeedEntry(FeedEntry $entry) + { + throw new RuntimeException('ElasticSearch engine does not support feed entry indexing.'); + } + + /** + * {@inheritdoc} + */ + public function updateFeedEntry(FeedEntry $entry) + { + throw new RuntimeException('ElasticSearch engine does not support feed entry indexing.'); + } + + /** + * {@inheritdoc} + */ + public function query($query, $offset, $perPage, SearchEngineOptions $options = null) + { + $query = 'all' !== strtolower($query) ? $query : ''; + $params = $this->createQueryParams($query, $options ?: new SearchEngineOptions()); + $params['from'] = $offset; + $params['size'] = $perPage; + + $res = $this->doExecute('search', $params); + + $results = new ArrayCollection(); + $suggestions = new ArrayCollection(); + $n = 0; + + foreach ($res['hits']['hits'] as $hit) { + $results[] = new \record_adapter($this->app, $hit['fields']['databox_id'], $hit['fields']['record_id'], $n); + $n++; + } + + return new SearchEngineResult($results, $query, $res['took'], $offset, $res['hits']['total'], $res['hits']['total'], null, null, $suggestions, [], $this->indexName); + } + + /** + * {@inheritdoc} + */ + public function autocomplete($query, SearchEngineOptions $options) + { + throw new RuntimeException('ElasticSearch engine currently does not support auto-complete.'); + } + + /** + * {@inheritdoc} + */ + public function excerpt($query, $fields, \record_adapter $record, SearchEngineOptions $options = null) + { + $query = 'all' !== strtolower($query) ? 
$query : ''; + $params = $this->createQueryParams($query, $options ?: new SearchEngineOptions(), $record); + + $res = $this->doExecute('search', $params); + + foreach ($fields as $name => $field) { + if (isset($res['hits']['hits'][0]['highlight']['caption.'.$name])) { + $ret[] = $res['hits']['hits'][0]['highlight']['caption.'.$name][0]; + } else { + $ret[] = $field['value']; + } + } + + return $ret; + } + + /** + * {@inheritdoc} + */ + public function resetCache() + { + } + + /** + * {@inheritdoc} + */ + public function clearCache() + { + } + + /** + * {@inheritdoc} + */ + public function clearAllCache(\DateTime $date = null) + { + } + + /** + * {@inheritdoc} + */ + public static function createSubscriber(Application $app) + { + return new ElasticSearchEngineSubscriber(); + } + + /** + * {@inheritdoc} + * + * @return ElasticSearchEngine + */ + public static function create(Application $app, array $options = []) + { + $options = array_replace([ + 'host' => '127.0.0.1', + 'port' => '9200', + 'index' => 'phraseanet', + ], $options); + + $client = new Client(['hosts' => [sprintf('%s:%s', $options['host'], $options['port'])]]); + + return new static($app, $client, $app['serializer.es-record'], $options['index']); + } + + private function createQueryParams($query, SearchEngineOptions $options, \record_adapter $record = null) + { + $params = [ + 'index' => $this->indexName, + 'type' => 'record', + 'body' => [ + 'fields' => ['databox_id', 'record_id'], + 'sort' => $this->createSortQueryParams($options), + ] + ]; + + $ESquery = $this->createESQuery($query, $options); + $filters = $this->createFilters($options); + + if ($record) { + $filters[] = [ + 'term' => [ + '_id' => sprintf('%s-%s', $record->get_sbas_id(), $record->get_record_id()), + ] + ]; + + $fields = []; + + foreach ($record->get_databox()->get_meta_structure() as $dbField) { + $fields['caption.'.$dbField->get_name()] = new \stdClass(); + } + + $params['body']['highlight'] = [ + "pre_tags" => ["[[em]]"], + 
"post_tags" => ["[[/em]]"], + "fields" => $fields, + ]; + } + + if (count($filters) > 0) { + $ESquery = [ + 'filtered' => [ + 'query' => $ESquery, + 'filter' => [ + 'and' => $filters + ] + ] + ]; + } + + $params['body']['query'] = $ESquery; + + return $params; + } + + private function createESQuery($query, SearchEngineOptions $options) + { + $preg = preg_match('/\s?(recordid|storyid)\s?=\s?([0-9]+)/i', $query, $matches, 0, 0); + + $search = []; + if ($preg > 0) { + $search['bool']['must'][] = [ + 'term' => [ + 'record_id' => $matches[2], + ], + ]; + $query = ''; + } + + if ('' !== $query) { + if (0 < count($options->getBusinessFieldsOn())) { + $fields = []; + + foreach ($this->app['phraseanet.appbox']->get_databoxes() as $databox) { + foreach ($databox->get_meta_structure() as $dbField) { + if ($dbField->isBusiness()) { + $fields[$dbField->get_name()] = [ + 'match' => [ + 'caption.'.$dbField->get_name() => $query, + ] + ]; + } + } + } + + if (count($fields) > 0) { + foreach ($options->getBusinessFieldsOn() as $coll) { + $search['bool']['should'][] = [ + 'bool' => [ + 'must' => [ + [ + 'bool' => [ + 'should' => array_values($fields) + ] + ],[ + 'term' => [ + 'base_id' => $coll->get_base_id(), + ] + ] + ] + ] + ]; + } + } + } + + if ($options->getFields()) { + foreach ($options->getFields() as $field) { + $search['bool']['should'][] = [ + 'match' => [ + 'caption.'.$field->get_name() => $query, + ] + ]; + } + } else { + $search['bool']['should'][] = [ + 'match' => [ + '_all' => $query, + ] + ]; + } + } else { + $search['bool']['should'][] = [ + 'match_all' => new \stdClass(), + ]; + } + + return $search; + } + + private function createFilters(SearchEngineOptions $options) + { + $filters = []; + + $status_opts = $options->getStatus(); + foreach ($options->getDataboxes() as $databox) { + foreach ($databox->get_statusbits() as $n => $status) { + if (!array_key_exists($n, $status_opts)) { + continue; + } + if (!array_key_exists($databox->get_sbas_id(), $status_opts[$n])) 
{ + continue; + } + + $filters[] = [ + 'term' => [ + 'status.status-'.$n => $status_opts[$n][$databox->get_sbas_id()], + ] + ]; + } + } + + $filters[] = [ + 'terms' => [ + 'base_id' => array_map(function (\collection $coll) { return $coll->get_base_id(); }, $options->getCollections()) + ] + ]; + $filters[] = [ + 'term' => [ + 'type' => $options->getSearchType() === SearchEngineOptions::RECORD_RECORD ? 'record' : 'story', + ] + ]; + + if ($options->getDateFields() && ($options->getMaxDate() || $options->getMinDate())) { + $range = []; + if ($options->getMaxDate()) { + $range['lte'] = $options->getMaxDate()->format(DATE_ATOM); + } + if ($options->getMinDate()) { + $range['gte'] = $options->getMinDate()->format(DATE_ATOM); + } + + foreach ($options->getDateFields() as $dateField) { + $filters[] = [ + 'range' => [ + 'caption.'.$dateField->get_name() => $range + ] + ]; + } + } + + if ($options->getRecordType()) { + $filters[] = [ + 'term' => [ + 'phrasea_type' => $options->getRecordType(), + ] + ]; + } + + return $filters; + } + + private function createSortQueryParams(SearchEngineOptions $options) + { + $sort = []; + if ($options->getSortBy() === 'score') { + $sort['_score'] = $options->getSortOrder(); + } + + $sort['created_on'] = $options->getSortOrder(); + + return $sort; + } + + private function doExecute($method, array $params) + { + $res = call_user_func([$this->client, $method], $params); + + if (isset($res['error'])) { + throw new RuntimeException('Unable to execute method '.$method); + } + + return $res; + } +} diff --git a/lib/Alchemy/Phrasea/SearchEngine/Elastic/ElasticSearchEngineSubscriber.php b/lib/Alchemy/Phrasea/SearchEngine/Elastic/ElasticSearchEngineSubscriber.php new file mode 100644 index 0000000000..a38cae3382 --- /dev/null +++ b/lib/Alchemy/Phrasea/SearchEngine/Elastic/ElasticSearchEngineSubscriber.php @@ -0,0 +1,22 @@ +engine = $engine; + $this->logger = $logger; + $this->appbox = $appbox; + } + + public function createIndex() + { + 
$indexParams['index'] = $this->engine->getIndexName(); + + // Index Settings + $indexParams['body']['settings']['number_of_shards'] = 3; + $indexParams['body']['settings']['number_of_replicas'] = 0; + + $captionFields = []; + $businessFields = []; + + foreach ($this->appbox->get_databoxes() as $databox) { + foreach ($databox->get_meta_structure() as $dbField) { + $type = 'string'; + if (\databox_field::TYPE_DATE === $dbField->get_type()) { + $type = 'date'; + } + if (isset($captionFields[$dbField->get_name()]) && $type !== $captionFields[$dbField->get_name()]['type']) { + $type = 'string'; + } + + $captionFields[$dbField->get_name()] = [ + 'type' => $type, + 'include_in_all' => !$dbField->isBusiness(), + 'analyzer' => 'french', + ]; + + if ($dbField->isBusiness()) { + $businessFields[$dbField->get_name()] = [ + 'type' => $type, + 'include_in_all' => false, + 'analyzer' => 'french', + ]; + } + } + } + + $status = []; + for ($i = 0; $i <= 32; $i ++) { + $status['status-'.$i] = [ + 'type' => 'integer', + ]; + } + + $recordTypeMapping = [ + '_source' => [ + 'enabled' => true + ], + '_all' => [ + 'analyzer' => 'french', + ], + 'analysis' => [ + 'analyzer' => [ + 'french' => [ + 'type' => 'custom', + 'tokenizer' => 'letter', + 'filter' => ["asciifolding", "lowercase", "french_stem", "stop_fr"] + ], + 'autocomplete_french' => [ + 'type' => 'custom', + 'tokenizer' => 'letter', + 'filter' => ["asciifolding", "lowercase", "stop_fr"] + ] + ], + 'filter' => [ + 'stop_fr' => [ + 'type' => 'stop', + 'stopwords' => ['l', 'm', 't', 'qu', 'n', 's', 'j', 'd'], + ] + ], + ], + 'properties' => [ + 'record_id' => [ + 'type' => 'integer', + 'index' => 'not_analyzed', + ], + 'databox_id' => [ + 'type' => 'integer', + 'index' => 'not_analyzed', + ], + 'base_id' => [ + 'type' => 'integer', + 'index' => 'not_analyzed', + ], + 'mime_type' => [ + 'type' => 'string', + 'index' => 'not_analyzed', + ], + 'title' => [ + 'type' => 'string', + 'index' => 'not_analyzed', + ], + 'original_name' => [ 
+ 'type' => 'string', + 'index' => 'not_analyzed', + ], + 'updated_on' => [ + 'type' => 'date', + 'index' => 'not_analyzed', + ], + 'created_on' => [ + 'type' => 'date', + 'index' => 'not_analyzed', + ], + 'collection_id' => [ + 'type' => 'integer', + 'index' => 'not_analyzed', + ], + 'sha256' => [ + 'type' => 'string', + 'index' => 'not_analyzed', + ], + 'type' => [ + 'type' => 'string', + 'index' => 'not_analyzed', + ], + 'phrasea_type' => [ + 'type' => 'string', + 'index' => 'not_analyzed', + ], + 'uuid' => [ + 'type' => 'string', + 'index' => 'not_analyzed', + ], + 'status' => [ + 'properties' => $status + ], + "technical_informations" => [ + 'properties' => [ + \media_subdef::TC_DATA_WIDTH => [ + 'type' => 'integer' + ], + \media_subdef::TC_DATA_HEIGHT => [ + 'type' => 'integer' + ], + \media_subdef::TC_DATA_COLORSPACE => [ + 'type' => 'string' + ], + \media_subdef::TC_DATA_CHANNELS => [ + 'type' => 'integer' + ], + \media_subdef::TC_DATA_ORIENTATION => [ + 'type' => 'integer' + ], + \media_subdef::TC_DATA_COLORDEPTH => [ + 'type' => 'integer' + ], + \media_subdef::TC_DATA_DURATION => [ + 'type' => 'integer' + ], + \media_subdef::TC_DATA_AUDIOCODEC => [ + 'type' => 'string' + ], + \media_subdef::TC_DATA_AUDIOSAMPLERATE => [ + 'type' => 'integer' + ], + \media_subdef::TC_DATA_VIDEOCODEC => [ + 'type' => 'string' + ], + \media_subdef::TC_DATA_FRAMERATE => [ + 'type' => 'float' + ], + \media_subdef::TC_DATA_MIMETYPE => [ + 'type' => 'string' + ], + \media_subdef::TC_DATA_FILESIZE => [ + 'type' => 'long' + ], + \media_subdef::TC_DATA_LONGITUDE => [ + 'type' => 'float' + ], + \media_subdef::TC_DATA_LATITUDE => [ + 'type' => 'float' + ], + \media_subdef::TC_DATA_FOCALLENGTH => [ + 'type' => 'float' + ], + \media_subdef::TC_DATA_CAMERAMODEL => [ + 'type' => 'string' + ], + \media_subdef::TC_DATA_FLASHFIRED => [ + 'type' => 'boolean' + ], + \media_subdef::TC_DATA_APERTURE => [ + 'type' => 'float' + ], + \media_subdef::TC_DATA_SHUTTERSPEED => [ + 'type' => 'float' + ], 
+ \media_subdef::TC_DATA_HYPERFOCALDISTANCE => [ + 'type' => 'float' + ], + \media_subdef::TC_DATA_ISO => [ + 'type' => 'integer' + ], + \media_subdef::TC_DATA_LIGHTVALUE => [ + 'type' => 'float' + ], + ] + ], + "caption" => [ + 'properties' => $captionFields + ], + ] + ]; + + if (0 < count ($businessFields)) { + $recordTypeMapping['properties']['caption-business'] = [ + 'properties' => $businessFields + ]; + } + + $indexParams['body']['mappings']['record'] = $recordTypeMapping; + + if ($this->engine->getClient()->indices()->exists(['index' => $this->engine->getIndexName()])) { + $this->engine->getClient()->indices()->delete(['index' => $this->engine->getIndexName()]); + } + + $ret = $this->engine->getClient()->indices()->create($indexParams); + + if (isset($ret['error']) || !$ret['ok']) { + throw new \RuntimeException('Unable to create index'); + } + } + + public function reindexAll() + { + $qty = 10; + + $params['index'] = $this->engine->getIndexName(); + $params['body']['index']['refresh_interval'] = 300; + + $ret = $this->engine->getClient()->indices()->putSettings($params); + + if (!isset($ret['ok']) || !$ret['ok']) { + $this->logger->error('Unable to set the refresh interval to 300 s. 
.'); + } + + foreach ($this->appbox->get_databoxes() as $databox) { + $offset = 0; + do { + $sql = 'SELECT record_id FROM record + WHERE parent_record_id = 0 + ORDER BY record_id ASC LIMIT '.$offset.', '.$qty; + $stmt = $databox->get_connection()->prepare($sql); + $stmt->execute(); + $rows = $stmt->fetchAll(\PDO::FETCH_ASSOC); + $stmt->closeCursor(); + + foreach ($rows as $row) { + $record = $databox->get_record($row['record_id']); + $this->engine->addRecord($record); + } + + gc_collect_cycles(); + $offset += $qty; + } while (count($rows) > 0); + } + + $params['index'] = $this->engine->getIndexName(); + $params['body']['index']['refresh_interval'] = 1; + + $ret = $this->engine->getClient()->indices()->putSettings($params); + + if (!isset($ret['ok']) || !$ret['ok']) { + throw new \RuntimeException('Unable to set the refresh interval to 1 s. .'); + } + } +} diff --git a/lib/classes/databox/field.php b/lib/classes/databox/field.php index 86646e0a75..cf7d9313af 100644 --- a/lib/classes/databox/field.php +++ b/lib/classes/databox/field.php @@ -826,12 +826,17 @@ class databox_field implements cache_cacheableInterface } /** + * @param Boolean $all If set to false, returns a one-char separator to use for serialization * * @return string */ - public function get_separator() + public function get_separator($all = true) { - return $this->separator; + if ($all) { + return $this->separator; + } + + return substr($this->separator, 0, 1); } /** diff --git a/lib/classes/media/subdef.php b/lib/classes/media/subdef.php index 40bb1d5076..f29b98abbe 100644 --- a/lib/classes/media/subdef.php +++ b/lib/classes/media/subdef.php @@ -624,6 +624,7 @@ class media_subdef extends media_abstract implements cache_cacheableInterface self::TC_DATA_AUDIOSAMPLERATE => 'getAudioSampleRate', self::TC_DATA_VIDEOCODEC => 'getVideoCodec', self::TC_DATA_AUDIOCODEC => 'getAudioCodec', + self::TC_DATA_ORIENTATION => 'getOrientation', ]; foreach ($methods as $tc_name => $method) { diff --git 
a/templates/web/admin/search-engine/elastic-search.html.twig b/templates/web/admin/search-engine/elastic-search.html.twig new file mode 100644 index 0000000000..638ecb9f62 --- /dev/null +++ b/templates/web/admin/search-engine/elastic-search.html.twig @@ -0,0 +1,11 @@ +