diff --git a/lib/Alchemy/Phrasea/Command/SearchEngine/IndexPopulateCommand.php b/lib/Alchemy/Phrasea/Command/SearchEngine/IndexPopulateCommand.php
index 2704226b3b..92fd8f89d1 100644
--- a/lib/Alchemy/Phrasea/Command/SearchEngine/IndexPopulateCommand.php
+++ b/lib/Alchemy/Phrasea/Command/SearchEngine/IndexPopulateCommand.php
@@ -59,13 +59,8 @@ class IndexPopulateCommand extends Command
throw new \RuntimeException("Could not provide --thesaurus and --records option at the same time.");
}
- $databoxes_id = $input->getOption('databox_id');
+ $databoxes = $input->getOption('databox_id');
- $app = $this->container;
- foreach($app->getDataboxes() as $databox) {
- if(!$databoxes_id || in_array($databox->get_sbas_id(), $databoxes_id)) {
- $this->container['elasticsearch.indexer']->populateIndex($what, $databox);
- }
- }
+ $this->container['elasticsearch.indexer']->populateIndex($what, $databoxes);
}
}
diff --git a/lib/Alchemy/Phrasea/Core/Provider/SearchEngineServiceProvider.php b/lib/Alchemy/Phrasea/Core/Provider/SearchEngineServiceProvider.php
index 52864ac767..ac9bdb8c4a 100644
--- a/lib/Alchemy/Phrasea/Core/Provider/SearchEngineServiceProvider.php
+++ b/lib/Alchemy/Phrasea/Core/Provider/SearchEngineServiceProvider.php
@@ -89,17 +89,12 @@ class SearchEngineServiceProvider implements ServiceProviderInterface
$app['elasticsearch.options'],
$app['elasticsearch.indexer.term_indexer'],
$app['elasticsearch.indexer.record_indexer'],
- $app['phraseanet.appbox'],
- new Logger('es.indexer')
+ $app['phraseanet.appbox']
);
});
$app['elasticsearch.indexer.term_indexer'] = $app->share(function ($app) {
- return new TermIndexer(
- $app['phraseanet.appbox'],
- array_keys($app['locales.available']),
- new Logger('term.indexer')
- );
+ return new TermIndexer($app['phraseanet.appbox'], array_keys($app['locales.available']));
});
$app['elasticsearch.indexer.record_indexer'] = $app->share(function ($app) {
diff --git a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer.php b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer.php
index ad14d3ec0a..bc0da95783 100644
--- a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer.php
+++ b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer.php
@@ -22,7 +22,6 @@ use Elasticsearch\Client;
use Psr\Log\LoggerInterface;
use igorw;
use Psr\Log\NullLogger;
-use record_adapter;
use Symfony\Component\Stopwatch\Stopwatch;
use SplObjectStorage;
@@ -42,9 +41,7 @@ class Indexer
private $recordIndexer;
private $termIndexer;
- /** @var SplObjectStorage */
- private $indexQueue; // contains record_adapter(s)
- /** @var SplObjectStorage */
+ private $indexQueue; // contains RecordInterface(s)
private $deleteQueue;
public function __construct(Client $client, ElasticsearchOptions $options, TermIndexer $termIndexer, RecordIndexer $recordIndexer, appbox $appbox, LoggerInterface $logger = null)
@@ -101,15 +98,21 @@ class Indexer
return $this->client->indices()->exists($params);
}
- public function populateIndex($what, \databox $databox)
+ public function populateIndex($what, array $databoxes_id = [])
{
$stopwatch = new Stopwatch();
$stopwatch->start('populate');
- $this->apply(function (BulkOperation $bulk) use ($what, $databox) {
+ if ($databoxes_id) {
+ // If databoxes are given, only use those
+ $databoxes = array_map(array($this->appbox, 'get_databox'), $databoxes_id);
+ } else {
+ $databoxes = $this->appbox->get_databoxes();
+ }
+ $this->apply(function(BulkOperation $bulk) use ($what, $databoxes) {
if ($what & self::THESAURUS) {
- $this->termIndexer->populateIndex($bulk, $databox);
+ $this->termIndexer->populateIndex($bulk, $databoxes);
// Record indexing depends on indexed terms so we need to make
// everything ready to search
@@ -118,7 +121,7 @@ class Indexer
}
if ($what & self::RECORDS) {
- $this->recordIndexer->populateIndex($this, $bulk, $databox);
+ $this->recordIndexer->populateIndex($bulk, $databoxes);
// Final flush
$bulk->flush();
@@ -130,7 +133,7 @@ class Indexer
});
$event = $stopwatch->stop('populate');
- $this->logger->info(sprintf("Indexation finished in %0.02f sec (Mem. %0.02f Mo)", ($event->getDuration()/1000), $event->getMemory()/1048576));
+ printf("Indexation finished in %0.02f min (Mem. %0.02f Mo)\n", $event->getDuration() / 1000 / 60, $event->getMemory() / 1048576); // plain float math: no ext-bcmath dependency, fixed two-decimal output, newline-terminated
}
public function migrateMappingForDatabox($databox)
@@ -157,24 +160,24 @@ class Indexer
RecordQueuer::queueRecordsFromCollection($collection);
}
- public function indexRecord(record_adapter $record)
+ public function indexRecord(RecordInterface $record)
{
$this->indexQueue->attach($record);
}
- public function deleteRecord(record_adapter $record)
+ public function deleteRecord(RecordInterface $record)
{
$this->deleteQueue->attach($record);
}
/**
- * @param \databox $databox databox to index
+ * @param \databox[] $databoxes databoxes to index
* @throws \Exception
*/
- public function indexScheduledRecords(\databox $databox)
+ public function indexScheduledRecords(array $databoxes)
{
- $this->apply(function(BulkOperation $bulk) use ($databox) {
- $this->recordIndexer->indexScheduled($this, $bulk, $databox);
+ $this->apply(function(BulkOperation $bulk) use($databoxes) {
+ $this->recordIndexer->indexScheduled($bulk, $databoxes);
});
}
@@ -189,7 +192,7 @@ class Indexer
}
$this->apply(function(BulkOperation $bulk) {
- $this->recordIndexer->index($this, $bulk, $this->indexQueue);
+ $this->recordIndexer->index($bulk, $this->indexQueue);
$this->recordIndexer->delete($bulk, $this->deleteQueue);
$bulk->flush();
});
diff --git a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer/RecordIndexer.php b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer/RecordIndexer.php
index 92a1ee2ea6..2cbbef0ac0 100644
--- a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer/RecordIndexer.php
+++ b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer/RecordIndexer.php
@@ -10,7 +10,7 @@
namespace Alchemy\Phrasea\SearchEngine\Elastic\Indexer;
-use Alchemy\Phrasea\SearchEngine\Elastic\Indexer;
+use Alchemy\Phrasea\Model\RecordInterface;
use Alchemy\Phrasea\SearchEngine\Elastic\Indexer\Record\Delegate\FetcherDelegateInterface;
use Alchemy\Phrasea\SearchEngine\Elastic\Indexer\Record\Delegate\RecordListFetcherDelegate;
use Alchemy\Phrasea\SearchEngine\Elastic\Indexer\Record\Delegate\ScheduledFetcherDelegate;
@@ -30,7 +30,6 @@ use Alchemy\Phrasea\SearchEngine\Elastic\Thesaurus\CandidateTerms;
use databox;
use Iterator;
use Psr\Log\LoggerInterface;
-use record_adapter;
class RecordIndexer
{
@@ -94,44 +93,52 @@ class RecordIndexer
* index whole databox(es), don't test actual "jetons"
* called by command "populate"
*
- * @param Indexer $indexer
* @param BulkOperation $bulk
- * @param databox $databox
+ * @param databox[] $databoxes
*/
- public function populateIndex(Indexer $indexer, BulkOperation $bulk, databox $databox)
+ public function populateIndex(BulkOperation $bulk, array $databoxes)
{
- $submited_records = [];
+ foreach ($databoxes as $databox) {
- $this->logger->info(sprintf('Indexing database %s...', $databox->get_viewname()));
+ $submited_records = [];
- $fetcher = $this->createFetcherForDatabox($databox); // no delegate, scan the whole records
+ $this->logger->info(sprintf('Indexing database %s...', $databox->get_viewname()));
- // post fetch : flag records as "indexing"
- $fetcher->setPostFetch(function(array $records) use ($databox, $fetcher) {
- RecordQueuer::didStartIndexingRecords($records, $databox);
- // do not restart the fetcher since it has no clause on jetons
- });
+ $fetcher = $this->createFetcherForDatabox($databox); // no delegate, scan the whole records
- // bulk flush : flag records as "indexed"
- $bulk->onFlush(function($operation_identifiers) use ($databox, &$submited_records) {
- $this->onBulkFlush($databox, $operation_identifiers, $submited_records);
- });
+ // post fetch : flag records as "indexing"
+ $fetcher->setPostFetch(function(array $records) use ($databox, $fetcher) {
+ RecordQueuer::didStartIndexingRecords($records, $databox);
+ // do not restart the fetcher since it has no clause on jetons
+ });
- // Perform indexing
- $this->indexFromFetcher($indexer, $bulk, $fetcher, $submited_records);
+ // bulk flush : flag records as "indexed"
+ $bulk->onFlush(function($operation_identifiers) use ($databox, &$submited_records) {
+ $this->onBulkFlush($databox, $operation_identifiers, $submited_records);
+ });
- $this->logger->info(sprintf('Finished indexing %s', $databox->get_viewname()));
+ // Perform indexing
+ $this->indexFromFetcher($bulk, $fetcher, $submited_records);
+
+ $this->logger->info(sprintf('Finished indexing %s', $databox->get_viewname()));
+ }
}
/**
- * Index the records flagged as "to_index" on databox
+ * Index the records flagged as "to_index" on databoxes
* called by task "indexer"
*
- * @param Indexer $indexer
* @param BulkOperation $bulk
- * @param databox $databox
+ * @param databox[] $databoxes
*/
- public function indexScheduled(Indexer $indexer, BulkOperation $bulk, databox $databox)
+ public function indexScheduled(BulkOperation $bulk, array $databoxes)
+ {
+ foreach ($databoxes as $databox) {
+ $this->indexScheduledInDatabox($bulk, $databox);
+ }
+ }
+
+ private function indexScheduledInDatabox(BulkOperation $bulk, databox $databox)
{
$submited_records = [];
@@ -141,7 +148,6 @@ class RecordIndexer
// post fetch : flag records as "indexing"
$fetcher->setPostFetch(function(array $records) use ($databox, $fetcher) {
- $this->logger->debug(sprintf("indexing %d records", count($records)));
RecordQueuer::didStartIndexingRecords($records, $databox);
// because changing the flag on the records affects the "where" clause of the fetcher,
// restart it each time
@@ -154,17 +160,16 @@ class RecordIndexer
});
// Perform indexing
- $this->indexFromFetcher($indexer, $bulk, $fetcher, $submited_records);
+ $this->indexFromFetcher($bulk, $fetcher, $submited_records);
}
/**
* Index a list of records
*
- * @param Indexer $indexer
* @param BulkOperation $bulk
* @param Iterator $records
*/
- public function index(Indexer $indexer, BulkOperation $bulk, Iterator $records)
+ public function index(BulkOperation $bulk, Iterator $records)
{
foreach ($this->createFetchersForRecords($records) as $fetcher) {
$submited_records = [];
@@ -182,7 +187,7 @@ class RecordIndexer
});
// Perform indexing
- $this->indexFromFetcher($indexer, $bulk, $fetcher, $submited_records);
+ $this->indexFromFetcher($bulk, $fetcher, $submited_records);
}
}
@@ -222,18 +227,14 @@ class RecordIndexer
{
$connection = $databox->get_connection();
$candidateTerms = new CandidateTerms($databox);
- $fetcher = new Fetcher(
- $databox,
- array(
- new CoreHydrator($databox->get_sbas_id(), $databox->get_viewname(), $this->helper),
- new TitleHydrator($connection),
- new MetadataHydrator($connection, $this->structure, $this->helper),
- new FlagHydrator($this->structure, $databox),
- new ThesaurusHydrator($this->structure, $this->thesaurus, $candidateTerms),
- new SubDefinitionHydrator($connection)
- ),
- $delegate
- );
+ $fetcher = new Fetcher($databox, array(
+ new CoreHydrator($databox->get_sbas_id(), $databox->get_viewname(), $this->helper),
+ new TitleHydrator($connection),
+ new MetadataHydrator($connection, $this->structure, $this->helper),
+ new FlagHydrator($this->structure, $databox),
+ new ThesaurusHydrator($this->structure, $this->thesaurus, $candidateTerms),
+ new SubDefinitionHydrator($connection)
+ ), $delegate);
$fetcher->setBatchSize(200);
$fetcher->onDrain(function() use ($candidateTerms) {
$candidateTerms->save();
@@ -246,41 +247,21 @@ class RecordIndexer
{
$databoxes = array();
foreach ($records as $record) {
- /** @var record_adapter $record */
- $databox = $record->getDatabox();
- $k = $databox->get_sbas_id();
- if(!array_key_exists($k, $databoxes)) {
- $databoxes[$k] = [
- 'databox' => $databox,
- 'records' => []
- ];
- }
- $databoxes[$k]['records'][] = $record;
+ $databox = $record->get_databox();
+ $hash = spl_object_hash($databox);
+ $databoxes[$hash]['databox'] = $databox;
+ $databoxes[$hash]['records'][] = $record;
}
return array_values($databoxes);
}
- private function indexFromFetcher(Indexer $indexer, BulkOperation $bulk, Fetcher $fetcher, array &$submited_records)
+ private function indexFromFetcher(BulkOperation $bulk, Fetcher $fetcher, array &$submited_records)
{
- $databox = $fetcher->getDatabox();
- $first = true;
- /** @var record_adapter $record */
+ /** @var RecordInterface $record */
while ($record = $fetcher->fetch()) {
- if($first) {
- $sql = "SELECT prop FROM pref WHERE prop IN('thesaurus','thesaurus_index')"
- . " ORDER BY updated_on DESC, IF(prop='thesaurus', 'a', 'z') DESC LIMIT 1";
- if($databox->get_connection()->fetchColumn($sql) == 'thesaurus') {
- // the thesaurus was modified, enforce index
- $indexer->populateIndex(Indexer::THESAURUS, $databox);
- }
- $first = false;
- }
-
$op_identifier = $this->getUniqueOperationId($record['id']);
- $this->logger->debug(sprintf("indexing record %s of databox %s", $record['record_id'], $databox->get_sbas_id()));
-
$params = array();
$params['id'] = $record['id'];
unset($record['id']);
diff --git a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer/TermIndexer.php b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer/TermIndexer.php
index c66f8583ba..76a00e07e0 100644
--- a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer/TermIndexer.php
+++ b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer/TermIndexer.php
@@ -11,12 +11,13 @@
namespace Alchemy\Phrasea\SearchEngine\Elastic\Indexer;
+use Alchemy\Phrasea\SearchEngine\Elastic\Indexer\BulkOperation;
use Alchemy\Phrasea\SearchEngine\Elastic\Mapping;
use Alchemy\Phrasea\SearchEngine\Elastic\Thesaurus\Helper;
use Alchemy\Phrasea\SearchEngine\Elastic\Thesaurus\Navigator;
use Alchemy\Phrasea\SearchEngine\Elastic\Thesaurus\TermVisitor;
use databox;
-use Psr\Log\LoggerInterface;
+use DOMDocument;
class TermIndexer
{
@@ -29,54 +30,42 @@ class TermIndexer
private $navigator;
private $locales;
- private $logger;
- public function __construct(\appbox $appbox, array $locales, LoggerInterface $logger)
+ public function __construct(\appbox $appbox, array $locales)
{
$this->appbox = $appbox;
$this->navigator = new Navigator();
$this->locales = $locales;
- $this->logger = $logger;
}
- public function populateIndex(BulkOperation $bulk, databox $databox)
+ public function populateIndex(BulkOperation $bulk, array $databoxes)
{
- $databoxId = $databox->get_sbas_id();
+ foreach ($databoxes as $databox) {
+ /** @var databox $databox */
+ $databoxId = $databox->get_sbas_id();
- $visitor = new TermVisitor(function ($term) use ($bulk, $databoxId) {
- // Path and id are prefixed with a databox identifier to not
- // collide with other databoxes terms
+ $visitor = new TermVisitor(function ($term) use ($bulk, $databoxId) {
+ // Path and id are prefixed with a databox identifier to not
+ // collide with other databoxes terms
- // Term structure
- $id = sprintf('%s_%s', $databoxId, $term['id']);
- unset($term['id']);
- $term['path'] = sprintf('/%s%s', $databoxId, $term['path']);
+ // Term structure
+ $id = sprintf('%s_%s', $databoxId, $term['id']);
+ unset($term['id']);
+ $term['path'] = sprintf('/%s%s', $databoxId, $term['path']);
+ $term['databox_id'] = $databoxId;
- $this->logger->debug(sprintf("Indexing term \"%s\"", $term['path']));
+ // Index request
+ $params = array();
+ $params['id'] = $id;
+ $params['type'] = self::TYPE_NAME;
+ $params['body'] = $term;
- $term['databox_id'] = $databoxId;
+ $bulk->index($params, null);
+ });
- // Index request
- $params = array();
- $params['id'] = $id;
- $params['type'] = self::TYPE_NAME;
- $params['body'] = $term;
-
- $bulk->index($params, null);
- });
-
-
- $indexDate = $databox->get_connection()->fetchColumn("SELECT updated_on FROM pref WHERE prop='thesaurus'");
-
- $document = Helper::thesaurusFromDatabox($databox);
- $this->navigator->walk($document, $visitor);
-
- $databox->get_connection()->executeUpdate(
- "INSERT INTO pref (prop, value, locale, updated_on, created_on)"
- . " VALUES ('thesaurus_index', '', '-', ?, NOW())"
- . " ON DUPLICATE KEY UPDATE updated_on=?",
- [$indexDate, $indexDate]
- );
+ $document = Helper::thesaurusFromDatabox($databox);
+ $this->navigator->walk($document, $visitor);
+ }
}
public function getMapping()
diff --git a/lib/Alchemy/Phrasea/SearchEngine/Elastic/IndexerSubscriber.php b/lib/Alchemy/Phrasea/SearchEngine/Elastic/IndexerSubscriber.php
index a0fa40bbb4..7e374c44fa 100644
--- a/lib/Alchemy/Phrasea/SearchEngine/Elastic/IndexerSubscriber.php
+++ b/lib/Alchemy/Phrasea/SearchEngine/Elastic/IndexerSubscriber.php
@@ -110,7 +110,7 @@ class IndexerSubscriber implements EventSubscriberInterface
public function onThesaurusChange(ThesaurusEvent $event)
{
$databox = $event->getDatabox();
- $databox->delete_data_from_cache(\databox::CACHE_THESAURUS);
+ $this->getIndexer()->scheduleRecordsFromDataboxForIndexing($databox);
}
public function onCollectionChange(CollectionEvent $event)
diff --git a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Thesaurus/Helper.php b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Thesaurus/Helper.php
index 7c4ebba7f7..e771b48327 100644
--- a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Thesaurus/Helper.php
+++ b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Thesaurus/Helper.php
@@ -67,10 +67,6 @@ class Helper
return $parents;
}
- /**
- * @param databox $databox
- * @return DOMDocument
- */
public static function thesaurusFromDatabox(databox $databox)
{
return self::document($databox->get_dom_thesaurus());
@@ -93,10 +89,6 @@ class Helper
return $document;
}
- /**
- * @param $document
- * @return DOMDocument
- */
private static function document($document)
{
if (!$document) {
diff --git a/lib/Alchemy/Phrasea/TaskManager/Job/IndexerJob.php b/lib/Alchemy/Phrasea/TaskManager/Job/IndexerJob.php
index 776ba36189..153b4ed4c1 100644
--- a/lib/Alchemy/Phrasea/TaskManager/Job/IndexerJob.php
+++ b/lib/Alchemy/Phrasea/TaskManager/Job/IndexerJob.php
@@ -11,10 +11,6 @@ namespace Alchemy\Phrasea\TaskManager\Job;
use Alchemy\Phrasea\TaskManager\Editor\IndexerEditor;
use Alchemy\Phrasea\SearchEngine\Elastic\Indexer;
-use Alchemy\Phrasea\Core\Version;
-use Silex\Application;
-use Psr\Log\LoggerInterface;
-
class IndexerJob extends AbstractJob
{
@@ -51,16 +47,12 @@ class IndexerJob extends AbstractJob
*/
protected function doJob(JobData $data)
{
-
$app = $data->getApplication();
/** @var Indexer $indexer */
$indexer = $app['elasticsearch.indexer'];
-
- foreach($app->getDataboxes() as $databox) {
- if($app->getApplicationBox()->is_databox_indexable($databox)) {
- $indexer->indexScheduledRecords($databox);
- }
- }
+ $databoxes = array_filter($app->getDataboxes(), function (\databox $databox) use ($app) {
+ return $app->getApplicationBox()->is_databox_indexable($databox);
+ });
+ $indexer->indexScheduledRecords($databoxes);
}
-}
-
+}
\ No newline at end of file
diff --git a/templates/web/thesaurus/new-term.html.twig b/templates/web/thesaurus/new-term.html.twig
index 8c7c825392..fbbf141970 100644
--- a/templates/web/thesaurus/new-term.html.twig
+++ b/templates/web/thesaurus/new-term.html.twig
@@ -14,7 +14,7 @@
{% set opener = 'opener' %}
{% endif %}
- {% if context %}
+ {% if context is not none %}
{% set zterm %}
{% trans with {'%term%' : term, '%context%' : context} %}thesaurus:: le terme %term% avec contexte %context%{% endtrans %}
{% endset %}
@@ -57,13 +57,14 @@
{% else %}
{% if nb_candidates_bad > 0 %}
+ {# present among the candidates, but no acceptable field: inform the user #}
{% set prop_label = 'thesaurus:: est candidat en provenance des champs mais ne peut etre accepte a cet emplacement du thesaurus' | trans %}
{% else %}
+ {# not present among the candidates #}
{% set prop_label = 'thesaurus:: n\'est pas present dans les candidats' | trans %}
{% endif %}
-
-
+