From ba8be474237eff500827bfd6eedf496b19c6b95e Mon Sep 17 00:00:00 2001 From: Thibaud Fabre Date: Tue, 18 Oct 2016 20:15:23 +0200 Subject: [PATCH] Revert "PHRAS-714_thesaurus_indexation" --- .../SearchEngine/IndexPopulateCommand.php | 9 +- .../Provider/SearchEngineServiceProvider.php | 9 +- .../Phrasea/SearchEngine/Elastic/Indexer.php | 35 +++--- .../Elastic/Indexer/RecordIndexer.php | 115 ++++++++---------- .../Elastic/Indexer/TermIndexer.php | 61 ++++------ .../Elastic/IndexerSubscriber.php | 2 +- .../SearchEngine/Elastic/Thesaurus/Helper.php | 8 -- .../Phrasea/TaskManager/Job/IndexerJob.php | 18 +-- templates/web/thesaurus/new-term.html.twig | 26 ++-- 9 files changed, 121 insertions(+), 162 deletions(-) diff --git a/lib/Alchemy/Phrasea/Command/SearchEngine/IndexPopulateCommand.php b/lib/Alchemy/Phrasea/Command/SearchEngine/IndexPopulateCommand.php index 2704226b3b..92fd8f89d1 100644 --- a/lib/Alchemy/Phrasea/Command/SearchEngine/IndexPopulateCommand.php +++ b/lib/Alchemy/Phrasea/Command/SearchEngine/IndexPopulateCommand.php @@ -59,13 +59,8 @@ class IndexPopulateCommand extends Command throw new \RuntimeException("Could not provide --thesaurus and --records option at the same time."); } - $databoxes_id = $input->getOption('databox_id'); + $databoxes = $input->getOption('databox_id'); - $app = $this->container; - foreach($app->getDataboxes() as $databox) { - if(!$databoxes_id || in_array($databox->get_sbas_id(), $databoxes_id)) { - $this->container['elasticsearch.indexer']->populateIndex($what, $databox); - } - } + $this->container['elasticsearch.indexer']->populateIndex($what, $databoxes); } } diff --git a/lib/Alchemy/Phrasea/Core/Provider/SearchEngineServiceProvider.php b/lib/Alchemy/Phrasea/Core/Provider/SearchEngineServiceProvider.php index 52864ac767..ac9bdb8c4a 100644 --- a/lib/Alchemy/Phrasea/Core/Provider/SearchEngineServiceProvider.php +++ b/lib/Alchemy/Phrasea/Core/Provider/SearchEngineServiceProvider.php @@ -89,17 +89,12 @@ class SearchEngineServiceProvider implements ServiceProviderInterface $app['elasticsearch.options'], $app['elasticsearch.indexer.term_indexer'], $app['elasticsearch.indexer.record_indexer'], - $app['phraseanet.appbox'], - new Logger('es.indexer') + $app['phraseanet.appbox'] ); }); $app['elasticsearch.indexer.term_indexer'] = $app->share(function ($app) { - return new TermIndexer( - $app['phraseanet.appbox'], - array_keys($app['locales.available']), - new Logger('term.indexer') - ); + return new TermIndexer($app['phraseanet.appbox'], array_keys($app['locales.available'])); }); $app['elasticsearch.indexer.record_indexer'] = $app->share(function ($app) { diff --git a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer.php b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer.php index ad14d3ec0a..bc0da95783 100644 --- a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer.php +++ b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer.php @@ -22,7 +22,6 @@ use Elasticsearch\Client; use Psr\Log\LoggerInterface; use igorw; use Psr\Log\NullLogger; -use record_adapter; use Symfony\Component\Stopwatch\Stopwatch; use SplObjectStorage; @@ -42,9 +41,7 @@ class Indexer private $recordIndexer; private $termIndexer; - /** @var SplObjectStorage */ - private $indexQueue; // contains record_adapter(s) - /** @var SplObjectStorage */ + private $indexQueue; // contains RecordInterface(s) private $deleteQueue; public function __construct(Client $client, ElasticsearchOptions $options, TermIndexer $termIndexer, RecordIndexer $recordIndexer, appbox $appbox, LoggerInterface $logger = null) @@ -101,15 +98,21 @@ class Indexer return $this->client->indices()->exists($params); } - public function populateIndex($what, \databox $databox) + public function populateIndex($what, array $databoxes_id = []) { $stopwatch = new Stopwatch(); $stopwatch->start('populate'); - $this->apply(function (BulkOperation $bulk) use ($what, $databox) { + if ($databoxes_id) { + // If databoxes are given, only use those + $databoxes = array_map(array($this->appbox, 'get_databox'), $databoxes_id); + } else { + $databoxes = $this->appbox->get_databoxes(); + } + $this->apply(function(BulkOperation $bulk) use ($what, $databoxes) { if ($what & self::THESAURUS) { - $this->termIndexer->populateIndex($bulk, $databox); + $this->termIndexer->populateIndex($bulk, $databoxes); // Record indexing depends on indexed terms so we need to make // everything ready to search @@ -118,7 +121,7 @@ class Indexer } if ($what & self::RECORDS) { - $this->recordIndexer->populateIndex($this, $bulk, $databox); + $this->recordIndexer->populateIndex($bulk, $databoxes); // Final flush $bulk->flush(); @@ -130,7 +133,7 @@ class Indexer }); $event = $stopwatch->stop('populate'); - $this->logger->info(sprintf("Indexation finished in %0.02f sec (Mem. %0.02f Mo)", ($event->getDuration()/1000), $event->getMemory()/1048576)); + printf("Indexation finished in %s min (Mem. %s Mo)", ($event->getDuration()/1000/60), bcdiv($event->getMemory(), 1048576, 2)); } public function migrateMappingForDatabox($databox) @@ -157,24 +160,24 @@ class Indexer RecordQueuer::queueRecordsFromCollection($collection); } - public function indexRecord(record_adapter $record) + public function indexRecord(RecordInterface $record) { $this->indexQueue->attach($record); } - public function deleteRecord(record_adapter $record) + public function deleteRecord(RecordInterface $record) { $this->deleteQueue->attach($record); } /** - * @param \databox $databox databox to index + * @param \databox[] $databoxes databoxes to index * @throws \Exception */ - public function indexScheduledRecords(\databox $databox) + public function indexScheduledRecords(array $databoxes) { - $this->apply(function(BulkOperation $bulk) use ($databox) { - $this->recordIndexer->indexScheduled($this, $bulk, $databox); + $this->apply(function(BulkOperation $bulk) use($databoxes) { + $this->recordIndexer->indexScheduled($bulk, $databoxes); }); } @@ -189,7 +192,7 @@ class Indexer } $this->apply(function(BulkOperation $bulk) { - $this->recordIndexer->index($this, $bulk, $this->indexQueue); + $this->recordIndexer->index($bulk, $this->indexQueue); $this->recordIndexer->delete($bulk, $this->deleteQueue); $bulk->flush(); }); diff --git a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer/RecordIndexer.php b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer/RecordIndexer.php index 92a1ee2ea6..2cbbef0ac0 100644 --- a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer/RecordIndexer.php +++ b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer/RecordIndexer.php @@ -10,7 +10,7 @@ namespace Alchemy\Phrasea\SearchEngine\Elastic\Indexer; -use Alchemy\Phrasea\SearchEngine\Elastic\Indexer; +use Alchemy\Phrasea\Model\RecordInterface; use Alchemy\Phrasea\SearchEngine\Elastic\Indexer\Record\Delegate\FetcherDelegateInterface; use Alchemy\Phrasea\SearchEngine\Elastic\Indexer\Record\Delegate\RecordListFetcherDelegate; use Alchemy\Phrasea\SearchEngine\Elastic\Indexer\Record\Delegate\ScheduledFetcherDelegate; @@ -30,7 +30,6 @@ use Alchemy\Phrasea\SearchEngine\Elastic\Thesaurus\CandidateTerms; use databox; use Iterator; use Psr\Log\LoggerInterface; -use record_adapter; class RecordIndexer { @@ -94,44 +93,52 @@ class RecordIndexer * index whole databox(es), don't test actual "jetons" * called by command "populate" * - * @param Indexer $indexer * @param BulkOperation $bulk - * @param databox $databox + * @param databox[] $databoxes */ - public function populateIndex(Indexer $indexer, BulkOperation $bulk, databox $databox) + public function populateIndex(BulkOperation $bulk, array $databoxes) { - $submited_records = []; + foreach ($databoxes as $databox) { - $this->logger->info(sprintf('Indexing database %s...', $databox->get_viewname())); + $submited_records = []; - $fetcher = $this->createFetcherForDatabox($databox); // no delegate, scan the whole records + $this->logger->info(sprintf('Indexing database %s...', $databox->get_viewname())); - // post fetch : flag records as "indexing" - $fetcher->setPostFetch(function(array $records) use ($databox, $fetcher) { - RecordQueuer::didStartIndexingRecords($records, $databox); - // do not restart the fetcher since it has no clause on jetons - }); + $fetcher = $this->createFetcherForDatabox($databox); // no delegate, scan the whole records - // bulk flush : flag records as "indexed" - $bulk->onFlush(function($operation_identifiers) use ($databox, &$submited_records) { - $this->onBulkFlush($databox, $operation_identifiers, $submited_records); - }); + // post fetch : flag records as "indexing" + $fetcher->setPostFetch(function(array $records) use ($databox, $fetcher) { + RecordQueuer::didStartIndexingRecords($records, $databox); + // do not restart the fetcher since it has no clause on jetons + }); - // Perform indexing - $this->indexFromFetcher($indexer, $bulk, $fetcher, $submited_records); + // bulk flush : flag records as "indexed" + $bulk->onFlush(function($operation_identifiers) use ($databox, &$submited_records) { + $this->onBulkFlush($databox, $operation_identifiers, $submited_records); + }); - $this->logger->info(sprintf('Finished indexing %s', $databox->get_viewname())); + // Perform indexing + $this->indexFromFetcher($bulk, $fetcher, $submited_records); + + $this->logger->info(sprintf('Finished indexing %s', $databox->get_viewname())); + } } /** - * Index the records flagged as "to_index" on databox + * Index the records flagged as "to_index" on databoxes * called by task "indexer" * - * @param Indexer $indexer * @param BulkOperation $bulk - * @param databox $databox + * @param databox[] $databoxes */ - public function indexScheduled(Indexer $indexer, BulkOperation $bulk, databox $databox) + public function indexScheduled(BulkOperation $bulk, array $databoxes) + { + foreach ($databoxes as $databox) { + $this->indexScheduledInDatabox($bulk, $databox); + } + } + + private function indexScheduledInDatabox(BulkOperation $bulk, databox $databox) { $submited_records = []; @@ -141,7 +148,6 @@ class RecordIndexer // post fetch : flag records as "indexing" $fetcher->setPostFetch(function(array $records) use ($databox, $fetcher) { - $this->logger->debug(sprintf("indexing %d records", count($records))); RecordQueuer::didStartIndexingRecords($records, $databox); // because changing the flag on the records affects the "where" clause of the fetcher, // restart it each time @@ -154,17 +160,16 @@ class RecordIndexer }); // Perform indexing - $this->indexFromFetcher($indexer, $bulk, $fetcher, $submited_records); + $this->indexFromFetcher($bulk, $fetcher, $submited_records); } /** * Index a list of records * - * @param Indexer $indexer * @param BulkOperation $bulk * @param Iterator $records */ - public function index(Indexer $indexer, BulkOperation $bulk, Iterator $records) + public function index(BulkOperation $bulk, Iterator $records) { foreach ($this->createFetchersForRecords($records) as $fetcher) { $submited_records = []; @@ -182,7 +187,7 @@ class RecordIndexer }); // Perform indexing - $this->indexFromFetcher($indexer, $bulk, $fetcher, $submited_records); + $this->indexFromFetcher($bulk, $fetcher, $submited_records); } } @@ -222,18 +227,14 @@ class RecordIndexer { $connection = $databox->get_connection(); $candidateTerms = new CandidateTerms($databox); - $fetcher = new Fetcher( - $databox, - array( - new CoreHydrator($databox->get_sbas_id(), $databox->get_viewname(), $this->helper), - new TitleHydrator($connection), - new MetadataHydrator($connection, $this->structure, $this->helper), - new FlagHydrator($this->structure, $databox), - new ThesaurusHydrator($this->structure, $this->thesaurus, $candidateTerms), - new SubDefinitionHydrator($connection) - ), - $delegate - ); + $fetcher = new Fetcher($databox, array( + new CoreHydrator($databox->get_sbas_id(), $databox->get_viewname(), $this->helper), + new TitleHydrator($connection), + new MetadataHydrator($connection, $this->structure, $this->helper), + new FlagHydrator($this->structure, $databox), + new ThesaurusHydrator($this->structure, $this->thesaurus, $candidateTerms), + new SubDefinitionHydrator($connection) + ), $delegate); $fetcher->setBatchSize(200); $fetcher->onDrain(function() use ($candidateTerms) { $candidateTerms->save(); @@ -246,41 +247,21 @@ class RecordIndexer { $databoxes = array(); foreach ($records as $record) { - /** @var record_adapter $record */ - $databox = $record->getDatabox(); - $k = $databox->get_sbas_id(); - if(!array_key_exists($k, $databoxes)) { - $databoxes[$k] = [ - 'databox' => $databox, - 'records' => [] - ]; - } - $databoxes[$k]['records'][] = $record; + $databox = $record->get_databox(); + $hash = spl_object_hash($databox); + $databoxes[$hash]['databox'] = $databox; + $databoxes[$hash]['records'][] = $record; } return array_values($databoxes); } - private function indexFromFetcher(Indexer $indexer, BulkOperation $bulk, Fetcher $fetcher, array &$submited_records) + private function indexFromFetcher(BulkOperation $bulk, Fetcher $fetcher, array &$submited_records) { - $databox = $fetcher->getDatabox(); - $first = true; - /** @var record_adapter $record */ + /** @var RecordInterface $record */ while ($record = $fetcher->fetch()) { - if($first) { - $sql = "SELECT prop FROM pref WHERE prop IN('thesaurus','thesaurus_index')" - . " ORDER BY updated_on DESC, IF(prop='thesaurus', 'a', 'z') DESC LIMIT 1"; - if($databox->get_connection()->fetchColumn($sql) == 'thesaurus') { - // the thesaurus was modified, enforce index - $indexer->populateIndex(Indexer::THESAURUS, $databox); - } - $first = false; - } - $op_identifier = $this->getUniqueOperationId($record['id']); - $this->logger->debug(sprintf("indexing record %s of databox %s", $record['record_id'], $databox->get_sbas_id())); - $params = array(); $params['id'] = $record['id']; unset($record['id']); diff --git a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer/TermIndexer.php b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer/TermIndexer.php index c66f8583ba..76a00e07e0 100644 --- a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer/TermIndexer.php +++ b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer/TermIndexer.php @@ -11,12 +11,13 @@ namespace Alchemy\Phrasea\SearchEngine\Elastic\Indexer; +use Alchemy\Phrasea\SearchEngine\Elastic\Indexer\BulkOperation; use Alchemy\Phrasea\SearchEngine\Elastic\Mapping; use Alchemy\Phrasea\SearchEngine\Elastic\Thesaurus\Helper; use Alchemy\Phrasea\SearchEngine\Elastic\Thesaurus\Navigator; use Alchemy\Phrasea\SearchEngine\Elastic\Thesaurus\TermVisitor; use databox; -use Psr\Log\LoggerInterface; +use DOMDocument; class TermIndexer { @@ -29,54 +30,42 @@ class TermIndexer private $navigator; private $locales; - private $logger; - public function __construct(\appbox $appbox, array $locales, LoggerInterface $logger) + public function __construct(\appbox $appbox, array $locales) { $this->appbox = $appbox; $this->navigator = new Navigator(); $this->locales = $locales; - $this->logger = $logger; } - public function populateIndex(BulkOperation $bulk, databox $databox) + public function populateIndex(BulkOperation $bulk, array $databoxes) { - $databoxId = $databox->get_sbas_id(); + foreach ($databoxes as $databox) { + /** @var databox $databox */ + $databoxId = $databox->get_sbas_id(); - $visitor = new TermVisitor(function ($term) use ($bulk, $databoxId) { - // Path and id are prefixed with a databox identifier to not - // collide with other databoxes terms + $visitor = new TermVisitor(function ($term) use ($bulk, $databoxId) { + // Path and id are prefixed with a databox identifier to not + // collide with other databoxes terms - // Term structure - $id = sprintf('%s_%s', $databoxId, $term['id']); - unset($term['id']); - $term['path'] = sprintf('/%s%s', $databoxId, $term['path']); + // Term structure + $id = sprintf('%s_%s', $databoxId, $term['id']); + unset($term['id']); + $term['path'] = sprintf('/%s%s', $databoxId, $term['path']); + $term['databox_id'] = $databoxId; - $this->logger->debug(sprintf("Indexing term \"%s\"", $term['path'])); + // Index request + $params = array(); + $params['id'] = $id; + $params['type'] = self::TYPE_NAME; + $params['body'] = $term; - $term['databox_id'] = $databoxId; + $bulk->index($params, null); + }); - // Index request - $params = array(); - $params['id'] = $id; - $params['type'] = self::TYPE_NAME; - $params['body'] = $term; - - $bulk->index($params, null); - }); - - - $indexDate = $databox->get_connection()->fetchColumn("SELECT updated_on FROM pref WHERE prop='thesaurus'"); - - $document = Helper::thesaurusFromDatabox($databox); - $this->navigator->walk($document, $visitor); - - $databox->get_connection()->executeUpdate( - "INSERT INTO pref (prop, value, locale, updated_on, created_on)" - . " VALUES ('thesaurus_index', '', '-', ?, NOW())" - . " ON DUPLICATE KEY UPDATE updated_on=?", - [$indexDate, $indexDate] - ); + $document = Helper::thesaurusFromDatabox($databox); + $this->navigator->walk($document, $visitor); + } } public function getMapping() diff --git a/lib/Alchemy/Phrasea/SearchEngine/Elastic/IndexerSubscriber.php b/lib/Alchemy/Phrasea/SearchEngine/Elastic/IndexerSubscriber.php index a0fa40bbb4..7e374c44fa 100644 --- a/lib/Alchemy/Phrasea/SearchEngine/Elastic/IndexerSubscriber.php +++ b/lib/Alchemy/Phrasea/SearchEngine/Elastic/IndexerSubscriber.php @@ -110,7 +110,7 @@ class IndexerSubscriber implements EventSubscriberInterface public function onThesaurusChange(ThesaurusEvent $event) { $databox = $event->getDatabox(); - $databox->delete_data_from_cache(\databox::CACHE_THESAURUS); + $this->getIndexer()->scheduleRecordsFromDataboxForIndexing($databox); } public function onCollectionChange(CollectionEvent $event) diff --git a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Thesaurus/Helper.php b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Thesaurus/Helper.php index 7c4ebba7f7..e771b48327 100644 --- a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Thesaurus/Helper.php +++ b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Thesaurus/Helper.php @@ -67,10 +67,6 @@ class Helper return $parents; } - /** - * @param databox $databox - * @return DOMDocument - */ public static function thesaurusFromDatabox(databox $databox) { return self::document($databox->get_dom_thesaurus()); @@ -93,10 +89,6 @@ class Helper return $document; } - /** - * @param $document - * @return DOMDocument - */ private static function document($document) { if (!$document) { diff --git a/lib/Alchemy/Phrasea/TaskManager/Job/IndexerJob.php b/lib/Alchemy/Phrasea/TaskManager/Job/IndexerJob.php index 776ba36189..153b4ed4c1 100644 --- a/lib/Alchemy/Phrasea/TaskManager/Job/IndexerJob.php +++ b/lib/Alchemy/Phrasea/TaskManager/Job/IndexerJob.php @@ -11,10 +11,6 @@ namespace Alchemy\Phrasea\TaskManager\Job; use Alchemy\Phrasea\TaskManager\Editor\IndexerEditor; use Alchemy\Phrasea\SearchEngine\Elastic\Indexer; -use Alchemy\Phrasea\Core\Version; -use Silex\Application; -use Psr\Log\LoggerInterface; - class IndexerJob extends AbstractJob { @@ -51,16 +47,12 @@ class IndexerJob extends AbstractJob */ protected function doJob(JobData $data) { - $app = $data->getApplication(); /** @var Indexer $indexer */ $indexer = $app['elasticsearch.indexer']; - - foreach($app->getDataboxes() as $databox) { - if($app->getApplicationBox()->is_databox_indexable($databox)) { - $indexer->indexScheduledRecords($databox); - } - } + $databoxes = array_filter($app->getDataboxes(), function (\databox $databox) use ($app) { + return $app->getApplicationBox()->is_databox_indexable($databox); + }); + $indexer->indexScheduledRecords($databoxes); } -} - +} \ No newline at end of file diff --git a/templates/web/thesaurus/new-term.html.twig b/templates/web/thesaurus/new-term.html.twig index 8c7c825392..fbbf141970 100644 --- a/templates/web/thesaurus/new-term.html.twig +++ b/templates/web/thesaurus/new-term.html.twig @@ -14,7 +14,7 @@ {% set opener = 'opener' %} {% endif %} - {% if context %} + {% if context is not none %} {% set zterm %} {% trans with {'%term%' : term, '%context%' : context} %}thesaurus:: le terme %term% avec contexte %context%{% endtrans %} {% endset %} @@ -57,13 +57,14 @@ {% else %} {% if nb_candidates_bad > 0 %} + // present dans les candidats, mais aucun champ acceptable : on informe {% set prop_label = 'thesaurus:: est candidat en provenance des champs mais ne peut etre accepte a cet emplacement du thesaurus' | trans %} {% else %} + // pas present dans les candidats {% set prop_label = 'thesaurus:: n\'est pas present dans les candidats' | trans %} {% endif %}
-
-
+

{{ 'thesaurus:: attention :' | trans }}



{{ zterm }} @@ -71,10 +72,14 @@
{{ prop_label }}
-
-
-
+
+
+
+
+
+
+
    @@ -154,8 +159,15 @@ parms += "&k={{ context | url_encode }}"; {% endif %} parms += "&sylng={{ sylng }}"; - parms += "&reindex=0"; + for(i=0; i<(n=document.getElementsByName("reindex")).length; i++) + { + if(n[i].checked) + { + parms += "&reindex=" + encodeURIComponent(n[i].value); + break; + } + } ret = loadXMLDoc(url, parms, true); refresh = ret.getElementsByTagName("refresh"); for(i=0; i