mirror of
https://github.com/alchemy-fr/Phraseanet.git
synced 2025-10-16 14:33:14 +00:00
WIP Thesaurus terms linking
This commit is contained in:
@@ -18,6 +18,7 @@ use Alchemy\Phrasea\SearchEngine\Elastic\ElasticSearchEngine;
|
||||
use Alchemy\Phrasea\SearchEngine\Elastic\Indexer;
|
||||
use Alchemy\Phrasea\SearchEngine\Elastic\Indexer\RecordIndexer;
|
||||
use Alchemy\Phrasea\SearchEngine\Elastic\Indexer\TermIndexer;
|
||||
use Alchemy\Phrasea\SearchEngine\Elastic\Thesaurus;
|
||||
use Alchemy\Phrasea\SearchEngine\Phrasea\PhraseaEngine;
|
||||
use Alchemy\Phrasea\SearchEngine\Phrasea\PhraseaEngineSubscriber;
|
||||
use Elasticsearch\Client;
|
||||
@@ -82,6 +83,7 @@ class SearchEngineServiceProvider implements ServiceProviderInterface
|
||||
|
||||
$app['elasticsearch.indexer.record_indexer'] = $app->share(function ($app) {
|
||||
return new RecordIndexer(
|
||||
$app['elasticsearch.thesaurus'],
|
||||
$app['elasticsearch.engine'],
|
||||
$app['phraseanet.appbox']
|
||||
);
|
||||
@@ -108,6 +110,13 @@ class SearchEngineServiceProvider implements ServiceProviderInterface
|
||||
|
||||
return array_replace($defaults, $options);
|
||||
});
|
||||
|
||||
$app['elasticsearch.thesaurus'] = $app->share(function ($app) {
|
||||
return new Thesaurus(
|
||||
$app['elasticsearch.client'],
|
||||
$app['elasticsearch.options']['index']
|
||||
);
|
||||
});
|
||||
}
|
||||
|
||||
public function boot(Application $app)
|
||||
|
@@ -95,8 +95,12 @@ class Indexer
|
||||
$bulk->setAutoFlushLimit(1000);
|
||||
|
||||
$this->termIndexer->populateIndex($bulk);
|
||||
// Record indexing depends on indexed terms so we need to flush
|
||||
// between the two operations
|
||||
$bulk->flush();
|
||||
$this->recordIndexer->populateIndex($bulk);
|
||||
|
||||
// Final flush
|
||||
$bulk->flush();
|
||||
|
||||
// Optimize index
|
||||
|
@@ -19,6 +19,8 @@ use Alchemy\Phrasea\SearchEngine\Elastic\Mapping;
|
||||
use Alchemy\Phrasea\SearchEngine\Elastic\RecordFetcher;
|
||||
use Alchemy\Phrasea\SearchEngine\Elastic\RecordHelper;
|
||||
use Alchemy\Phrasea\SearchEngine\Elastic\StringUtils;
|
||||
use Alchemy\Phrasea\SearchEngine\Elastic\Thesaurus;
|
||||
use Alchemy\Phrasea\SearchEngine\Elastic\Thesaurus\Helper as ThesaurusHelper;
|
||||
use media_subdef;
|
||||
|
||||
class RecordIndexer
|
||||
@@ -35,8 +37,9 @@ class RecordIndexer
|
||||
*/
|
||||
private $elasticSearchEngine;
|
||||
|
||||
public function __construct(ElasticSearchEngine $elasticSearchEngine, \appbox $appbox)
|
||||
public function __construct(Thesaurus $thesaurus, ElasticSearchEngine $elasticSearchEngine, \appbox $appbox)
|
||||
{
|
||||
$this->thesaurus = $thesaurus;
|
||||
$this->appbox = $appbox;
|
||||
$this->elasticSearchEngine = $elasticSearchEngine;
|
||||
}
|
||||
@@ -47,12 +50,11 @@ class RecordIndexer
|
||||
$recordHelper = new RecordHelper($this->appbox);
|
||||
|
||||
foreach ($this->appbox->get_databoxes() as $databox) {
|
||||
// TODO Pass a BulkOperation object to TermIndexer to multiplex
|
||||
// indexing queries between types
|
||||
$fetcher = new RecordFetcher($databox, $recordHelper);
|
||||
$fetcher->setBatchSize(200);
|
||||
while ($records = $fetcher->fetch()) {
|
||||
foreach ($records as $record) {
|
||||
$record['concept_paths'] = $this->findLinkedConcepts($record);
|
||||
$params = array();
|
||||
$params['id'] = $record['id'];
|
||||
$params['type'] = self::TYPE_NAME;
|
||||
@@ -63,6 +65,11 @@ class RecordIndexer
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Resolves the thesaurus concept paths linked to a record.
 *
 * Work in progress: no lookup is performed yet, so every record gets an
 * empty list.
 *
 * @param mixed $record Record being indexed (currently unused)
 *
 * @return array Always an empty array for now
 */
private function findLinkedConcepts($record)
{
    $conceptPaths = array();

    return $conceptPaths;
}
|
||||
|
||||
public function getMapping()
|
||||
{
|
||||
$mapping = new Mapping();
|
||||
@@ -81,6 +88,10 @@ class RecordIndexer
|
||||
// Dates
|
||||
->add('created_on', 'date')->format(Mapping::DATE_FORMAT_MYSQL)
|
||||
->add('updated_on', 'date')->format(Mapping::DATE_FORMAT_MYSQL)
|
||||
// Inferred thesaurus concepts
|
||||
->add('concept_paths', 'string')
|
||||
->analyzer('thesaurus_path', 'indexing')
|
||||
->analyzer('keyword', 'searching')
|
||||
;
|
||||
|
||||
// Caption mapping
|
||||
@@ -149,6 +160,13 @@ class RecordIndexer
|
||||
$field['indexable'] = $fieldStructure->is_indexable();
|
||||
$field['to_aggregate'] = false; // @todo, dev in progress
|
||||
|
||||
// Thesaurus concept inference
|
||||
// $xpath = "/thesaurus/te[@id='T26'] | /thesaurus/te[@id='T24']";
|
||||
$helper = new ThesaurusHelper();
|
||||
// TODO Find thesaurus path prefixes
|
||||
$field['thesaurus_concept_inference'] = true;
|
||||
$field['thesaurus_prefix'] = '/categories';
|
||||
|
||||
$name = $fieldStructure->get_name();
|
||||
|
||||
printf("Field \"%s\" <%s> (private: %b)\n", $name, $field['type'], $field['private']);
|
||||
|
@@ -45,7 +45,7 @@ class TermIndexer
|
||||
$databoxId = $databox->get_sbas_id();
|
||||
$document = self::thesaurusFromDatabox($databox);
|
||||
$visitor = new TermVisitor(function ($term) use ($bulk, $databoxId) {
|
||||
printf("- %s (%s)\n", $term['path'], $term['value']);
|
||||
// printf("- %s (%s)\n", $term['path'], $term['value']);
|
||||
// Term structure
|
||||
$id = $term['id'];
|
||||
unset($term['id']);
|
||||
@@ -78,9 +78,7 @@ class TermIndexer
|
||||
->add('raw_value', 'string')->notAnalyzed()
|
||||
->add('value', 'string')
|
||||
->add('context', 'string')
|
||||
->add('path', 'string')
|
||||
->analyzer('thesaurus_path', 'indexing')
|
||||
->analyzer('keyword', 'searching')
|
||||
->add('path', 'string')->notAnalyzed()
|
||||
->add('lang', 'string')->notAnalyzed()
|
||||
->add('databox_id', 'integer')
|
||||
;
|
||||
|
31
lib/Alchemy/Phrasea/SearchEngine/Elastic/Thesaurus.php
Normal file
31
lib/Alchemy/Phrasea/SearchEngine/Elastic/Thesaurus.php
Normal file
@@ -0,0 +1,31 @@
|
||||
<?php
|
||||
|
||||
/*
|
||||
* This file is part of Phraseanet
|
||||
*
|
||||
* (c) 2005-2014 Alchemy
|
||||
*
|
||||
* For the full copyright and license information, please view the LICENSE
|
||||
* file that was distributed with this source code.
|
||||
*/
|
||||
|
||||
namespace Alchemy\Phrasea\SearchEngine\Elastic;
|
||||
|
||||
use Elasticsearch\Client;
|
||||
|
||||
/**
 * Entry point for thesaurus lookups backed by an Elasticsearch index.
 *
 * Work in progress: the class only stores its collaborators for now, the
 * lookup itself is not implemented.
 */
class Thesaurus
{
    /**
     * @var Client Elasticsearch client received at construction
     */
    private $client;

    /**
     * @var mixed Index identifier received at construction
     */
    private $index;

    /**
     * @param Client $client Elasticsearch client
     * @param mixed  $index  Index identifier (the service provider passes the
     *                       "index" entry of the elasticsearch options)
     */
    public function __construct(Client $client, $index)
    {
        $this->index = $index;
        $this->client = $client;
    }

    /**
     * Finds the concept path matching a term.
     *
     * Not implemented yet: the arguments are ignored and null is returned.
     *
     * @param mixed      $term    Term to look up
     * @param mixed|null $context Optional term context
     * @param mixed|null $lang    Optional language
     *
     * @return null
     */
    public function findConceptPath($term, $context = null, $lang = null)
    {
        return null;
    }
}
|
@@ -0,0 +1,35 @@
|
||||
<?php
|
||||
|
||||
/*
|
||||
* This file is part of Phraseanet
|
||||
*
|
||||
* (c) 2005-2014 Alchemy
|
||||
*
|
||||
* For the full copyright and license information, please view the LICENSE
|
||||
* file that was distributed with this source code.
|
||||
*/
|
||||
|
||||
namespace Alchemy\Phrasea\SearchEngine\Elastic\Thesaurus;
|
||||
|
||||
use DOMDocument;
|
||||
use DOMElement;
|
||||
use DOMNode;
|
||||
use DOMXPath;
|
||||
use Elasticsearch\Client;
|
||||
|
||||
/**
 * XPath utilities for thesaurus documents.
 */
class Helper
{
    /**
     * Collects the "id" attribute of every element matched by an XPath
     * expression.
     *
     * The expression given by the caller is the one evaluated, and the
     * collected identifiers are returned.
     *
     * @param \DOMDocument $document Document to query
     * @param string       $xpath    XPath expression, for instance
     *                               "/thesaurus/te[@id='T26'] | /thesaurus/te[@id='T24']"
     *
     * @return string[] "id" attribute values of the matched elements; matched
     *                  nodes that are not elements or carry no "id" attribute
     *                  are skipped
     *
     * @throws \InvalidArgumentException When the expression cannot be evaluated
     */
    public function findNodesByXPath($document, $xpath)
    {
        // Keep the expression and the evaluator in separate variables so the
        // caller's expression is not overwritten
        $finder = new \DOMXPath($document);
        $nodeList = $finder->query($xpath);

        // DOMXPath::query() returns false when the expression is malformed
        if (false === $nodeList) {
            throw new \InvalidArgumentException(sprintf('Invalid XPath expression "%s"', $xpath));
        }

        $conceptIds = [];
        foreach ($nodeList as $node) {
            // An expression may select text or attribute nodes, which do not
            // provide hasAttribute()
            if ($node instanceof \DOMElement && $node->hasAttribute('id')) {
                $conceptIds[] = $node->getAttribute('id');
            }
        }

        return $conceptIds;
    }
}
|
Reference in New Issue
Block a user