mirror of
https://github.com/alchemy-fr/Phraseanet.git
synced 2025-10-18 15:33:15 +00:00
Thesaurus terms indexing
This commit is contained in:
@@ -202,6 +202,16 @@ class Indexer
|
|||||||
'type' => 'custom',
|
'type' => 'custom',
|
||||||
'tokenizer' => 'icu_tokenizer',
|
'tokenizer' => 'icu_tokenizer',
|
||||||
'filter' => ['nfkc_normalizer', 'asciifolding']
|
'filter' => ['nfkc_normalizer', 'asciifolding']
|
||||||
|
],
|
||||||
|
// Thesaurus specific
|
||||||
|
'thesaurus_path' => [
|
||||||
|
'type' => 'custom',
|
||||||
|
'tokenizer' => 'thesaurus_path'
|
||||||
|
]
|
||||||
|
],
|
||||||
|
'tokenizer' => [
|
||||||
|
'thesaurus_path' => [
|
||||||
|
'type' => 'path_hierarchy'
|
||||||
]
|
]
|
||||||
],
|
],
|
||||||
'filter' => [
|
'filter' => [
|
||||||
|
@@ -31,30 +31,33 @@ class TermIndexer
|
|||||||
*/
|
*/
|
||||||
private $appbox;
|
private $appbox;
|
||||||
|
|
||||||
|
private $navigator;
|
||||||
|
|
||||||
public function __construct(\appbox $appbox)
|
public function __construct(\appbox $appbox)
|
||||||
{
|
{
|
||||||
$this->appbox = $appbox;
|
$this->appbox = $appbox;
|
||||||
|
$this->navigator = new Navigator();
|
||||||
}
|
}
|
||||||
|
|
||||||
public function populateIndex(BulkOperation $bulk)
|
public function populateIndex(BulkOperation $bulk)
|
||||||
{
|
{
|
||||||
// TODO Create object to query thesaurus for term paths/synonyms
|
|
||||||
|
|
||||||
$navigator = new Navigator();
|
|
||||||
|
|
||||||
foreach ($this->appbox->get_databoxes() as $databox) {
|
foreach ($this->appbox->get_databoxes() as $databox) {
|
||||||
|
$databoxId = $databox->get_sbas_id();
|
||||||
$document = self::thesaurusFromDatabox($databox);
|
$document = self::thesaurusFromDatabox($databox);
|
||||||
$visitor = new TermVisitor(function ($term) use ($bulk) {
|
$visitor = new TermVisitor(function ($term) use ($bulk, $databoxId) {
|
||||||
printf("- %s (%s)\n", $term['path'], $term['value']);
|
printf("- %s (%s)\n", $term['path'], $term['value']);
|
||||||
});
|
// Term structure
|
||||||
$navigator->walk($document, $visitor);
|
$id = $term['id'];
|
||||||
|
unset($term['id']);
|
||||||
while ($record = false) {
|
$term['databox_id'] = $databoxId;
|
||||||
|
// Index request
|
||||||
$params = array();
|
$params = array();
|
||||||
$params['id'] = $record['id'];
|
$params['id'] = $id;
|
||||||
$params['body'] = $record;
|
$params['type'] = self::TYPE_NAME;
|
||||||
|
$params['body'] = $term;
|
||||||
$bulk->index($params);
|
$bulk->index($params);
|
||||||
}
|
});
|
||||||
|
$this->navigator->walk($document, $visitor);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -72,9 +75,12 @@ class TermIndexer
|
|||||||
{
|
{
|
||||||
$mapping = new Mapping();
|
$mapping = new Mapping();
|
||||||
$mapping
|
$mapping
|
||||||
|
->add('raw_value', 'string')->notAnalyzed()
|
||||||
->add('value', 'string')
|
->add('value', 'string')
|
||||||
->add('context', 'string')
|
->add('context', 'string')
|
||||||
->add('path', 'string')
|
->add('path', 'string')
|
||||||
|
->analyzer('thesaurus_path', 'indexing')
|
||||||
|
->analyzer('keyword', 'searching')
|
||||||
->add('lang', 'string')->notAnalyzed()
|
->add('lang', 'string')->notAnalyzed()
|
||||||
->add('databox_id', 'integer')
|
->add('databox_id', 'integer')
|
||||||
;
|
;
|
||||||
|
@@ -90,6 +90,31 @@ class Mapping
|
|||||||
return $properties;
|
return $properties;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public function analyzer($analyzer, $type = null)
|
||||||
|
{
|
||||||
|
$field = &$this->currentField();
|
||||||
|
if ($field['type'] !== self::TYPE_STRING) {
|
||||||
|
throw new LogicException('Only string fields can be analyzed');
|
||||||
|
}
|
||||||
|
switch ($type) {
|
||||||
|
case null:
|
||||||
|
$field['analyzer'] = $analyzer;
|
||||||
|
unset($field['index_analyzer'], $field['search_analyzer']);
|
||||||
|
break;
|
||||||
|
case 'indexing':
|
||||||
|
$field['index_analyzer'] = $analyzer;
|
||||||
|
break;
|
||||||
|
case 'searching':
|
||||||
|
$field['search_analyzer'] = $analyzer;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
throw new LogicException(sprintf('Invalid analyzer type "%s".', $type));
|
||||||
|
}
|
||||||
|
$field['index'] = 'analyzed';
|
||||||
|
|
||||||
|
return $this;
|
||||||
|
}
|
||||||
|
|
||||||
public function notAnalyzed()
|
public function notAnalyzed()
|
||||||
{
|
{
|
||||||
$field = &$this->currentField();
|
$field = &$this->currentField();
|
||||||
|
@@ -24,6 +24,7 @@ use DOMNode;
|
|||||||
class TermVisitor implements VisitorInterface
|
class TermVisitor implements VisitorInterface
|
||||||
{
|
{
|
||||||
const TERM_TAG_NAME = 'sy';
|
const TERM_TAG_NAME = 'sy';
|
||||||
|
const TERM_ID_ATTR = 'id';
|
||||||
const TERM_LANG_ATTR = 'lng';
|
const TERM_LANG_ATTR = 'lng';
|
||||||
const TERM_VALUE_ATTR = 'v';
|
const TERM_VALUE_ATTR = 'v';
|
||||||
// So, this is a huuuge regex to match a group of words eventually followed
|
// So, this is a huuuge regex to match a group of words eventually followed
|
||||||
@@ -54,7 +55,8 @@ class TermVisitor implements VisitorInterface
|
|||||||
$term = $this->parseTermValue($value);
|
$term = $this->parseTermValue($value);
|
||||||
$term += [
|
$term += [
|
||||||
'path' => $this->getCurrentPathAsString(),
|
'path' => $this->getCurrentPathAsString(),
|
||||||
'lang' => $this->getTermAttribute($element, self::TERM_LANG_ATTR)
|
'lang' => $this->getTermAttribute($element, self::TERM_LANG_ATTR),
|
||||||
|
'id' => $this->getTermAttribute($element, self::TERM_ID_ATTR)
|
||||||
];
|
];
|
||||||
|
|
||||||
call_user_func($this->termCallback, $term);
|
call_user_func($this->termCallback, $term);
|
||||||
|
Reference in New Issue
Block a user