mirror of
https://github.com/alchemy-fr/Phraseanet.git
synced 2025-10-18 15:33:15 +00:00
Thesaurus terms indexing
This commit is contained in:
@@ -202,6 +202,16 @@ class Indexer
|
||||
'type' => 'custom',
|
||||
'tokenizer' => 'icu_tokenizer',
|
||||
'filter' => ['nfkc_normalizer', 'asciifolding']
|
||||
],
|
||||
// Thesaurus specific
|
||||
'thesaurus_path' => [
|
||||
'type' => 'custom',
|
||||
'tokenizer' => 'thesaurus_path'
|
||||
]
|
||||
],
|
||||
'tokenizer' => [
|
||||
'thesaurus_path' => [
|
||||
'type' => 'path_hierarchy'
|
||||
]
|
||||
],
|
||||
'filter' => [
|
||||
|
@@ -31,30 +31,33 @@ class TermIndexer
|
||||
*/
|
||||
private $appbox;
|
||||
|
||||
private $navigator;
|
||||
|
||||
/**
 * Keeps a reference to the appbox and creates the Navigator instance that
 * is stored on this indexer.
 *
 * @param \appbox $appbox
 */
public function __construct(\appbox $appbox)
{
    $this->navigator = new Navigator();
    $this->appbox = $appbox;
}
|
||||
|
||||
/**
 * Queues one index request per thesaurus term of every databox.
 *
 * For each databox returned by the appbox, the thesaurus document is walked
 * once with a TermVisitor; every term passed to the visitor callback is
 * handed to $bulk as an index request.
 *
 * @param BulkOperation $bulk Bulk operation receiving the index requests.
 */
public function populateIndex(BulkOperation $bulk)
{
    foreach ($this->appbox->get_databoxes() as $databox) {
        $databoxId = $databox->get_sbas_id();
        $document = self::thesaurusFromDatabox($databox);

        $visitor = new TermVisitor(function ($term) use ($bulk, $databoxId) {
            // The term id is used as the document id, so it is taken out of
            // the body before the databox id is attached to it.
            $id = $term['id'];
            unset($term['id']);
            $term['databox_id'] = $databoxId;

            $bulk->index(array(
                'id'   => $id,
                'type' => self::TYPE_NAME,
                'body' => $term,
            ));
        });

        // Single walk, using the navigator created in the constructor.
        $this->navigator->walk($document, $visitor);
    }
}
|
||||
|
||||
@@ -72,9 +75,12 @@ class TermIndexer
|
||||
{
|
||||
$mapping = new Mapping();
|
||||
$mapping
|
||||
->add('raw_value', 'string')->notAnalyzed()
|
||||
->add('value', 'string')
|
||||
->add('context', 'string')
|
||||
->add('path', 'string')
|
||||
->analyzer('thesaurus_path', 'indexing')
|
||||
->analyzer('keyword', 'searching')
|
||||
->add('lang', 'string')->notAnalyzed()
|
||||
->add('databox_id', 'integer')
|
||||
;
|
||||
|
@@ -90,6 +90,31 @@ class Mapping
|
||||
return $properties;
|
||||
}
|
||||
|
||||
/**
 * Assigns an analyzer to the field returned by currentField() and marks
 * that field as analyzed.
 *
 * @param string      $analyzer Analyzer name to assign.
 * @param string|null $type     null sets "analyzer" and clears any
 *                              "index_analyzer"/"search_analyzer";
 *                              "indexing" sets only "index_analyzer";
 *                              "searching" sets only "search_analyzer".
 *
 * @return $this
 *
 * @throws LogicException When the current field is not a string field, or
 *                        when $type is not one of the accepted values.
 */
public function analyzer($analyzer, $type = null)
{
    $field = &$this->currentField();
    if ($field['type'] !== self::TYPE_STRING) {
        throw new LogicException('Only string fields can be analyzed');
    }

    // Strict comparisons on purpose: a loose switch would let '', 0 or false
    // pass as null and true pass as 'indexing' instead of being rejected.
    if ($type === null) {
        $field['analyzer'] = $analyzer;
        unset($field['index_analyzer'], $field['search_analyzer']);
    } elseif ($type === 'indexing') {
        $field['index_analyzer'] = $analyzer;
    } elseif ($type === 'searching') {
        $field['search_analyzer'] = $analyzer;
    } else {
        throw new LogicException(sprintf('Invalid analyzer type "%s".', $type));
    }

    $field['index'] = 'analyzed';

    return $this;
}
|
||||
|
||||
public function notAnalyzed()
|
||||
{
|
||||
$field = &$this->currentField();
|
||||
|
@@ -24,6 +24,7 @@ use DOMNode;
|
||||
class TermVisitor implements VisitorInterface
|
||||
{
|
||||
const TERM_TAG_NAME = 'sy';
|
||||
const TERM_ID_ATTR = 'id';
|
||||
const TERM_LANG_ATTR = 'lng';
|
||||
const TERM_VALUE_ATTR = 'v';
|
||||
// So, this is a huuuge regex to match a group of words eventually followed
|
||||
@@ -54,7 +55,8 @@ class TermVisitor implements VisitorInterface
|
||||
$term = $this->parseTermValue($value);
|
||||
$term += [
|
||||
'path' => $this->getCurrentPathAsString(),
|
||||
'lang' => $this->getTermAttribute($element, self::TERM_LANG_ATTR)
|
||||
'lang' => $this->getTermAttribute($element, self::TERM_LANG_ATTR),
|
||||
'id' => $this->getTermAttribute($element, self::TERM_ID_ATTR)
|
||||
];
|
||||
|
||||
call_user_func($this->termCallback, $term);
|
||||
|
Reference in New Issue
Block a user