mirror of
https://github.com/alchemy-fr/Phraseanet.git
synced 2025-10-15 05:53:13 +00:00
Add elision on French tokenizer and fix missed config
This commit is contained in:
@@ -345,7 +345,7 @@ class ElasticSearchEngine implements SearchEngineInterface
|
|||||||
* {@inheritdoc}
|
* {@inheritdoc}
|
||||||
*
|
*
|
||||||
* @todo Allow multiple hosts!
|
* @todo Allow multiple hosts!
|
||||||
* @return ElastcSearchEngine
|
* @return \Alchemy\Phrasea\SearchEngine\Elastic\ElasticSearchEngine
|
||||||
*/
|
*/
|
||||||
public static function create(Application $app, array $options = [])
|
public static function create(Application $app, array $options = [])
|
||||||
{
|
{
|
||||||
|
@@ -49,7 +49,6 @@ class Indexer
|
|||||||
$params['index'] = $this->options['index'];
|
$params['index'] = $this->options['index'];
|
||||||
$params['body']['settings']['number_of_shards'] = $this->options['shards'];
|
$params['body']['settings']['number_of_shards'] = $this->options['shards'];
|
||||||
$params['body']['settings']['number_of_replicas'] = $this->options['replicas'];
|
$params['body']['settings']['number_of_replicas'] = $this->options['replicas'];
|
||||||
|
|
||||||
$params['body']['settings']['analysis'] = $this->getAnalysis();;
|
$params['body']['settings']['analysis'] = $this->getAnalysis();;
|
||||||
|
|
||||||
if ($withMapping) {
|
if ($withMapping) {
|
||||||
@@ -57,6 +56,7 @@ class Indexer
|
|||||||
$params['body']['mappings'][self::TYPE_RECORD] = $this->getRecordMapping();
|
$params['body']['mappings'][self::TYPE_RECORD] = $this->getRecordMapping();
|
||||||
$params['body']['mappings'][self::TYPE_TERM] = $this->getTermMapping();
|
$params['body']['mappings'][self::TYPE_TERM] = $this->getTermMapping();
|
||||||
}
|
}
|
||||||
|
|
||||||
$this->client->indices()->create($params);
|
$this->client->indices()->create($params);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -261,7 +261,7 @@ class Indexer
|
|||||||
// Business rules
|
// Business rules
|
||||||
$field['private'] = $fieldStructure->isBusiness();
|
$field['private'] = $fieldStructure->isBusiness();
|
||||||
$field['indexable'] = $fieldStructure->is_indexable();
|
$field['indexable'] = $fieldStructure->is_indexable();
|
||||||
$field['to_aggregate'] = false; // @todo
|
$field['to_aggregate'] = false; // @todo, dev in progress
|
||||||
|
|
||||||
$name = $fieldStructure->get_name();
|
$name = $fieldStructure->get_name();
|
||||||
|
|
||||||
@@ -359,54 +359,54 @@ class Indexer
|
|||||||
'general_light' => [
|
'general_light' => [
|
||||||
'type' => 'custom',
|
'type' => 'custom',
|
||||||
'tokenizer' => 'icu_tokenizer',
|
'tokenizer' => 'icu_tokenizer',
|
||||||
'filter' => ['nfkc_normalizer', 'lowercase']
|
'filter' => ['nfkc_normalizer', 'asciifolding']
|
||||||
],
|
],
|
||||||
// Lang specific
|
// Lang specific
|
||||||
'fr_full' => [
|
'fr_full' => [
|
||||||
'type' => 'custom',
|
'type' => 'custom',
|
||||||
'tokenizer' => 'icu_tokenizer', // better support for some Asian languages; uses custom rules to break Myanmar and Khmer text.
|
'tokenizer' => 'icu_tokenizer', // better support for some Asian languages; uses custom rules to break Myanmar and Khmer text.
|
||||||
'filter' => ['nfkc_normalizer', 'lowercase', 'stop_fr', 'stem_fr']
|
'filter' => ['nfkc_normalizer', 'asciifolding', 'elision', 'stop_fr', 'stem_fr']
|
||||||
],
|
],
|
||||||
'en_full' => [
|
'en_full' => [
|
||||||
'type' => 'custom',
|
'type' => 'custom',
|
||||||
'tokenizer' => 'icu_tokenizer',
|
'tokenizer' => 'icu_tokenizer',
|
||||||
'filter' => ['nfkc_normalizer', 'lowercase', 'stop_en', 'stem_en']
|
'filter' => ['nfkc_normalizer', 'asciifolding', 'stop_en', 'stem_en']
|
||||||
],
|
],
|
||||||
'de_full' => [
|
'de_full' => [
|
||||||
'type' => 'custom',
|
'type' => 'custom',
|
||||||
'tokenizer' => 'icu_tokenizer',
|
'tokenizer' => 'icu_tokenizer',
|
||||||
'filter' => ['nfkc_normalizer', 'lowercase', 'stop_de', 'stem_de']
|
'filter' => ['nfkc_normalizer', 'asciifolding', 'stop_de', 'stem_de']
|
||||||
],
|
],
|
||||||
'nl_full' => [
|
'nl_full' => [
|
||||||
'type' => 'custom',
|
'type' => 'custom',
|
||||||
'tokenizer' => 'icu_tokenizer',
|
'tokenizer' => 'icu_tokenizer',
|
||||||
'filter' => ['nfkc_normalizer', 'lowercase', 'stop_nl', 'stem_nl_override', 'stem_nl']
|
'filter' => ['nfkc_normalizer', 'asciifolding', 'stop_nl', 'stem_nl_override', 'stem_nl']
|
||||||
],
|
],
|
||||||
'es_full' => [
|
'es_full' => [
|
||||||
'type' => 'custom',
|
'type' => 'custom',
|
||||||
'tokenizer' => 'icu_tokenizer',
|
'tokenizer' => 'icu_tokenizer',
|
||||||
'filter' => ['nfkc_normalizer', 'lowercase', 'stop_es', 'stem_es']
|
'filter' => ['nfkc_normalizer', 'asciifolding', 'stop_es', 'stem_es']
|
||||||
],
|
],
|
||||||
'ar_full' => [
|
'ar_full' => [
|
||||||
'type' => 'custom',
|
'type' => 'custom',
|
||||||
'tokenizer' => 'icu_tokenizer',
|
'tokenizer' => 'icu_tokenizer',
|
||||||
'filter' => ['nfkc_normalizer', 'lowercase', 'stop_ar', 'stem_ar']
|
'filter' => ['nfkc_normalizer', 'asciifolding', 'stop_ar', 'stem_ar']
|
||||||
],
|
],
|
||||||
'ru_full' => [
|
'ru_full' => [
|
||||||
'type' => 'custom',
|
'type' => 'custom',
|
||||||
'tokenizer' => 'icu_tokenizer',
|
'tokenizer' => 'icu_tokenizer',
|
||||||
'filter' => ['nfkc_normalizer', 'lowercase', 'stop_ru', 'stem_ru']
|
'filter' => ['nfkc_normalizer', 'asciifolding', 'stop_ru', 'stem_ru']
|
||||||
],
|
],
|
||||||
'cn_full' => [ // Standard chinese analyzer is not exposed
|
'cn_full' => [ // Standard chinese analyzer is not exposed
|
||||||
'type' => 'custom',
|
'type' => 'custom',
|
||||||
'tokenizer' => 'icu_tokenizer',
|
'tokenizer' => 'icu_tokenizer',
|
||||||
'filter' => ['nfkc_normalizer', 'lowercase']
|
'filter' => ['nfkc_normalizer', 'asciifolding']
|
||||||
]
|
]
|
||||||
],
|
],
|
||||||
'filter' => [
|
'filter' => [
|
||||||
'nfkc_normalizer' => [ // weißkopfseeadler => weisskopfseeadler, ١٢٣٤٥ => 12345.
|
'nfkc_normalizer' => [ // weißkopfseeadler => weisskopfseeadler, ١٢٣٤٥ => 12345.
|
||||||
'type' => 'icu_normalizer', // œ => oe, and use the fewest bytes possible.
|
'type' => 'icu_normalizer', // œ => oe, and use the fewest bytes possible.
|
||||||
'name' => 'nfkc_cf' // nfkc_cf do the asciifolding job too.
|
'name' => 'nfkc_cf' // nfkc_cf does the lowercasing job too.
|
||||||
],
|
],
|
||||||
|
|
||||||
'stop_fr' => [
|
'stop_fr' => [
|
||||||
|
Reference in New Issue
Block a user