From f535aaf3ad7f6a727a3877c0b9e94d0e52a3e159 Mon Sep 17 00:00:00 2001 From: Damien Alexandre Date: Mon, 8 Sep 2014 15:50:47 +0200 Subject: [PATCH] Add elision on french tokenizer and fix missed config --- .../Elastic/ElasticSearchEngine.php | 2 +- .../Phrasea/SearchEngine/Elastic/Indexer.php | 24 +++++++++---------- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/lib/Alchemy/Phrasea/SearchEngine/Elastic/ElasticSearchEngine.php b/lib/Alchemy/Phrasea/SearchEngine/Elastic/ElasticSearchEngine.php index 55ccf44ce2..2036e44968 100644 --- a/lib/Alchemy/Phrasea/SearchEngine/Elastic/ElasticSearchEngine.php +++ b/lib/Alchemy/Phrasea/SearchEngine/Elastic/ElasticSearchEngine.php @@ -345,7 +345,7 @@ class ElasticSearchEngine implements SearchEngineInterface * {@inheritdoc} * * @todo Allow multiple hosts! - * @return ElastcSearchEngine + * @return \Alchemy\Phrasea\SearchEngine\Elastic\ElasticSearchEngine */ public static function create(Application $app, array $options = []) { diff --git a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer.php b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer.php index d2d041db32..b8cf5e3a65 100644 --- a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer.php +++ b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Indexer.php @@ -49,7 +49,6 @@ class Indexer $params['index'] = $this->options['index']; $params['body']['settings']['number_of_shards'] = $this->options['shards']; $params['body']['settings']['number_of_replicas'] = $this->options['replicas']; - $params['body']['settings']['analysis'] = $this->getAnalysis();; if ($withMapping) { @@ -57,6 +56,7 @@ class Indexer $params['body']['mappings'][self::TYPE_RECORD] = $this->getRecordMapping(); $params['body']['mappings'][self::TYPE_TERM] = $this->getTermMapping(); } + $this->client->indices()->create($params); } @@ -261,7 +261,7 @@ class Indexer // Business rules $field['private'] = $fieldStructure->isBusiness(); $field['indexable'] = $fieldStructure->is_indexable(); - 
$field['to_aggregate'] = false; // @todo + $field['to_aggregate'] = false; // @todo, dev in progress $name = $fieldStructure->get_name(); @@ -359,54 +359,54 @@ class Indexer 'general_light' => [ 'type' => 'custom', 'tokenizer' => 'icu_tokenizer', - 'filter' => ['nfkc_normalizer', 'lowercase'] + 'filter' => ['nfkc_normalizer', 'asciifolding'] ], // Lang specific 'fr_full' => [ 'type' => 'custom', 'tokenizer' => 'icu_tokenizer', // better support for some Asian languages and using custom rules to break Myanmar and Khmer text. - 'filter' => ['nfkc_normalizer', 'lowercase', 'stop_fr', 'stem_fr'] + 'filter' => ['nfkc_normalizer', 'asciifolding', 'elision', 'stop_fr', 'stem_fr'] ], 'en_full' => [ 'type' => 'custom', 'tokenizer' => 'icu_tokenizer', - 'filter' => ['nfkc_normalizer', 'lowercase', 'stop_en', 'stem_en'] + 'filter' => ['nfkc_normalizer', 'asciifolding', 'stop_en', 'stem_en'] ], 'de_full' => [ 'type' => 'custom', 'tokenizer' => 'icu_tokenizer', - 'filter' => ['nfkc_normalizer', 'lowercase', 'stop_de', 'stem_de'] + 'filter' => ['nfkc_normalizer', 'asciifolding', 'stop_de', 'stem_de'] ], 'nl_full' => [ 'type' => 'custom', 'tokenizer' => 'icu_tokenizer', - 'filter' => ['nfkc_normalizer', 'lowercase', 'stop_nl', 'stem_nl_override', 'stem_nl'] + 'filter' => ['nfkc_normalizer', 'asciifolding', 'stop_nl', 'stem_nl_override', 'stem_nl'] ], 'es_full' => [ 'type' => 'custom', 'tokenizer' => 'icu_tokenizer', - 'filter' => ['nfkc_normalizer', 'lowercase', 'stop_es', 'stem_es'] + 'filter' => ['nfkc_normalizer', 'asciifolding', 'stop_es', 'stem_es'] ], 'ar_full' => [ 'type' => 'custom', 'tokenizer' => 'icu_tokenizer', - 'filter' => ['nfkc_normalizer', 'lowercase', 'stop_ar', 'stem_ar'] + 'filter' => ['nfkc_normalizer', 'asciifolding', 'stop_ar', 'stem_ar'] ], 'ru_full' => [ 'type' => 'custom', 'tokenizer' => 'icu_tokenizer', - 'filter' => ['nfkc_normalizer', 'lowercase', 'stop_ru', 'stem_ru'] + 'filter' => ['nfkc_normalizer', 'asciifolding', 'stop_ru', 'stem_ru'] ], 
'cn_full' => [ // Standard chinese analyzer is not exposed 'type' => 'custom', 'tokenizer' => 'icu_tokenizer', - 'filter' => ['nfkc_normalizer', 'lowercase'] + 'filter' => ['nfkc_normalizer', 'asciifolding'] ] ], 'filter' => [ 'nfkc_normalizer' => [ // weißkopfseeadler => weisskopfseeadler, ١٢٣٤٥ => 12345. 'type' => 'icu_normalizer', // œ => oe, and use the fewest bytes possible. - 'name' => 'nfkc_cf' // nfkc_cf do the asciifolding job too. + 'name' => 'nfkc_cf' // nfkc_cf does the lowercase job too. ], 'stop_fr' => [