From a43b94684ee60e0b4a477c5d10e9b7ec32596c1c Mon Sep 17 00:00:00 2001 From: jygaulier Date: Mon, 15 Nov 2021 18:55:03 +0100 Subject: [PATCH 1/2] PHRAS-3583 add missing "raw" mapping values to date & numbers --- .../Elastic/Mapping/FieldToFieldMappingConverter.php | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Mapping/FieldToFieldMappingConverter.php b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Mapping/FieldToFieldMappingConverter.php index 89749b9518..6db3501483 100644 --- a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Mapping/FieldToFieldMappingConverter.php +++ b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Mapping/FieldToFieldMappingConverter.php @@ -27,6 +27,9 @@ class FieldToFieldMappingConverter $ret->disableIndexing(); } else { + $ret->addChild( + (new StringFieldMapping('raw')) + ->enableRawIndexing()); $ret->addChild( (new StringFieldMapping('light')) ->setAnalyzer('general_light') @@ -55,6 +58,9 @@ class FieldToFieldMappingConverter $ret->disableIndexing(); } else { + $ret->addChild( + (new StringFieldMapping('raw')) + ->enableRawIndexing()); $ret->addChild( (new StringFieldMapping('light')) ->setAnalyzer('general_light') From ce3a28b5cd60eb04549de4fd22d7adadfd6f46fc Mon Sep 17 00:00:00 2001 From: jygaulier Date: Tue, 16 Nov 2021 20:52:54 +0100 Subject: [PATCH 2/2] PHRAS-3583 define a custom "sort" analyzer (whole value, lowercase, no diacritics) fix sort by type (string/date/number) fix sort on business fields (use "private_caption.field") sort string fields on ".sort" sub-value (in place of ".raw") add option for docker/es to activate scripting on search (disabled for now) --- docker/elasticsearch/Dockerfile | 3 ++ .../Elastic/ElasticSearchEngine.php | 52 ++++++++++++++++++- .../Phrasea/SearchEngine/Elastic/Index.php | 6 +++ .../Mapping/FieldToFieldMappingConverter.php | 14 ++--- .../Elastic/Mapping/StringFieldMapping.php | 4 ++ 5 files changed, 72 insertions(+), 7 deletions(-) diff --git 
a/docker/elasticsearch/Dockerfile b/docker/elasticsearch/Dockerfile index 5847b7448f..12ccdc5b92 100644 --- a/docker/elasticsearch/Dockerfile +++ b/docker/elasticsearch/Dockerfile @@ -1,3 +1,6 @@ FROM elasticsearch:2.4 +# uncomment to allow (groovy) script in search/sort etc. +# RUN echo "script.engine.groovy.inline.search: on" >> config/elasticsearch.yml + RUN /usr/share/elasticsearch/bin/plugin install analysis-icu diff --git a/lib/Alchemy/Phrasea/SearchEngine/Elastic/ElasticSearchEngine.php b/lib/Alchemy/Phrasea/SearchEngine/Elastic/ElasticSearchEngine.php index 538755999f..43dc650c27 100644 --- a/lib/Alchemy/Phrasea/SearchEngine/Elastic/ElasticSearchEngine.php +++ b/lib/Alchemy/Phrasea/SearchEngine/Elastic/ElasticSearchEngine.php @@ -455,7 +455,57 @@ class ElasticSearchEngine implements SearchEngineInterface $sort['record_id'] = $options->getSortOrder(); } else { - $sort[sprintf('caption.%s.raw', $options->getSortBy())] = $options->getSortOrder(); + $f = array_filter( + $options->getFields(), + function (databox_field $f) use($options) { + return $f->get_name() === $options->getSortBy(); + } + ); + if(count($f) === 1) { + // exactly one field matches the requested sort name + $f = array_pop($f); + /** @var databox_field $f */ + $k = sprintf('%scaption.%s', $f->isBusiness() ? "private_":"", $options->getSortBy()); + switch ($f->get_type()) { + case databox_field::TYPE_DATE: + $sort[$k] = [ + 'order' => $options->getSortOrder(), + 'missing' => "_last", + 'unmapped_type' => "date" + ]; + break; + case databox_field::TYPE_NUMBER: + $sort[$k] = [ + 'order' => $options->getSortOrder(), + 'missing' => "_last", + 'unmapped_type' => "double" + ]; + break; + case databox_field::TYPE_STRING: + default: + $k .= '.sort'; + $sort[$k] = [ + 'order' => $options->getSortOrder(), + 'missing' => "_last", + 'unmapped_type' => "string" // ES 2.x has no "keyword" type (added in 5.0) + ]; + break; + } + } + + /* script tryout + $sort["_script"] = [ + 'type' => "string", + 'script' => [ + // 'lang' => "painless", + 'inline' => sprintf( + "doc['caption.%s'] ? 
doc['caption.%s.raw'].value : (doc['private_caption.%s'] ? doc['private_caption.%s.raw'].value : '')", + $options->getSortBy(), $options->getSortBy(), $options->getSortBy(), $options->getSortBy() + ) + ], + 'order' => "asc" + ]; + */ } if (!array_key_exists('record_id', $sort)) { diff --git a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Index.php b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Index.php index 9671b83226..c6850c60d9 100644 --- a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Index.php +++ b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Index.php @@ -83,6 +83,12 @@ class Index { $this->analysis = [ 'analyzer' => [ + // used to sort + 'sort' => [ + 'type' => 'custom', + 'tokenizer' => 'keyword', // don't tokenize, keep whole value as a string + 'filter' => ['lowercase', 'asciifolding'] // asciifolding = remove diacritics + ], // General purpose, without removing stop word or stem: improve meaning accuracy 'general_light' => [ 'type' => 'custom', diff --git a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Mapping/FieldToFieldMappingConverter.php b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Mapping/FieldToFieldMappingConverter.php index 6db3501483..807ab35376 100644 --- a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Mapping/FieldToFieldMappingConverter.php +++ b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Mapping/FieldToFieldMappingConverter.php @@ -27,9 +27,10 @@ class FieldToFieldMappingConverter $ret->disableIndexing(); } else { - $ret->addChild( - (new StringFieldMapping('raw')) - ->enableRawIndexing()); + // "raw" is no longer needed for sorting (the sort key depends on the field type) + // $ret->addChild( + // (new StringFieldMapping('raw')) + // ->enableRawIndexing()); $ret->addChild( (new StringFieldMapping('light')) ->setAnalyzer('general_light') @@ -58,9 +59,10 @@ class FieldToFieldMappingConverter $ret->disableIndexing(); } else { - $ret->addChild( - (new StringFieldMapping('raw')) - ->enableRawIndexing()); + // "raw" is no longer needed for sorting (the sort key depends on the field type) + // $ret->addChild( + // 
(new StringFieldMapping('raw')) + // ->enableRawIndexing()); $ret->addChild( (new StringFieldMapping('light')) ->setAnalyzer('general_light') diff --git a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Mapping/StringFieldMapping.php b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Mapping/StringFieldMapping.php index 4f7c9d360d..8ddef7fb96 100644 --- a/lib/Alchemy/Phrasea/SearchEngine/Elastic/Mapping/StringFieldMapping.php +++ b/lib/Alchemy/Phrasea/SearchEngine/Elastic/Mapping/StringFieldMapping.php @@ -57,6 +57,10 @@ class StringFieldMapping extends ComplexFieldMapping $child->setAnalyzer('general_light'); $this->addChild($child); + $child = new StringFieldMapping('sort'); + $child->setAnalyzer('sort'); // custom analyzer: whole value (keyword tokenizer), lowercased, diacritics folded + $this->addChild($child); + $child = new StringFieldMapping('truncated'); $child->setAnalyzer('truncation_analyzer', 'indexing'); $child->setAnalyzer('truncation_analyzer#search', 'searching');