PHRAS-3583

define a custom "sort" analyzer (whole value, lowercase, no diacritics)
fix sort by type (string/date/number)
fix sort on business fields (use "private_caption.field")
sort string fields on the ".sort" sub-field (instead of ".raw")
add a docker/es option to enable (groovy) scripting in search (left commented out, i.e. disabled for now)
This commit is contained in:
jygaulier
2021-11-16 20:52:54 +01:00
parent a43b94684e
commit ce3a28b5cd
5 changed files with 72 additions and 7 deletions

View File

@@ -1,3 +1,6 @@
FROM elasticsearch:2.4 FROM elasticsearch:2.4
# Uncomment the next line to allow inline (groovy) scripts in search, sort, etc.
# RUN echo "script.engine.groovy.inline.search: on" >> config/elasticsearch.yml
RUN /usr/share/elasticsearch/bin/plugin install analysis-icu RUN /usr/share/elasticsearch/bin/plugin install analysis-icu

View File

@@ -455,7 +455,57 @@ class ElasticSearchEngine implements SearchEngineInterface
$sort['record_id'] = $options->getSortOrder(); $sort['record_id'] = $options->getSortOrder();
} }
else { else {
$sort[sprintf('caption.%s.raw', $options->getSortBy())] = $options->getSortOrder(); $f = array_filter(
$options->getFields(),
function (databox_field $f) use($options) {
return $f->get_name() === $options->getSortBy();
}
);
if(count($f) == 1) {
// the field is found
$f = array_pop($f);
/** @var databox_field $f */
$k = sprintf('%scaption.%s', $f->isBusiness() ? "private_":"", $options->getSortBy());
switch ($f->get_type()) {
case databox_field::TYPE_DATE:
$sort[$k] = [
'order' => $options->getSortOrder(),
'missing' => "_last",
'unmapped_type' => "date"
];
break;
case databox_field::TYPE_NUMBER:
$sort[$k] = [
'order' => $options->getSortOrder(),
'missing' => "_last",
'unmapped_type' => "double"
];
break;
case databox_field::TYPE_STRING:
default:
$k .= '.sort';
$sort[$k] = [
'order' => $options->getSortOrder(),
'missing' => "_last",
'unmapped_type' => "keyword"
];
break;
}
}
/* script tryout
$sort["_script"] = [
'type' => "string",
'script' => [
// 'lang' => "painless",
'inline' => sprintf(
"doc['caption.%s'] ? doc['caption.%s.raw'].value : (doc['private_caption.%s'] ? doc['private_caption.%s.raw'].value : '')",
$options->getSortBy(), $options->getSortBy(), $options->getSortBy(), $options->getSortBy()
)
],
'order' => "asc"
];
*/
} }
if (!array_key_exists('record_id', $sort)) { if (!array_key_exists('record_id', $sort)) {

View File

@@ -83,6 +83,12 @@ class Index
{ {
$this->analysis = [ $this->analysis = [
'analyzer' => [ 'analyzer' => [
// used to sort
'sort' => [
'type' => 'custom',
'tokenizer' => 'keyword', // don't tokenize, keep whole value as a string
'filter' => ['lowercase', 'asciifolding'] // asciifolding = remove diacritics
],
// General purpose, without removing stop word or stem: improve meaning accuracy // General purpose, without removing stop word or stem: improve meaning accuracy
'general_light' => [ 'general_light' => [
'type' => 'custom', 'type' => 'custom',

View File

@@ -27,9 +27,10 @@ class FieldToFieldMappingConverter
$ret->disableIndexing(); $ret->disableIndexing();
} }
else { else {
$ret->addChild( // no more need "raw" for sorting (sort arg depends on type)
(new StringFieldMapping('raw')) // $ret->addChild(
->enableRawIndexing()); // (new StringFieldMapping('raw'))
// ->enableRawIndexing());
$ret->addChild( $ret->addChild(
(new StringFieldMapping('light')) (new StringFieldMapping('light'))
->setAnalyzer('general_light') ->setAnalyzer('general_light')
@@ -58,9 +59,10 @@ class FieldToFieldMappingConverter
$ret->disableIndexing(); $ret->disableIndexing();
} }
else { else {
$ret->addChild( // no more need "raw" for sorting (sort arg depends on type)
(new StringFieldMapping('raw')) // $ret->addChild(
->enableRawIndexing()); // (new StringFieldMapping('raw'))
// ->enableRawIndexing());
$ret->addChild( $ret->addChild(
(new StringFieldMapping('light')) (new StringFieldMapping('light'))
->setAnalyzer('general_light') ->setAnalyzer('general_light')

View File

@@ -57,6 +57,10 @@ class StringFieldMapping extends ComplexFieldMapping
$child->setAnalyzer('general_light'); $child->setAnalyzer('general_light');
$this->addChild($child); $this->addChild($child);
$child = new StringFieldMapping('sort');
$child->setAnalyzer('sort'); // custom = lowercase(keyword)
$this->addChild($child);
$child = new StringFieldMapping('truncated'); $child = new StringFieldMapping('truncated');
$child->setAnalyzer('truncation_analyzer', 'indexing'); $child->setAnalyzer('truncation_analyzer', 'indexing');
$child->setAnalyzer('truncation_analyzer#search', 'searching'); $child->setAnalyzer('truncation_analyzer#search', 'searching');