Merge pull request #3904 from alchemy-fr/PHRAS-3583_sort-on-dates

PHRAS-3583 merge sort improvement
This commit is contained in:
Nicolas Maillat
2021-11-18 19:04:06 +01:00
committed by GitHub
5 changed files with 72 additions and 1 deletions

View File

@@ -1,3 +1,6 @@
# Base image: elasticsearch, tag 2.4.
FROM elasticsearch:2.4
# Uncomment the RUN line below to allow inline (Groovy) scripts in search, sort, etc.
# RUN echo "script.engine.groovy.inline.search: on" >> config/elasticsearch.yml
# Install the analysis-icu plugin with the plugin tool found under /usr/share/elasticsearch/bin.
RUN /usr/share/elasticsearch/bin/plugin install analysis-icu

View File

@@ -455,7 +455,57 @@ class ElasticSearchEngine implements SearchEngineInterface
$sort['record_id'] = $options->getSortOrder();
}
else {
$sort[sprintf('caption.%s.raw', $options->getSortBy())] = $options->getSortOrder();
$f = array_filter(
$options->getFields(),
function (databox_field $f) use($options) {
return $f->get_name() === $options->getSortBy();
}
);
if(count($f) == 1) {
// the field is found
$f = array_pop($f);
/** @var databox_field $f */
$k = sprintf('%scaption.%s', $f->isBusiness() ? "private_":"", $options->getSortBy());
switch ($f->get_type()) {
case databox_field::TYPE_DATE:
$sort[$k] = [
'order' => $options->getSortOrder(),
'missing' => "_last",
'unmapped_type' => "date"
];
break;
case databox_field::TYPE_NUMBER:
$sort[$k] = [
'order' => $options->getSortOrder(),
'missing' => "_last",
'unmapped_type' => "double"
];
break;
case databox_field::TYPE_STRING:
default:
$k .= '.sort';
$sort[$k] = [
'order' => $options->getSortOrder(),
'missing' => "_last",
'unmapped_type' => "keyword"
];
break;
}
}
/* script tryout
$sort["_script"] = [
'type' => "string",
'script' => [
// 'lang' => "painless",
'inline' => sprintf(
"doc['caption.%s'] ? doc['caption.%s.raw'].value : (doc['private_caption.%s'] ? doc['private_caption.%s.raw'].value : '')",
$options->getSortBy(), $options->getSortBy(), $options->getSortBy(), $options->getSortBy()
)
],
'order' => "asc"
];
*/
}
if (!array_key_exists('record_id', $sort)) {

View File

@@ -83,6 +83,12 @@ class Index
{
$this->analysis = [
'analyzer' => [
// used to sort
'sort' => [
'type' => 'custom',
'tokenizer' => 'keyword', // don't tokenize, keep whole value as a string
'filter' => ['lowercase', 'asciifolding'] // asciifolding = remove diacritics
],
// General purpose, without removing stop word or stem: improve meaning accuracy
'general_light' => [
'type' => 'custom',

View File

@@ -27,6 +27,10 @@ class FieldToFieldMappingConverter
$ret->disableIndexing();
}
else {
// the "raw" sub-field is no longer needed for sorting (the sort argument now depends on the field type)
// $ret->addChild(
// (new StringFieldMapping('raw'))
// ->enableRawIndexing());
$ret->addChild(
(new StringFieldMapping('light'))
->setAnalyzer('general_light')
@@ -55,6 +59,10 @@ class FieldToFieldMappingConverter
$ret->disableIndexing();
}
else {
// the "raw" sub-field is no longer needed for sorting (the sort argument now depends on the field type)
// $ret->addChild(
// (new StringFieldMapping('raw'))
// ->enableRawIndexing());
$ret->addChild(
(new StringFieldMapping('light'))
->setAnalyzer('general_light')

View File

@@ -57,6 +57,10 @@ class StringFieldMapping extends ComplexFieldMapping
$child->setAnalyzer('general_light');
$this->addChild($child);
$child = new StringFieldMapping('sort');
$child->setAnalyzer('sort'); // custom = lowercase(keyword)
$this->addChild($child);
$child = new StringFieldMapping('truncated');
$child->setAnalyzer('truncation_analyzer', 'indexing');
$child->setAnalyzer('truncation_analyzer#search', 'searching');