Files
Phraseanet/lib/Alchemy/Phrasea/SearchEngine/SphinxSearch/ConfigurationPanel.php
Nicolas Le Goff 949bf06cac Merge branch '3.8'
Conflicts:
	CHANGELOG.md
	bin/console
	bin/developer
	bin/setup
	bower.json
	composer.json
	composer.lock
	features/bootstrap/FeatureContext.php
	features/bootstrap/GuiContext.php
	lib/Alchemy/Phrasea/Authentication/Token/TokenValidator.php
	lib/Alchemy/Phrasea/Command/BuildMissingSubdefs.php
	lib/Alchemy/Phrasea/Command/CreateCollection.php
	lib/Alchemy/Phrasea/Command/Developer/JavascriptBuilder.php
	lib/Alchemy/Phrasea/Controller/Admin/Collection.php
	lib/Alchemy/Phrasea/Controller/Admin/Databoxes.php
	lib/Alchemy/Phrasea/Controller/Admin/TaskManager.php
	lib/Alchemy/Phrasea/Controller/Api/V1.php
	lib/Alchemy/Phrasea/Controller/Client/Baskets.php
	lib/Alchemy/Phrasea/Controller/Client/Root.php
	lib/Alchemy/Phrasea/Controller/Prod/Basket.php
	lib/Alchemy/Phrasea/Controller/Prod/Export.php
	lib/Alchemy/Phrasea/Controller/Prod/Property.php
	lib/Alchemy/Phrasea/Controller/Prod/Records.php
	lib/Alchemy/Phrasea/Controller/Prod/Tools.php
	lib/Alchemy/Phrasea/Controller/Prod/Upload.php
	lib/Alchemy/Phrasea/Controller/Root/Login.php
	lib/Alchemy/Phrasea/Controller/Thesaurus/Thesaurus.php
	lib/Alchemy/Phrasea/Core/Event/ApiLoadEndEvent.php
	lib/Alchemy/Phrasea/Core/Event/ApiLoadStartEvent.php
	lib/Alchemy/Phrasea/Core/Provider/TaskManagerServiceProvider.php
	lib/Alchemy/Phrasea/Core/Version.php
	lib/Alchemy/Phrasea/Exception/XMLParseErrorException.php
	lib/Alchemy/Phrasea/Helper/DatabaseHelper.php
	lib/Alchemy/Phrasea/Helper/User/Edit.php
	lib/Alchemy/Phrasea/SearchEngine/Phrasea/PhraseaEngine.php
	lib/Alchemy/Phrasea/SearchEngine/SearchEngineOptions.php
	lib/Doctrine/Entities/AuthFailure.php
	lib/Doctrine/Entities/Basket.php
	lib/Doctrine/Entities/BasketElement.php
	lib/Doctrine/Entities/LazaretAttribute.php
	lib/Doctrine/Entities/LazaretCheck.php
	lib/Doctrine/Entities/LazaretFile.php
	lib/Doctrine/Entities/LazaretSession.php
	lib/Doctrine/Entities/Session.php
	lib/Doctrine/Entities/SessionModule.php
	lib/Doctrine/Entities/StoryWZ.php
	lib/Doctrine/Entities/UsrList.php
	lib/Doctrine/Entities/UsrListEntry.php
	lib/Doctrine/Entities/UsrListOwner.php
	lib/Doctrine/Entities/ValidationData.php
	lib/Doctrine/Entities/ValidationParticipant.php
	lib/Doctrine/Entities/ValidationSession.php
	lib/Doctrine/Logger/MonologSQLLogger.php
	lib/Doctrine/Repositories/BasketRepository.php
	lib/Doctrine/Repositories/ValidationParticipantRepository.php
	lib/Doctrine/Types/Binary.php
	lib/Doctrine/Types/Blob.php
	lib/Doctrine/Types/Enum.php
	lib/Doctrine/Types/LongBlob.php
	lib/Doctrine/Types/VarBinary.php
	lib/classes/API/OAuth2/Account.php
	lib/classes/API/OAuth2/Application.php
	lib/classes/API/OAuth2/Application/OfficePlugin.php
	lib/classes/API/OAuth2/AuthCode.php
	lib/classes/API/OAuth2/RefreshToken.php
	lib/classes/API/OAuth2/Token.php
	lib/classes/API/V1/Abstract.php
	lib/classes/API/V1/Interface.php
	lib/classes/API/V1/adapter.php
	lib/classes/API/V1/exception/abstract.php
	lib/classes/API/V1/exception/badrequest.php
	lib/classes/API/V1/exception/forbidden.php
	lib/classes/API/V1/exception/internalservererror.php
	lib/classes/API/V1/exception/maintenance.php
	lib/classes/API/V1/exception/methodnotallowed.php
	lib/classes/API/V1/exception/notfound.php
	lib/classes/API/V1/exception/unauthorized.php
	lib/classes/API/V1/result.php
	lib/classes/Exception/Feed/EntryNotFound.php
	lib/classes/Exception/Feed/ItemNotFound.php
	lib/classes/Exception/Feed/PublisherNotFound.php
	lib/classes/Feed/Abstract.php
	lib/classes/Feed/Adapter.php
	lib/classes/Feed/Aggregate.php
	lib/classes/Feed/Collection.php
	lib/classes/Feed/CollectionInterface.php
	lib/classes/Feed/Entry/Adapter.php
	lib/classes/Feed/Entry/Collection.php
	lib/classes/Feed/Entry/CollectionInterface.php
	lib/classes/Feed/Entry/Interface.php
	lib/classes/Feed/Entry/Item.php
	lib/classes/Feed/Entry/ItemInterface.php
	lib/classes/Feed/Interface.php
	lib/classes/Feed/Link.php
	lib/classes/Feed/LinkInterface.php
	lib/classes/Feed/Publisher/Adapter.php
	lib/classes/Feed/Publisher/Interface.php
	lib/classes/Feed/Token.php
	lib/classes/Feed/TokenAggregate.php
	lib/classes/Feed/XML/Abstract.php
	lib/classes/Feed/XML/Atom.php
	lib/classes/Feed/XML/Cooliris.php
	lib/classes/Feed/XML/Interface.php
	lib/classes/Feed/XML/RSS.php
	lib/classes/Feed/XML/RSS/Image.php
	lib/classes/Feed/XML/RSS/ImageInterface.php
	lib/classes/User/Adapter.php
	lib/classes/User/Interface.php
	lib/classes/appbox/register.php
	lib/classes/connection.php
	lib/classes/connection/abstract.php
	lib/classes/connection/interface.php
	lib/classes/connection/pdo.php
	lib/classes/connection/pdoStatementDebugger.php
	lib/classes/deprecated/countries.php
	lib/classes/deprecated/inscript.api.php
	lib/classes/eventsmanager/event/test.php
	lib/classes/ftpclient.php
	lib/classes/http/request.php
	lib/classes/media/subdef.php
	lib/classes/module/console/schedulerStart.php
	lib/classes/module/console/schedulerState.php
	lib/classes/module/console/schedulerStop.php
	lib/classes/module/console/taskState.php
	lib/classes/module/console/tasklist.php
	lib/classes/module/console/taskrun.php
	lib/classes/patch/320alpha4b.php
	lib/classes/patch/3715alpha1a.php
	lib/classes/patch/379alpha1a.php
	lib/classes/patch/380alpha10a.php
	lib/classes/patch/380alpha11a.php
	lib/classes/patch/380alpha13a.php
	lib/classes/patch/380alpha14a.php
	lib/classes/patch/380alpha15a.php
	lib/classes/patch/380alpha16a.php
	lib/classes/patch/380alpha17a.php
	lib/classes/patch/380alpha18a.php
	lib/classes/patch/380alpha3a.php
	lib/classes/patch/380alpha4a.php
	lib/classes/patch/380alpha6a.php
	lib/classes/patch/380alpha8a.php
	lib/classes/patch/380alpha9a.php
	lib/classes/patch/381alpha1b.php
	lib/classes/patch/381alpha2a.php
	lib/classes/patch/381alpha3a.php
	lib/classes/patch/381alpha4a.php
	lib/classes/patch/383alpha1a.php
	lib/classes/patch/383alpha2a.php
	lib/classes/patch/383alpha3a.php
	lib/classes/patch/383alpha4a.php
	lib/classes/record/adapter.php
	lib/classes/record/preview.php
	lib/classes/recordutils.php
	lib/classes/recordutils/audio.php
	lib/classes/recordutils/document.php
	lib/classes/recordutils/map.php
	lib/classes/recordutils/video.php
	lib/classes/registry.php
	lib/classes/registryInterface.php
	lib/classes/set/order.php
	lib/classes/system/url.php
	lib/classes/task/Scheduler.php
	lib/classes/task/appboxAbstract.php
	lib/classes/task/databoxAbstract.php
	lib/classes/task/manager.php
	lib/classes/task/period/RecordMover.php
	lib/classes/task/period/apibridge.php
	lib/classes/task/period/apiwebhooks.php
	lib/classes/task/period/archive.php
	lib/classes/task/period/cindexer.php
	lib/classes/task/period/emptyColl.php
	lib/classes/task/period/ftp.php
	lib/classes/task/period/ftpPull.php
	lib/classes/task/period/subdef.php
	lib/classes/task/period/test.php
	lib/classes/task/period/writemeta.php
	lib/conf.d/PhraseaFixture/AbstractWZ.php
	lib/conf.d/PhraseaFixture/Basket/LoadFiveBaskets.php
	lib/conf.d/PhraseaFixture/Basket/LoadOneBasket.php
	lib/conf.d/PhraseaFixture/Basket/LoadOneBasketEnv.php
	lib/conf.d/PhraseaFixture/Lazaret/LoadOneFile.php
	lib/conf.d/PhraseaFixture/Story/LoadOneStory.php
	lib/conf.d/PhraseaFixture/UsrLists/ListAbstract.php
	lib/conf.d/PhraseaFixture/UsrLists/UsrList.php
	lib/conf.d/PhraseaFixture/UsrLists/UsrListEntry.php
	lib/conf.d/PhraseaFixture/UsrLists/UsrListOwner.php
	lib/conf.d/PhraseaFixture/ValidationParticipant/LoadOneParticipant.php
	lib/conf.d/PhraseaFixture/ValidationParticipant/LoadParticipantWithSession.php
	lib/conf.d/PhraseaFixture/ValidationSession/LoadOneValidationSession.php
	templates/web/admin/collection/collection.html.twig
	templates/web/common/dialog_export.html.twig
	templates/web/common/menubar.html.twig
	templates/web/prod/actions/Tools/index.html.twig
	templates/web/prod/index.html.twig
	templates/web/prod/upload/upload-flash.html.twig
	templates/web/prod/upload/upload.html.twig
	templates/web/report/report_layout_child.html.twig
	templates/web/setup/step2.html.twig
	templates/web/thesaurus/new-synonym-dialog.html.twig
	templates/web/thesaurus/properties.html.twig
	templates/web/thesaurus/search.html.twig
	tests/Alchemy/Tests/Phrasea/Application/ApiAbstract.php
	tests/Alchemy/Tests/Phrasea/Cache/FactoryTest.php
	tests/Alchemy/Tests/Phrasea/Controller/Admin/AdminCollectionTest.php
	tests/Alchemy/Tests/Phrasea/Controller/Client/RootTest.php
2015-02-05 18:38:49 +01:00

739 lines
24 KiB
PHP

<?php
/*
* This file is part of Phraseanet
*
* (c) 2005-2015 Alchemy
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
namespace Alchemy\Phrasea\SearchEngine\SphinxSearch;
use Alchemy\Phrasea\Core\Configuration\PropertyAccess;
use Alchemy\Phrasea\SearchEngine\AbstractConfigurationPanel;
use Alchemy\Phrasea\Application;
use Symfony\Component\HttpFoundation\Request;
use Symfony\Component\Finder\Finder;
class ConfigurationPanel extends AbstractConfigurationPanel
{
const DATE_FIELD_PREFIX = 'date_field_';
protected $charsets;
protected $searchEngine;
/**
 * Builds the configuration panel for the Sphinx search engine.
 *
 * @param SphinxSearchEngine $engine The engine this panel configures
 * @param PropertyAccess     $conf   Accessor used to read the stored search-engine options
 */
public function __construct(SphinxSearchEngine $engine, PropertyAccess $conf)
{
    $this->conf = $conf;
    $this->searchEngine = $engine;
}
/**
 * Returns the identifier of this configuration panel.
 *
 * {@inheritdoc}
 *
 * @return string Always the literal 'sphinx-search'
 */
public function getName()
{
    return 'sphinx-search';
}
/**
 * Renders the Sphinx Search administration page.
 *
 * The template receives the current configuration, the Sphinx configuration
 * file generated from it, the available charsets and the available date
 * fields of the application databoxes.
 *
 * {@inheritdoc}
 */
public function get(Application $app, Request $request)
{
    $configuration = $this->getConfiguration();

    // Evaluated in the same order as the template parameters are declared.
    $configFile = $this->generateSphinxConf($app['phraseanet.appbox']->get_databoxes(), $configuration);
    $charsets = $this->getAvailableCharsets();
    $dateFields = $this->getAvailableDateFields($app['phraseanet.appbox']->get_databoxes());

    return $app['twig']->render('admin/search-engine/sphinx-search.html.twig', [
        'configuration' => $configuration,
        'configfile'    => $configFile,
        'charsets'      => $charsets,
        'date_fields'   => $dateFields,
    ]);
}
/**
 * Stores the options submitted from the administration form, then redirects
 * to 'admin_searchengine_get'.
 *
 * The 'charset_tables' and 'date_fields' lists are always replaced by the
 * submitted ones (an absent list empties the stored one). The connection
 * settings ('host', 'port', 'rt_host', 'rt_port') are only replaced when a
 * non-blank scalar value is submitted; otherwise the value returned by
 * getConfiguration() is kept.
 *
 * {@inheritdoc}
 */
public function post(Application $app, Request $request)
{
    $configuration = $this->getConfiguration();

    foreach (['charset_tables', 'date_fields'] as $listKey) {
        // The cast tolerates a scalar being posted instead of a list;
        // array_values() re-indexes the entries from 0.
        $configuration[$listKey] = array_values((array) $request->request->get($listKey, []));
    }

    foreach (['host', 'port', 'rt_host', 'rt_port'] as $key) {
        $value = $request->request->get($key);

        // A missing or blank field must not overwrite the stored value:
        // populateConfiguration() relies on array_replace(), which would keep
        // a null/empty entry instead of the default, and the generated
        // "listen = host:port" directive would then be invalid.
        if (is_scalar($value) && '' !== trim((string) $value)) {
            $configuration[$key] = $value;
        }
    }

    $this->saveConfiguration($configuration);

    return $app->redirectPath('admin_searchengine_get');
}
/**
 * Returns the stored search-engine options, completed with the default
 * values for every option that is missing.
 *
 * {@inheritdoc}
 */
public function getConfiguration()
{
    return self::populateConfiguration(
        $this->conf->get(['main', 'search-engine', 'options'], [])
    );
}
/**
 * Lists the charset tables shipped in the Charset/ directory next to this
 * file.
 *
 * Every "*.php" file whose name maps to an existing class of the Charset
 * sub-namespace is instantiated. The result is computed once and kept for
 * subsequent calls.
 *
 * @return array Charset instances indexed by class short name, sorted by key
 */
public function getAvailableCharsets()
{
    if (null === $this->charsets) {
        $this->charsets = [];

        $charsetFiles = new Finder();
        $charsetFiles->in(__DIR__ . '/Charset/')->files()->name('*.php');

        foreach ($charsetFiles as $charsetFile) {
            // Drop the 4-character ".php" suffix to obtain the class short name.
            $shortName = substr($charsetFile->getFilename(), 0, -4);
            $fqcn = __NAMESPACE__ . '\\Charset\\' . $shortName;

            if (!class_exists($fqcn)) {
                continue;
            }

            $this->charsets[$shortName] = new $fqcn;
        }

        ksort($this->charsets);
    }

    return $this->charsets;
}
/**
 * Generates Sphinx Search configuration depending on the service configuration
 *
 * The output contains, for every databox, the sources and indexes
 * (suggestions, metadatas, documents, and their realtime / stemmed variants)
 * followed by one global "indexer" section and one "searchd" section.
 *
 * Missing options are completed with the defaults of populateConfiguration().
 * The generated "sql_pass" directive is left empty.
 *
 * @param  array  $databoxes     The databoxes to index
 * @param  array  $configuration The configuration
 * @return string The sphinx search configuration
 */
public function generateSphinxConf(array $databoxes, array $configuration)
{
    $options = self::populateConfiguration($configuration);
    $options['charset_tables'] = array_unique($options['charset_tables']);

    $conf = '';

    // Concatenate the tables of every selected charset that maps to a class.
    $charsets = '';
    foreach ($options['charset_tables'] as $charset) {
        $classname = __NAMESPACE__ . '\\Charset\\' . $charset;
        if (class_exists($classname)) {
            $charset_table = new $classname();
            $charsets .= $charset_table->get_table();
        }
    }

    // Turn the tables into one multi-line directive value: blank lines and
    // "#" comment lines are dropped, every kept line ends with a line
    // continuation, and every kept line except the last one ends with a comma.
    // The lines are walked backwards so that the last kept line is seen first.
    $charsets = explode("\n", $charsets);
    $last_detect = false;

    for ($i = (count($charsets) - 1); $i >= 0; $i--) {
        $trimmed = trim($charsets[$i]);

        if ($trimmed === '' || strpos($trimmed, '#') === 0) {
            unset($charsets[$i]);
            continue;
        }

        if ($last_detect === true && substr($trimmed, -1) !== ',') {
            $charsets[$i] = rtrim($charsets[$i]) . ', ';
        }

        $charsets[$i] = " " . $charsets[$i] . " \\\n";
        $last_detect = true;
    }

    $charsets = "\\\n" . implode('', $charsets);

    // Settings shared by the plain and the realtime indexes.
    $charset_abstract = '
docinfo = extern
charset_type = utf-8
charset_table = ' . $charsets . '
# minimum indexed word length
# default is 1 (index everything)
min_word_len = 1
# whether to strip HTML tags from incoming documents
# known values are 0 (do not strip) and 1 (do strip)
# optional, default is 0
html_strip = 0
# enable star character search
enable_star = 1
# enable star search like cat*
min_prefix_len = 0
# enable star search like *aculous
min_infix_len = 1
';

    foreach ($databoxes as $databox) {
        // Suffix appended to every source / index name of this databox.
        $index_crc = $this->searchEngine->CRCdatabox($databox);

        $date_selects = $date_left_joins = $date_fields = [];

        // Read the date fields from the populated options so that a
        // configuration without a "date_fields" entry is supported.
        foreach ($options['date_fields'] as $name) {
            $field = $databox->get_meta_structure()->get_element_by_name($name);
            $attribute = self::DATE_FIELD_PREFIX . $name;
            $date_fields[] = $attribute;

            if ($field instanceof \databox_field) {
                $date_selects[] = ", UNIX_TIMESTAMP(d" . $field->get_id() . ".value) as " . $attribute;
                $date_left_joins[] = " LEFT JOIN metadatas d" . $field->get_id() . " ON (d" . $field->get_id() . ".record_id = r.record_id AND d" . $field->get_id() . ".meta_struct_id = " . $field->get_id() . ")";
            } else {
                // The databox has no such field: select a null placeholder
                // under the very name declared as attribute below, otherwise
                // the declared attribute has no matching column.
                $date_selects[] = ", null as " . $attribute;
            }
        }

        $conf .= '
#------------------------------------------------------------------------------
# ***************** ' . $databox->get_dbname() . '
#------------------------------------------------------------------------------
#--------------------------------------
### Sources Abstract
source database_cfg' . $index_crc . '
{
type = mysql
sql_host = ' . $databox->get_host() . '
sql_user = ' . $databox->get_user() . '
sql_pass =
sql_db = ' . $databox->get_dbname() . '
sql_port = ' . $databox->get_port() . '
# We retrieve datas in UTF-8
sql_query_pre = SET character_set_results = "utf8", character_set_client = "utf8", \
character_set_connection = "utf8", character_set_database = "utf8", \
character_set_server = "utf8"
sql_query_pre = SET NAMES utf8
}
#--------------------------------------
### Suggestions Sources
source src_suggest' . $index_crc . ' : database_cfg' . $index_crc . '
{
sql_query = SELECT id, keyword, trigrams, freq, LENGTH(keyword) AS len FROM suggest
sql_attr_uint = freq
sql_attr_uint = len
sql_attr_string = keyword
}
index suggest' . $index_crc . '
{
source = src_suggest' . $index_crc . '
path = /var/sphinx/datas/suggest_' . $index_crc . '
' . $charset_abstract . '
}
#--------------------------------------
### Metadatas Sources
source src_metadatas' . $index_crc . ' : database_cfg' . $index_crc . '
{
sql_query = \
SELECT m.id, m.meta_struct_id, m.record_id, m.value, \
' . $databox->get_sbas_id() . ' as sbas_id, s.id, \
CRC32(CONCAT_WS("_", ' . $databox->get_sbas_id() . ', s.id)) as crc_struct_id, \
CONCAT_WS("_", ' . $databox->get_sbas_id() . ', s.id) as struct_id, \
r.parent_record_id, \
CRC32(CONCAT_WS("_", ' . $databox->get_sbas_id() . ', r.coll_id)) as crc_sbas_coll, \
CRC32(CONCAT_WS("_", ' . $databox->get_sbas_id() . ', r.record_id)) as crc_sbas_record, \
CONCAT_WS("_", ' . $databox->get_sbas_id() . ', r.coll_id) as sbas_coll, \
CRC32(r.type) as crc_type, r.coll_id, \
UNIX_TIMESTAMP(credate) as created_on, 0 as deleted, \
CRC32(CONCAT_WS("_", r.coll_id, s.business)) as crc_coll_business, \
s.business \
' . implode(" \\\n", $date_selects) . ' \
FROM (metadatas m, metadatas_structure s, record r) \
' . implode(" \\\n", $date_left_joins) . ' \
WHERE m.record_id = r.record_id AND m.meta_struct_id = s.id \
AND s.indexable = "1"
# documents can be filtered / sorted on each sql_attr
sql_attr_uint = record_id
sql_attr_uint = sbas_id
sql_attr_uint = coll_id
sql_attr_uint = parent_record_id
sql_attr_uint = crc_struct_id
sql_attr_uint = crc_sbas_coll
sql_attr_uint = crc_sbas_record
sql_attr_uint = crc_type
sql_attr_uint = deleted
sql_attr_uint = business
sql_attr_uint = crc_coll_business
sql_attr_timestamp = created_on
';

        foreach ($date_fields as $date_field) {
            $conf.= " sql_attr_timestamp = $date_field\n";
        }

        $conf .= '
sql_attr_multi = uint status from query; SELECT m.id as id, \
CRC32(CONCAT_WS("_", ' . $databox->get_sbas_id() . ', s.name)) as name \
FROM metadatas m, status s \
WHERE s.record_id = m.record_id AND s.value = 1 \
ORDER BY m.id ASC
# datas returned in the resultset
sql_query_info = SELECT r.* FROM record r, metadatas m \
WHERE m.id=$id AND m.record_id = r.record_id
}
#--------------------------------------
### Metadatas Index
index metadatas' . $index_crc . ' : suggest' . $index_crc . '
{
source = src_metadatas' . $index_crc . '
path = /var/sphinx/datas/metadatas_' . $index_crc . '
}
#--------------------------------------
### Metadatas Index Stemmed
index metadatas' . $index_crc . '_stemmed_fr : suggest' . $index_crc . '
{
source = src_metadatas' . $index_crc . '
path = /var/sphinx/datas/metadatas_' . $index_crc . '_stemmed_fr
morphology = libstemmer_fr
# minimum word length at which to enable stemming
# optional, default is 1 (stem everything)
#
min_stemming_len = 1
# whether to index original keywords along with stemmed versions
# enables "=exactform" operator to work
# optional, default is 0
#
index_exact_words = 1
}
index metadatas' . $index_crc . '_stemmed_en : metadatas' . $index_crc . '_stemmed_fr
{
path = /var/sphinx/datas/metadatas_' . $index_crc . '_stemmed_en
morphology = libstemmer_en
}
index metadatas' . $index_crc . '_stemmed_nl : metadatas' . $index_crc . '_stemmed_fr
{
path = /var/sphinx/datas/metadatas_' . $index_crc . '_stemmed_nl
morphology = libstemmer_nl
}
index metadatas' . $index_crc . '_stemmed_de : metadatas' . $index_crc . '_stemmed_fr
{
path = /var/sphinx/datas/metadatas_' . $index_crc . '_stemmed_de
morphology = libstemmer_de
}
#--------------------------------------
### METAS_REALTIME Index
index metas_realtime' . $index_crc . '
{
type = rt
path = /var/sphinx/datas/metas_realtime_' . $index_crc . '
' . $charset_abstract . '
rt_field = value
rt_field = meta_struct_id
rt_attr_uint = record_id
rt_attr_uint = sbas_id
rt_attr_uint = coll_id
rt_attr_uint = parent_record_id
rt_attr_uint = crc_struct_id
rt_attr_uint = crc_sbas_coll
rt_attr_uint = crc_sbas_record
rt_attr_uint = crc_type
rt_attr_uint = deleted
rt_attr_uint = business
rt_attr_uint = crc_coll_business
rt_attr_timestamp = created_on
';

        foreach ($date_fields as $date_field) {
            $conf.= " rt_attr_timestamp = $date_field\n";
        }

        $conf .= ' rt_attr_multi = status
}
index metas_realtime_stemmed_fr_' . $index_crc . ' : metas_realtime' . $index_crc . '
{
type = rt
morphology = libstemmer_fr
min_stemming_len = 1
index_exact_words = 1
path = /var/sphinx/datas/metas_realtime_stemmed_fr_' . $index_crc . '
}
index metas_realtime_stemmed_en_' . $index_crc . ' : metas_realtime_stemmed_fr_' . $index_crc . '
{
morphology = libstemmer_en
path = /var/sphinx/datas/metas_realtime_stemmed_en_' . $index_crc . '
}
index metas_realtime_stemmed_de_' . $index_crc . ' : metas_realtime_stemmed_fr_' . $index_crc . '
{
morphology = libstemmer_de
path = /var/sphinx/datas/metas_realtime_stemmed_de_' . $index_crc . '
}
index metas_realtime_stemmed_nl_' . $index_crc . ' : metas_realtime_stemmed_fr_' . $index_crc . '
{
morphology = libstemmer_nl
path = /var/sphinx/datas/metas_realtime_stemmed_nl_' . $index_crc . '
}
#--------------------------------------
### All documents Index (give the last 1000 records added, etc...)
source src_documents' . $index_crc . ' : database_cfg' . $index_crc . '
{
sql_query = \
SELECT r.record_id as id, r.record_id, r.parent_record_id, ' . $databox->get_sbas_id() . ' as sbas_id, \
CRC32(CONCAT_WS("_", ' . $databox->get_sbas_id() . ', r.coll_id)) as crc_sbas_coll, \
CRC32(CONCAT_WS("_", ' . $databox->get_sbas_id() . ', r.record_id)) as crc_sbas_record, \
CONCAT_WS("_", ' . $databox->get_sbas_id() . ' , r.coll_id) as sbas_coll, \
CRC32(r.type) as crc_type, r.coll_id, \
UNIX_TIMESTAMP(r.credate) as created_on, 0 as deleted \
' . implode(" \\\n", $date_selects) . ' \
FROM (record r) \
' . implode(" \\\n", $date_left_joins) . ' \
WHERE 1
# documents can be filtered / sorted on each sql_attr
sql_attr_uint = record_id
sql_attr_uint = sbas_id
sql_attr_uint = coll_id
sql_attr_uint = parent_record_id
sql_attr_uint = crc_sbas_coll
sql_attr_uint = crc_sbas_record
sql_attr_uint = crc_type
sql_attr_uint = deleted
sql_attr_timestamp = created_on
';

        foreach ($date_fields as $date_field) {
            $conf.= " sql_attr_timestamp = $date_field\n";
        }

        $conf .= '
sql_attr_multi = uint status from query; SELECT r.record_id as id, \
CRC32(CONCAT_WS("_", ' . $databox->get_sbas_id() . ', s.name)) as name \
FROM record r, status s \
WHERE s.record_id = r.record_id AND s.value = 1 \
ORDER BY r.record_id ASC
sql_joined_field = metas from query; \
SELECT m.record_id as id, m.value \
FROM metadatas m, metadatas_structure s \
WHERE s.id = m.meta_struct_id AND s.business = 0 \
ORDER BY m.record_id ASC
# datas returned in the resultset
sql_query_info = SELECT r.* FROM record r WHERE r.record_id=$id
}
#--------------------------------------
### All documents Index
index documents' . $index_crc . ' : suggest' . $index_crc . '
{
source = src_documents' . $index_crc . '
path = /var/sphinx/datas/documents_' . $index_crc . '
morphology = none
}
index documents' . $index_crc . '_stemmed_fr : documents' . $index_crc . '
{
path = /var/sphinx/datas/documents_' . $index_crc . '_stemmed_fr
morphology = libstemmer_fr
# minimum word length at which to enable stemming
# optional, default is 1 (stem everything)
#
min_stemming_len = 1
# whether to index original keywords along with stemmed versions
# enables "=exactform" operator to work
# optional, default is 0
#
index_exact_words = 1
}
index documents' . $index_crc . '_stemmed_en : documents' . $index_crc . '
{
path = /var/sphinx/datas/documents_' . $index_crc . '_stemmed_en
morphology = libstemmer_en
}
index documents' . $index_crc . '_stemmed_de : documents' . $index_crc . '
{
path = /var/sphinx/datas/documents_' . $index_crc . '_stemmed_de
morphology = libstemmer_de
}
index documents' . $index_crc . '_stemmed_nl : documents' . $index_crc . '
{
path = /var/sphinx/datas/documents_' . $index_crc . '_stemmed_nl
morphology = libstemmer_nl
}
#--------------------------------------
### DOCS_REALTIME Index
index docs_realtime' . $index_crc . '
{
type = rt
path = /var/sphinx/datas/docs_realtime_' . $index_crc . '
' . $charset_abstract . '
rt_field = value
rt_attr_uint = record_id
rt_attr_uint = sbas_id
rt_attr_uint = coll_id
rt_attr_uint = parent_record_id
rt_attr_uint = crc_sbas_coll
rt_attr_uint = crc_sbas_record
rt_attr_uint = crc_type
rt_attr_uint = deleted
rt_attr_timestamp = created_on
';

        foreach ($date_fields as $date_field) {
            $conf.= " rt_attr_timestamp = $date_field\n";
        }

        $conf .= ' rt_attr_multi = status
}
index docs_realtime_stemmed_fr_' . $index_crc . ' : docs_realtime' . $index_crc . '
{
type = rt
morphology = libstemmer_fr
min_stemming_len = 1
index_exact_words = 1
path = /var/sphinx/datas/docs_realtime_stemmed_fr_' . $index_crc . '
}
index docs_realtime_stemmed_en_' . $index_crc . ' : docs_realtime_stemmed_fr_' . $index_crc . '
{
morphology = libstemmer_en
path = /var/sphinx/datas/docs_realtime_stemmed_en_' . $index_crc . '
}
index docs_realtime_stemmed_de_' . $index_crc . ' : docs_realtime_stemmed_fr_' . $index_crc . '
{
morphology = libstemmer_de
path = /var/sphinx/datas/docs_realtime_stemmed_de_' . $index_crc . '
}
index docs_realtime_stemmed_nl_' . $index_crc . ' : docs_realtime_stemmed_fr_' . $index_crc . '
{
morphology = libstemmer_nl
path = /var/sphinx/datas/docs_realtime_stemmed_nl_' . $index_crc . '
}
#------------------------------------------------------------------------------
# ***************** End configuration for ' . $databox->get_dbname() . '
#------------------------------------------------------------------------------
';
    }

    // Global sections, emitted once after all the databoxes.
    $conf .='
#******************************************************************************
#****************** Sphinx Indexer Configuration ****************************
#******************************************************************************
indexer {
mem_limit = 512M
# maximum IO calls per second (for I/O throttling)
# optional, default is 0 (unlimited)
#
# max_iops = 40
# maximum IO call size, bytes (for I/O throttling)
# optional, default is 0 (unlimited)
#
# max_iosize = 1048576
}
#******************************************************************************
#****************** Sphinx Search Daemon Configuration **********************
#******************************************************************************
searchd
{
# [hostname:]port[:protocol], or /unix/socket/path to listen on
# known protocols are \'sphinx\' (SphinxAPI) and \'mysql41\' (SphinxQL)
#
# multi-value, multiple listen points are allowed
# optional, defaults are 9312:sphinx and 9306:mysql41, as below
#
# listen = 127.0.0.1
# listen = 192.168.0.1:9312
# listen = 9312
# listen = /var/run/searchd.sock
listen = '.$options['host'].':'.$options['port'].'
listen = '.$options['rt_host'].':'.$options['rt_port'].':mysql41
# log file, searchd run info is logged here
# optional, default is \'searchd.log\'
log = /var/sphinx/searchd.log
# query log file, all search queries are logged here
# optional, default is empty (do not log queries)
query_log = /var/sphinx/query.log
# client read timeout, seconds
# optional, default is 5
read_timeout = 5
# request timeout, seconds
# optional, default is 5 minutes
client_timeout = 300
# maximum amount of children to fork (concurrent searches to run)
# optional, default is 0 (unlimited)
max_children = 30
# PID file, searchd process ID file name
# mandatory
pid_file = /var/sphinx/searchd.pid
# max amount of matches the daemon ever keeps in RAM, per-index
# WARNING, THERE\'S ALSO PER-QUERY LIMIT, SEE SetLimits() API CALL
# default is 1000 (just like Google)
max_matches = 1000000
# seamless rotate, prevents rotate stalls if precaching huge datasets
# optional, default is 1
seamless_rotate = 1
# whether to forcibly preopen all indexes on startup
# optional, default is 0 (do not preopen)
preopen_indexes = 1
# whether to unlink .old index copies on succesful rotation.
# optional, default is 1 (do unlink)
unlink_old = 1
# multi-processing mode (MPM)
# known values are none, fork, prefork, and threads
# optional, default is fork
#
workers = threads # for RT to work
# binlog files path; use empty string to disable binlog
# optional, default is build-time configured data directory
#
# binlog_path = # disable logging
# binlog_path = /var/data # binlog.001 etc will be created there
binlog_path =
# binlog flush/sync mode
# 0 means flush and sync every second
# 1 means flush and sync every transaction
# 2 means flush every transaction, sync every second
# optional, default is 2
#
binlog_flush = 2
# binlog per-file size limit
# optional, default is 128M, 0 means no limit
#
# binlog_max_log_size = 256M
# max threads to create for searching local parts of a distributed index
# optional, default is 0, which means disable multi-threaded searching
# should work with all MPMs (ie. does NOT require workers=threads)
#
dist_threads = 4
# max common subtree document cache size, per-query
# optional, default is 0 (disable subtree optimization)
#
subtree_docs_cache = 4M
# max common subtree hit cache size, per-query
# optional, default is 0 (disable subtree optimization)
#
subtree_hits_cache = 8M
# max allowed per-query filter count
# optional, default is 256
#
max_filters = 512
compat_sphinxql_magics = 0
}
';

    return $conf;
}
/**
 * Completes a configuration with the default value of every missing option.
 *
 * Options present in $configuration always win over the defaults; options
 * unknown to the defaults are kept as they are.
 *
 * @param array $configuration The (possibly partial) configuration
 *
 * @return array The configuration, with at least the keys 'charset_tables',
 *               'date_fields', 'host', 'port', 'rt_host' and 'rt_port'
 */
public static function populateConfiguration(array $configuration)
{
    $defaults = [
        'charset_tables' => ['common', 'latin'],
        'date_fields'    => [],
        'host'           => '127.0.0.1',
        'port'           => 9312,
        'rt_host'        => '127.0.0.1',
        'rt_port'        => 9306,
    ];

    return array_replace($defaults, $configuration);
}
}