mirror of
https://github.com/alchemy-fr/Phraseanet.git
synced 2025-10-11 12:03:14 +00:00
590 lines
19 KiB
PHP
590 lines
19 KiB
PHP
<?php
|
|
|
|
namespace Alchemy\Phrasea\SearchEngine\SphinxSearch;
|
|
|
|
use Alchemy\Phrasea\SearchEngine\ConfigurationPanelInterface;
|
|
use Silex\Application;
|
|
use Symfony\Component\Finder\Finder;
|
|
use Symfony\Component\HttpFoundation\Request;
|
|
|
|
class ConfigurationPanel implements ConfigurationPanelInterface
|
|
{
|
|
protected $charsets;
|
|
protected $searchEngine;
|
|
|
|
/**
 * Stores the Sphinx search engine this panel generates configuration for.
 *
 * @param SphinxSearchEngine $engine Engine later used to compute the
 *                                   per-databox index suffix (CRCdatabox())
 */
public function __construct(SphinxSearchEngine $engine)
{
    $this->searchEngine = $engine;
}
|
|
|
|
/**
 * Renders the Sphinx administration panel.
 *
 * The template receives the stored configuration, the configuration file
 * generated from it for every databox of the appbox, and the list of
 * available charset tables.
 *
 * @param Application $app     Application container (appbox and twig are read from it)
 * @param Request     $request Current request (not used by this method)
 *
 * @return mixed Whatever the twig service's render() returns
 */
public function get(Application $app, Request $request)
{
    $configuration = $this->getConfiguration();
    $databoxes = $app['phraseanet.appbox']->get_databoxes();

    $params = array();
    $params['configuration'] = $configuration;
    $params['configfile'] = $this->generateSphinxConf($databoxes, $configuration);
    $params['charsets'] = $this->get_available_charsets();

    return $app['twig']->render('admin/search-engine/sphinx-search.html.twig', $params);
}
|
|
|
|
/**
 * Loads the panel configuration from config/sphinx-search.json.
 *
 * A missing, unreadable or undecodable file yields an empty configuration.
 * The "charset_tables" entry is always an array in the returned value: when
 * it is absent, null or not an array it is replaced by the default list
 * ("common", "latin").
 *
 * @return array
 */
public function getConfiguration()
{
    $path = __DIR__ . '/../../../../../config/sphinx-search.json';
    $configuration = null;

    // Explicit checks replace the former "@" suppression, which silenced every
    // warning raised while reading and decoding the file, not only the
    // expected "file does not exist yet" case.
    if (is_file($path) && is_readable($path)) {
        $contents = file_get_contents($path);

        if (false !== $contents) {
            $configuration = json_decode($contents, true);
        }
    }

    if ( ! is_array($configuration)) {
        $configuration = array();
    }

    // isset() alone would accept a scalar coming from a hand-edited file;
    // consumers iterate over this value, so it has to be an array.
    if ( ! isset($configuration['charset_tables']) || ! is_array($configuration['charset_tables'])) {
        $configuration['charset_tables'] = array("common", "latin");
    }

    return $configuration;
}
|
|
|
|
/**
 * Persists the charset tables submitted from the administration panel.
 *
 * The "charset_tables" request parameter replaces the stored list; every
 * other key of the stored configuration is kept. Only plain identifier
 * strings are retained, because these values are later appended to a
 * namespace to build a class name that is passed to class_exists().
 *
 * @param Application $app     Application container (redirect and URL generator are used)
 * @param Request     $request Request carrying the "charset_tables" POST parameter
 *
 * @return mixed The redirect response to the "admin_searchengine_get" route
 *
 * @throws \RuntimeException When the configuration file cannot be written
 */
public function post(Application $app, Request $request)
{
    $configuration = $this->getConfiguration();

    $submitted = $request->request->get('charset_tables', array());

    // A client may send a scalar instead of a list; treat it as "nothing selected"
    // rather than iterating over a non-array.
    if ( ! is_array($submitted)) {
        $submitted = array();
    }

    $tables = array();

    foreach ($submitted as $table) {
        // Untrusted input: keep only values that can be a class short name.
        if ( ! is_string($table) || ! preg_match('/^[A-Za-z_][A-Za-z0-9_]*$/', $table)) {
            continue;
        }

        if ( ! in_array($table, $tables, true)) {
            $tables[] = $table;
        }
    }

    $configuration['charset_tables'] = $tables;

    $path = __DIR__ . '/../../../../../config/sphinx-search.json';

    // Do not redirect as if the save succeeded when nothing was written.
    if (false === file_put_contents($path, json_encode($configuration))) {
        throw new \RuntimeException('Unable to write the Sphinx search configuration file');
    }

    return $app->redirect($app['url_generator']->generate('admin_searchengine_get'));
}
|
|
|
|
/**
 * Lists the charset tables found in the Charset/ directory next to this file.
 *
 * Each PHP file whose base name maps to an existing class of the
 * "<this namespace>\Charset" namespace is instantiated. The result is
 * sorted by name and memoised on the instance.
 *
 * @return array Charset objects indexed by class short name
 */
public function get_available_charsets()
{
    if (null === $this->charsets) {
        // Assigned before scanning so the property is no longer null even if
        // the scan below fails.
        $this->charsets = array();

        $files = new Finder();
        $files->in(__DIR__ . '/Charset/')->files()->name('*.php');

        foreach ($files as $file) {
            $shortName = $file->getBasename('.php');
            $fqcn = __NAMESPACE__ . '\\Charset\\' . $shortName;

            if ( ! class_exists($fqcn)) {
                continue;
            }

            $this->charsets[$shortName] = new $fqcn;
        }

        ksort($this->charsets);
    }

    return $this->charsets;
}
|
|
|
|
/**
 * Renders the full Sphinx configuration file for the given databoxes.
 *
 * The output is one section per databox (sources and indexes whose names are
 * suffixed with the value returned by the search engine's CRCdatabox()),
 * followed by the static "indexer" and "searchd" sections.
 *
 * @param array $databoxes     Databox objects to generate sources and indexes for
 * @param array $configuration Panel configuration; only "charset_tables" is read
 *
 * @return string The configuration file content
 */
public function generateSphinxConf(array $databoxes, array $configuration)
{
    $options = array_merge(array('charset_tables' => array()), $configuration);

    // The stored value may have been hand-edited: coerce scalars/null to a list
    // and drop non-string entries instead of letting array_unique() or the
    // class-name concatenation fail on them.
    $charsetNames = array_unique(array_filter((array) $options['charset_tables'], 'is_string'));

    $indexDefaults = $this->buildIndexDefaults($this->buildCharsetTable($charsetNames));

    $conf = '';

    foreach ($databoxes as $databox) {
        $conf .= $this->buildDataboxConf($databox, $indexDefaults);
    }

    return $conf . $this->buildDaemonConf();
}

/**
 * Builds the multi-line value of the "charset_table" directive from the named charset classes.
 *
 * Names that do not resolve to a class of the Charset sub-namespace are
 * ignored. Blank lines and "#" comment lines of the tables are dropped, every
 * mapping line but the last is guaranteed to end with a comma, and every line
 * is terminated by a backslash continuation.
 *
 * @param array $charsetNames Class short names of the charset tables to merge
 *
 * @return string
 */
private function buildCharsetTable(array $charsetNames)
{
    $raw = '';

    foreach ($charsetNames as $name) {
        $classname = __NAMESPACE__ . '\\Charset\\' . $name;

        if (class_exists($classname)) {
            $charset = new $classname();
            // Terminate each table with a newline: without it, the last line of
            // one table and the first line of the next would be fused into a
            // single mapping line. Extra blank lines are filtered out below.
            $raw .= $charset->get_table() . "\n";
        }
    }

    $mappings = array();

    foreach (explode("\n", $raw) as $line) {
        $trimmed = trim($line);

        if ($trimmed === '' || strpos($trimmed, '#') === 0) {
            continue;
        }

        $mappings[] = $line;
    }

    $last = count($mappings) - 1;

    foreach ($mappings as $i => $line) {
        // Every mapping except the final one must be comma-terminated.
        if ($i < $last && substr(trim($line), -1) !== ',') {
            $line = rtrim($line) . ', ';
        }

        $mappings[$i] = " " . $line . " \\\n";
    }

    return "\\\n" . implode('', $mappings);
}

/**
 * Builds the block of index settings shared by the suggest and realtime indexes.
 *
 * @param string $charsetTable Value of the "charset_table" directive
 *
 * @return string
 */
private function buildIndexDefaults($charsetTable)
{
    return '

docinfo = extern
charset_type = utf-8

charset_table = ' . $charsetTable . '

# minimum indexed word length
# default is 1 (index everything)
min_word_len = 1


# whether to strip HTML tags from incoming documents
# known values are 0 (do not strip) and 1 (do strip)
# optional, default is 0
html_strip = 0


# enable star character search
enable_star = 1

# enable star search like cat*
min_prefix_len = 0

# enable star search like *aculous
min_infix_len = 1
';
}

/**
 * Builds the sources and indexes section of a single databox.
 *
 * NOTE(review): "sql_pass" is emitted empty; presumably so the database
 * password is not exposed through the administration panel - confirm.
 *
 * @param mixed  $databox       Databox object (host, user, dbname, port, sbas id and view name are read)
 * @param string $indexDefaults Shared index settings inserted in the suggest and realtime indexes
 *
 * @return string
 */
private function buildDataboxConf($databox, $indexDefaults)
{
    $index_crc = $this->searchEngine->CRCdatabox($databox);
    $sbasId = $databox->get_sbas_id();
    $viewname = $databox->get_viewname();

    return '


#------------------------------------------------------------------------------
# ***************** ' . $viewname . '
#------------------------------------------------------------------------------


#--------------------------------------
### Sources Abstract

source database_cfg' . $index_crc . '
{
type = mysql
sql_host = ' . $databox->get_host() . '
sql_user = ' . $databox->get_user() . '
sql_pass =
sql_db = ' . $databox->get_dbname() . '
sql_port = ' . $databox->get_port() . '

# We retrieve datas in UTF-8
sql_query_pre = SET character_set_results = "utf8", character_set_client = "utf8", \
character_set_connection = "utf8", character_set_database = "utf8", \
character_set_server = "utf8"
sql_query_pre = SET NAMES utf8
}

#--------------------------------------
### Suggestions Sources
source src_suggest' . $index_crc . ' : database_cfg' . $index_crc . '
{
sql_query = SELECT id, keyword, trigrams, freq, LENGTH(keyword) AS len FROM suggest

sql_attr_uint = freq
sql_attr_uint = len
sql_attr_string = keyword
}

index suggest' . $index_crc . '
{
source = src_suggest' . $index_crc . '
path = /var/sphinx/datas/suggest_' . $index_crc . '

' . $indexDefaults . '
}

#--------------------------------------
### Metadatas Sources
source src_metadatas' . $index_crc . ' : database_cfg' . $index_crc . '
{
sql_query = \
SELECT m.id, m.meta_struct_id, m.record_id, m.value, \
' . $sbasId . ' as sbas_id, s.id, \
CRC32(CONCAT_WS("_", ' . $sbasId . ', s.id)) as crc_struct_id, \
CONCAT_WS("_", ' . $sbasId . ', s.id) as struct_id, \
r.parent_record_id, \
CRC32(CONCAT_WS("_", ' . $sbasId . ', r.coll_id)) as crc_sbas_coll, \
CRC32(CONCAT_WS("_", ' . $sbasId . ', r.record_id)) as crc_sbas_record, \
CONCAT_WS("_", ' . $sbasId . ', r.coll_id) as sbas_coll, \
CRC32(r.type) as crc_type, r.coll_id, \
UNIX_TIMESTAMP(credate) as created_on, 0 as deleted, \
CRC32(CONCAT_WS("_", r.coll_id, s.business)) as crc_coll_business, \
s.business \
FROM metadatas m, metadatas_structure s, record r \
WHERE m.record_id = r.record_id AND m.meta_struct_id = s.id \
AND s.indexable = "1"

# documents can be filtered / sorted on each sql_attr
sql_attr_uint = record_id
sql_attr_uint = sbas_id
sql_attr_uint = coll_id
sql_attr_uint = parent_record_id
sql_attr_uint = crc_struct_id
sql_attr_uint = crc_sbas_coll
sql_attr_uint = crc_sbas_record
sql_attr_uint = crc_type
sql_attr_uint = deleted
sql_attr_uint = business
sql_attr_uint = crc_coll_business
sql_attr_timestamp = created_on

sql_attr_multi = uint status from query; SELECT m.id as id, \
CRC32(CONCAT_WS("_", ' . $sbasId . ', s.name)) as name \
FROM metadatas m, status s \
WHERE s.record_id = m.record_id AND s.value = 1 \
ORDER BY m.id ASC

# datas returned in the resultset
sql_query_info = SELECT r.* FROM record r, metadatas m \
WHERE m.id=$id AND m.record_id = r.record_id
}

#--------------------------------------
### Metadatas Index

index metadatas' . $index_crc . ' : suggest' . $index_crc . '
{
source = src_metadatas' . $index_crc . '
path = /var/sphinx/datas/metadatas_' . $index_crc . '

}

#--------------------------------------
### Metadatas Index Stemmed FR

index metadatas' . $index_crc . '_stemmed_fr : suggest' . $index_crc . '
{
source = src_metadatas' . $index_crc . '

path = /var/sphinx/datas/metadatas_' . $index_crc . '_stemmed_fr

morphology = libstemmer_fr

# minimum word length at which to enable stemming
# optional, default is 1 (stem everything)
#
min_stemming_len = 1

# whether to index original keywords along with stemmed versions
# enables "=exactform" operator to work
# optional, default is 0
#
index_exact_words = 1
}

#--------------------------------------
### Metadatas Index Stemmed EN

index metadatas' . $index_crc . '_stemmed_en : suggest' . $index_crc . '
{
source = src_metadatas' . $index_crc . '

path = /var/sphinx/datas/metadatas_' . $index_crc . '_stemmed_en

morphology = libstemmer_en

# minimum word length at which to enable stemming
# optional, default is 1 (stem everything)
#
min_stemming_len = 1

# whether to index original keywords along with stemmed versions
# enables "=exactform" operator to work
# optional, default is 0
#
index_exact_words = 1
}

#--------------------------------------
### METAS_REALTIME Index

index metas_realtime' . $index_crc . '
{
type = rt
path = /var/sphinx/datas/metas_realtime_' . $index_crc . '

' . $indexDefaults . '

rt_field = value
rt_field = meta_struct_id

rt_attr_uint = record_id
rt_attr_uint = sbas_id
rt_attr_uint = coll_id
rt_attr_uint = parent_record_id
rt_attr_uint = crc_struct_id
rt_attr_uint = crc_sbas_coll
rt_attr_uint = crc_sbas_record
rt_attr_uint = crc_type
rt_attr_uint = deleted
rt_attr_uint = business
rt_attr_uint = crc_coll_business
rt_attr_timestamp = created_on
}

#--------------------------------------
### All documents Index (give the last 1000 records added, etc...)

source src_documents' . $index_crc . ' : database_cfg' . $index_crc . '
{
sql_query = \
SELECT r.record_id as id, record_id, r.parent_record_id, ' . $sbasId . ' as sbas_id, \
CRC32(CONCAT_WS("_", ' . $sbasId . ', r.coll_id)) as crc_sbas_coll, \
CRC32(CONCAT_WS("_", ' . $sbasId . ', r.record_id)) as crc_sbas_record, \
CONCAT_WS("_", ' . $sbasId . ' , r.coll_id) as sbas_coll, \
CRC32(r.type) as crc_type, r.coll_id, \
UNIX_TIMESTAMP(credate) as created_on, 0 as deleted \
FROM record r

# documents can be filtered / sorted on each sql_attr
sql_attr_uint = record_id
sql_attr_uint = sbas_id
sql_attr_uint = coll_id
sql_attr_uint = parent_record_id
sql_attr_uint = crc_sbas_coll
sql_attr_uint = crc_sbas_record
sql_attr_uint = crc_type
sql_attr_uint = deleted
sql_attr_timestamp = created_on

sql_attr_multi = uint status from query; SELECT r.record_id as id, \
CRC32(CONCAT_WS("_", ' . $sbasId . ', s.name)) as name \
FROM record r, status s \
WHERE s.record_id = r.record_id AND s.value = 1 \
ORDER BY r.record_id ASC

sql_joined_field = metas from query; \
SELECT m.record_id as id, m.value \
FROM metadatas m, metadatas_structure s \
WHERE s.id = m.meta_struct_id AND s.business = 0 \
ORDER BY m.record_id ASC

# datas returned in the resultset
sql_query_info = SELECT r.* FROM record r WHERE r.record_id=$id
}

#--------------------------------------
### All documents Index

index documents' . $index_crc . ' : suggest' . $index_crc . '
{
source = src_documents' . $index_crc . '
path = /var/sphinx/datas/documents_' . $index_crc . '

morphology = none
}

index documents' . $index_crc . '_stemmed_fr : documents' . $index_crc . '
{
path = /var/sphinx/datas/documents_' . $index_crc . '_stemmed_fr

morphology = libstemmer_fr

# minimum word length at which to enable stemming
# optional, default is 1 (stem everything)
#
min_stemming_len = 1

# whether to index original keywords along with stemmed versions
# enables "=exactform" operator to work
# optional, default is 0
#
index_exact_words = 1
}

index documents' . $index_crc . '_stemmed_en : documents' . $index_crc . '
{
path = /var/sphinx/datas/documents_' . $index_crc . '_stemmed_en

morphology = libstemmer_en

# minimum word length at which to enable stemming
# optional, default is 1 (stem everything)
#
min_stemming_len = 1

# whether to index original keywords along with stemmed versions
# enables "=exactform" operator to work
# optional, default is 0
#
index_exact_words = 1
}

#--------------------------------------
### DOCS_REALTIME Index

index docs_realtime' . $index_crc . '
{
type = rt
path = /var/sphinx/datas/docs_realtime_' . $index_crc . '

' . $indexDefaults . '

rt_field = value
# rt_field = meta_struct_id

rt_attr_uint = record_id
rt_attr_uint = sbas_id
rt_attr_uint = coll_id
rt_attr_uint = parent_record_id
# rt_attr_uint = crc_struct_id
rt_attr_uint = crc_sbas_coll
rt_attr_uint = crc_sbas_record
rt_attr_uint = crc_type
rt_attr_uint = deleted
rt_attr_timestamp = created_on
}

#------------------------------------------------------------------------------
# ***************** End configuration for ' . $viewname . '
#------------------------------------------------------------------------------

';
}

/**
 * Builds the static "indexer" and "searchd" sections appended after all databoxes.
 *
 * @return string
 */
private function buildDaemonConf()
{
    return '

#******************************************************************************
#****************** Sphinx Indexer Configuration ****************************
#******************************************************************************

indexer {
mem_limit = 512M

# maximum IO calls per second (for I/O throttling)
# optional, default is 0 (unlimited)
#
# max_iops = 40

# maximum IO call size, bytes (for I/O throttling)
# optional, default is 0 (unlimited)
#
# max_iosize = 1048576
}

#******************************************************************************
#****************** Sphinx Search Daemon Configuration **********************
#******************************************************************************

searchd
{
# [hostname:]port[:protocol], or /unix/socket/path to listen on
# known protocols are \'sphinx\' (SphinxAPI) and \'mysql41\' (SphinxQL)
#
# multi-value, multiple listen points are allowed
# optional, defaults are 9312:sphinx and 9306:mysql41, as below
#
# listen = 127.0.0.1
# listen = 192.168.0.1:9312
# listen = 9312
# listen = /var/run/searchd.sock
listen = 9306
listen = 9308:mysql41

# log file, searchd run info is logged here
# optional, default is \'searchd.log\'
log = /var/sphinx/searchd.log

# query log file, all search queries are logged here
# optional, default is empty (do not log queries)
query_log = /var/sphinx/query.log

# client read timeout, seconds
# optional, default is 5
read_timeout = 5

# request timeout, seconds
# optional, default is 5 minutes
client_timeout = 300

# maximum amount of children to fork (concurrent searches to run)
# optional, default is 0 (unlimited)
max_children = 30

# PID file, searchd process ID file name
# mandatory
pid_file = /var/sphinx/searchd.pid

# max amount of matches the daemon ever keeps in RAM, per-index
# WARNING, THERE\'S ALSO PER-QUERY LIMIT, SEE SetLimits() API CALL
# default is 1000 (just like Google)
max_matches = 1000000

# seamless rotate, prevents rotate stalls if precaching huge datasets
# optional, default is 1
seamless_rotate = 1

# whether to forcibly preopen all indexes on startup
# optional, default is 0 (do not preopen)
preopen_indexes = 1

# whether to unlink .old index copies on succesful rotation.
# optional, default is 1 (do unlink)
unlink_old = 1

# multi-processing mode (MPM)
# known values are none, fork, prefork, and threads
# optional, default is fork
#
workers = threads # for RT to work

# binlog files path; use empty string to disable binlog
# optional, default is build-time configured data directory
#
# binlog_path = # disable logging
# binlog_path = /var/data # binlog.001 etc will be created there
binlog_path =

# binlog flush/sync mode
# 0 means flush and sync every second
# 1 means flush and sync every transaction
# 2 means flush every transaction, sync every second
# optional, default is 2
#
binlog_flush = 2

# binlog per-file size limit
# optional, default is 128M, 0 means no limit
#
# binlog_max_log_size = 256M

# max threads to create for searching local parts of a distributed index
# optional, default is 0, which means disable multi-threaded searching
# should work with all MPMs (ie. does NOT require workers=threads)
#
dist_threads = 4

# max common subtree document cache size, per-query
# optional, default is 0 (disable subtree optimization)
#
subtree_docs_cache = 4M

# max common subtree hit cache size, per-query
# optional, default is 0 (disable subtree optimization)
#
subtree_hits_cache = 8M

# max allowed per-query filter count
# optional, default is 256
#
max_filters = 512

compat_sphinxql_magics = 0

}

';
}
|
|
}
|