This commit is contained in:
Romain Neutron
2012-04-26 00:55:53 +02:00
parent edbfff226e
commit ade22295ad
631 changed files with 92375 additions and 101763 deletions

View File

@@ -7,9 +7,8 @@
class sphinx_charsetTable_CJK extends sphinx_charsetTableAbstract
{
protected $name = 'Chinese, Japanese, and Korean';
protected $table = '
protected $name = 'Chinese, Japanese, and Korean';
protected $table = '
#################################################
# CJK*
U+F900->U+8C48, U+F901->U+66F4, U+F902->U+8ECA, U+F903->U+8CC8, U+F904->U+6ED1,

View File

@@ -7,9 +7,8 @@
class sphinx_charsetTable_arabic extends sphinx_charsetTableAbstract
{
protected $name = 'Arabic';
protected $table = '
protected $name = 'Arabic';
protected $table = '
##################################################
# Arabic
U+0622->U+0627, U+0623->U+0627, U+0624->U+0648, U+0625->U+0627, U+0626->U+064A,

View File

@@ -7,9 +7,8 @@
class sphinx_charsetTable_armenian extends sphinx_charsetTableAbstract
{
protected $name = 'Armenian';
protected $table = '
protected $name = 'Armenian';
protected $table = '
##################################################
# Armenian
U+0531..U+0556->U+0561..U+0586, U+0561..U+0586, U+0587

View File

@@ -7,9 +7,8 @@
class sphinx_charsetTable_bengali extends sphinx_charsetTableAbstract
{
protected $name = 'Bengali';
protected $table = '
protected $name = 'Bengali';
protected $table = '
#################################################
# Bengali
U+09DC->U+09A1, U+09DD->U+09A2, U+09DF->U+09AF, U+09F0->U+09AC, U+09F1->U+09AC,

View File

@@ -7,9 +7,8 @@
class sphinx_charsetTable_common extends sphinx_charsetTableAbstract
{
protected $name = 'Default';
protected $table = '
protected $name = 'Default';
protected $table = '
##################################################
# Common
U+FF10..U+FF19->0..9, U+FF21..U+FF3A->a..z, U+FF41..U+FF5A->a..z, 0..9,

View File

@@ -7,9 +7,8 @@
class sphinx_charsetTable_coptic extends sphinx_charsetTableAbstract
{
protected $name = 'Coptic';
protected $table = '
protected $name = 'Coptic';
protected $table = '
##################################################
# Coptic
# Notes: Some shared Greek characters, may require amendments.

View File

@@ -7,9 +7,8 @@
class sphinx_charsetTable_cyrillic extends sphinx_charsetTableAbstract
{
protected $name = 'Cyrillic';
protected $table = '
protected $name = 'Cyrillic';
protected $table = '
##################################################
# Cyrillic*
U+0400->U+0435, U+0401->U+0435, U+0402->U+0452, U+0452, U+0403->U+0433,

View File

@@ -7,9 +7,8 @@
class sphinx_charsetTable_devanagari extends sphinx_charsetTableAbstract
{
protected $name = 'Devanagari';
protected $table = '
protected $name = 'Devanagari';
protected $table = '
##################################################
# Devanagari
U+0929->U+0928, U+0931->U+0930, U+0934->U+0933, U+0958->U+0915, U+0959->U+0916,

View File

@@ -7,9 +7,8 @@
class sphinx_charsetTable_georgian extends sphinx_charsetTableAbstract
{
protected $name = 'Georgian';
protected $table = '
protected $name = 'Georgian';
protected $table = '
##################################################
# Georgian
U+10FC->U+10DC, U+10D0..U+10FA, U+10A0..U+10C5->U+2D00..U+2D25, U+2D00..U+2D25

View File

@@ -7,9 +7,8 @@
class sphinx_charsetTable_greek extends sphinx_charsetTableAbstract
{
protected $name = 'Greek';
protected $table = '
protected $name = 'Greek';
protected $table = '
##################################################
# Greek
U+0386->U+03B1, U+0388->U+03B5, U+0389->U+03B7, U+038A->U+03B9, U+038C->U+03BF,

View File

@@ -7,9 +7,8 @@
class sphinx_charsetTable_gujarati extends sphinx_charsetTableAbstract
{
protected $name = 'Gujarati';
protected $table = '
protected $name = 'Gujarati';
protected $table = '
##################################################
# Gujarati
U+0A85..U+0A8C, U+0A8F, U+0A90, U+0A93..U+0AB0, U+0AB2, U+0AB3, U+0AB5..U+0AB9,

View File

@@ -7,9 +7,8 @@
class sphinx_charsetTable_gurmukhi extends sphinx_charsetTableAbstract
{
protected $name = 'Gurmukhi';
protected $table = '
protected $name = 'Gurmukhi';
protected $table = '
##################################################
# Gurmukhi
U+0A33->U+0A32, U+0A36->U+0A38, U+0A59->U+0A16, U+0A5A->U+0A17, U+0A5B->U+0A1C,

View File

@@ -7,9 +7,8 @@
class sphinx_charsetTable_hebrew extends sphinx_charsetTableAbstract
{
protected $name = 'Hebrew';
protected $table = '
protected $name = 'Hebrew';
protected $table = '
#################################################
# Hebrew*
U+FB1D->U+05D9, U+FB1F->U+05F2, U+FB20->U+05E2, U+FB21->U+05D0, U+FB22->U+05D3,

View File

@@ -7,9 +7,8 @@
class sphinx_charsetTable_kannada extends sphinx_charsetTableAbstract
{
protected $name = 'Kannada';
protected $table = '
protected $name = 'Kannada';
protected $table = '
#################################################
# Kannada
U+0C85..U+0C8C, U+0C8E..U+0C90, U+0C92..U+0CA8, U+0CAA..U+0CB3, U+0CB5..U+0CB9,

View File

@@ -7,9 +7,8 @@
class sphinx_charsetTable_latin extends sphinx_charsetTableAbstract
{
protected $name = 'Latin';
protected $table = '
protected $name = 'Latin';
protected $table = '
##################################################
# Latin
# A

View File

@@ -7,9 +7,8 @@
class sphinx_charsetTable_limbu extends sphinx_charsetTableAbstract
{
protected $name = 'Limbu';
protected $table = '
protected $name = 'Limbu';
protected $table = '
#################################################
# Limbu
U+1900..U+191C, U+1930..U+1938, U+1946..U+194F

View File

@@ -7,9 +7,8 @@
class sphinx_charsetTable_malayalam extends sphinx_charsetTableAbstract
{
protected $name = 'Malayalam';
protected $table = '
protected $name = 'Malayalam';
protected $table = '
#################################################
# Malayalam
U+0D05..U+0D0C, U+0D0E..U+0D10, U+0D12..U+0D28, U+0D2A..U+0D39, U+0D60,

View File

@@ -7,9 +7,8 @@
class sphinx_charsetTable_tamil extends sphinx_charsetTableAbstract
{
protected $name = 'Tamil';
protected $table = '
protected $name = 'Tamil';
protected $table = '
#################################################
# Tamil
U+0B94->U+0B92, U+0B85..U+0B8A, U+0B8E..U+0B90, U+0B92, U+0B93, U+0B95, U+0B99,

View File

@@ -7,9 +7,8 @@
class sphinx_charsetTable_thai extends sphinx_charsetTableAbstract
{
protected $name = 'Thai';
protected $table = '
protected $name = 'Thai';
protected $table = '
#################################################
# Thai
U+0E01..U+0E30, U+0E32, U+0E33, U+0E40..U+0E46, U+0E50..U+0E5B

View File

@@ -17,22 +17,19 @@
*/
abstract class sphinx_charsetTableAbstract
{
// Base class for the sphinx_charsetTable_* classes: stores a display name
// and a charset table string, and exposes both through getters.
//
// NOTE(review): each member below appears twice. This looks like the removed
// and added sides of a diff hunk rendered without +/- markers; confirm
// against the real file, since PHP rejects redeclared members.

// Charset table definition string; get_table() treats null as "not set".
protected $table;
// Human-readable name of the charset table.
protected $name;
protected $table;
/**
 * Returns the value of the $name property.
 */
public function get_name()
{
return $this->name;
}
protected $name;
/**
 * Returns the value of the $name property.
 */
public function get_name()
{
return $this->name;
}
/**
 * Returns the value of the $table property.
 *
 * @throws Exception when $table is null
 */
public function get_table()
{
// A null table means the object was never given a table definition.
if (is_null($this->table))
throw new Exception('Invalid charsetTable object');
return $this->table;
}
/**
 * Returns the value of the $table property.
 *
 * @throws Exception when $table is null
 */
public function get_table()
{
// A null table means the object was never given a table definition.
if (is_null($this->table))
throw new Exception('Invalid charsetTable object');
return $this->table;
}
}

View File

@@ -17,108 +17,97 @@
*/
class sphinx_configuration
{
const OPT_ALL_SBAS = 'all';
const OPT_LIBSTEMMER_NONE = 'none';
const OPT_LIBSTEMMER_FR = 'fr';
const OPT_LIBSTEMMER_EN = 'en';
const OPT_ENABLE_STAR_ON = 'yes';
const OPT_ENABLE_STAR_OFF = 'no';
const OPT_MIN_PREFIX_LEN = 0;
const OPT_MIN_INFIX_LEN = 1;
const OPT_ALL_SBAS = 'all';
const OPT_LIBSTEMMER_NONE = 'none';
const OPT_LIBSTEMMER_FR = 'fr';
const OPT_LIBSTEMMER_EN = 'en';
const OPT_ENABLE_STAR_ON = 'yes';
const OPT_ENABLE_STAR_OFF = 'no';
const OPT_MIN_PREFIX_LEN = 0;
const OPT_MIN_INFIX_LEN = 1;
public function __construct()
{
}
public function get_available_charsets()
{
$available_charsets = array();
$dir = __DIR__ . '/charsetTable/';
echo $dir;
$registry = registry::get_instance();
foreach (new RecursiveIteratorIterator(new RecursiveDirectoryIterator($dir), RecursiveIteratorIterator::LEAVES_ONLY) as $file)
public function __construct()
{
if ($file->isDir() || strpos($file->getPathname(), '/.svn/') !== false)
{
continue;
}
if ($file->isFile())
{
$classname = str_replace(array($registry->get('GV_RootPath') . 'lib/classes/', '.class.php', '/'), array('', '', '_'), $file->getPathname());
$available_charsets[$classname] = new $classname;
}
}
ksort($available_charsets);
return $available_charsets;
}
public function get_available_libstemmer()
{
return array(self::OPT_LIBSTEMMER_EN, self::OPT_LIBSTEMMER_FR, self::OPT_LIBSTEMMER_NONE);
}
public function get_configuration($options = array())
{
$defaults = array(
'sbas' => self::OPT_ALL_SBAS
, 'libstemmer' => array(self::OPT_LIBSTEMMER_NONE)
, 'enable_star' => self::OPT_ENABLE_STAR_ON
, 'min_prefix_len' => self::OPT_MIN_PREFIX_LEN
, 'min_infix_len' => self::OPT_MIN_INFIX_LEN
, 'charset_tables' => array()
);
$options = array_merge($defaults, $options);
$options['charset_tables'] = array_unique($options['charset_tables']);
$lb = phrasea::sbas_params();
$conf = '';
$charsets = '';
foreach ($options['charset_tables'] as $charset)
{
try
{
$charset_table = new $charset();
$charsets .= $charset_table->get_table();
}
catch (Exception $e)
{
}
}
$charsets = explode("\n", $charsets);
$last_detect = false;
for ($i = (count($charsets) - 1); $i >= 0; $i -- )
public function get_available_charsets()
{
if (trim($charsets[$i]) === '')
{
unset($charsets[$i]);
continue;
}
if (strpos(trim($charsets[$i]), '#') === 0)
{
unset($charsets[$i]);
continue;
}
if ($last_detect === true && substr(trim($charsets[$i]), (strlen(trim($charsets[$i])) - 1), 1) !== ',')
$charsets[$i] = rtrim($charsets[$i]) . ', ';
$charsets[$i] = " " . $charsets[$i] . " \\\n";
$last_detect = true;
$available_charsets = array();
$dir = __DIR__ . '/charsetTable/';
echo $dir;
$registry = registry::get_instance();
foreach (new RecursiveIteratorIterator(new RecursiveDirectoryIterator($dir), RecursiveIteratorIterator::LEAVES_ONLY) as $file) {
if ($file->isDir() || strpos($file->getPathname(), '/.svn/') !== false) {
continue;
}
if ($file->isFile()) {
$classname = str_replace(array($registry->get('GV_RootPath') . 'lib/classes/', '.class.php', '/'), array('', '', '_'), $file->getPathname());
$available_charsets[$classname] = new $classname;
}
}
ksort($available_charsets);
return $available_charsets;
}
$charsets = "\\\n" . implode('', $charsets);
public function get_available_libstemmer()
{
return array(self::OPT_LIBSTEMMER_EN, self::OPT_LIBSTEMMER_FR, self::OPT_LIBSTEMMER_NONE);
}
$charset_abstract = '
public function get_configuration($options = array())
{
$defaults = array(
'sbas' => self::OPT_ALL_SBAS
, 'libstemmer' => array(self::OPT_LIBSTEMMER_NONE)
, 'enable_star' => self::OPT_ENABLE_STAR_ON
, 'min_prefix_len' => self::OPT_MIN_PREFIX_LEN
, 'min_infix_len' => self::OPT_MIN_INFIX_LEN
, 'charset_tables' => array()
);
$options = array_merge($defaults, $options);
$options['charset_tables'] = array_unique($options['charset_tables']);
$lb = phrasea::sbas_params();
$conf = '';
$charsets = '';
foreach ($options['charset_tables'] as $charset) {
try {
$charset_table = new $charset();
$charsets .= $charset_table->get_table();
} catch (Exception $e) {
}
}
$charsets = explode("\n", $charsets);
$last_detect = false;
for ($i = (count($charsets) - 1); $i >= 0; $i -- ) {
if (trim($charsets[$i]) === '') {
unset($charsets[$i]);
continue;
}
if (strpos(trim($charsets[$i]), '#') === 0) {
unset($charsets[$i]);
continue;
}
if ($last_detect === true && substr(trim($charsets[$i]), (strlen(trim($charsets[$i])) - 1), 1) !== ',')
$charsets[$i] = rtrim($charsets[$i]) . ', ';
$charsets[$i] = " " . $charsets[$i] . " \\\n";
$last_detect = true;
}
$charsets = "\\\n" . implode('', $charsets);
$charset_abstract = '
docinfo = extern
charset_type = utf-8
@@ -146,13 +135,12 @@ class sphinx_configuration
min_infix_len = 1
';
foreach ($lb as $id => $params)
{
foreach ($lb as $id => $params) {
$serialized = str_replace(array('.', '%'), '_', sprintf('%s_%s_%s_%s', $params['host'], $params['port'], $params['user'], $params['dbname']));
$index_crc = crc32($serialized);
$serialized = str_replace(array('.', '%'), '_', sprintf('%s_%s_%s_%s', $params['host'], $params['port'], $params['user'], $params['dbname']));
$index_crc = crc32($serialized);
$conf .= '
$conf .= '
#------------------------------------------------------------------------------
@@ -248,9 +236,8 @@ class sphinx_configuration
';
if (in_array(self::OPT_LIBSTEMMER_NONE, $options['libstemmer']))
{
$conf .= '
if (in_array(self::OPT_LIBSTEMMER_NONE, $options['libstemmer'])) {
$conf .= '
#--------------------------------------
### Metadatas Index
@@ -261,11 +248,10 @@ class sphinx_configuration
}
';
}
}
if (in_array(self::OPT_LIBSTEMMER_FR, $options['libstemmer']))
{
$conf .= '
if (in_array(self::OPT_LIBSTEMMER_FR, $options['libstemmer'])) {
$conf .= '
#--------------------------------------
### Metadatas Index Stemmed FR
@@ -290,10 +276,9 @@ class sphinx_configuration
index_exact_words = 1
}
';
}
if (in_array(self::OPT_LIBSTEMMER_EN, $options['libstemmer']))
{
$conf .= '
}
if (in_array(self::OPT_LIBSTEMMER_EN, $options['libstemmer'])) {
$conf .= '
#--------------------------------------
### Metadatas Index Stemmed EN
@@ -318,8 +303,8 @@ class sphinx_configuration
index_exact_words = 1
}
';
}
$conf .= '
}
$conf .= '
#--------------------------------------
### METAS_REALTIME Index
@@ -474,9 +459,9 @@ class sphinx_configuration
';
}
}
$conf .='
$conf .='
#******************************************************************************
@@ -613,7 +598,6 @@ searchd
return $conf;
}
return $conf;
}
}