V 3.5 RC 1

This commit is contained in:
Romain Neutron
2011-12-05 00:23:28 +01:00
parent 6f1ee368aa
commit 4c5b7eb658
5563 changed files with 466984 additions and 985416 deletions

View File

@@ -0,0 +1,190 @@
<?php
/*
* This file is part of Phraseanet
*
* (c) 2005-2010 Alchemy
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
/**
*
* @package searchEngine
* @license http://opensource.org/licenses/gpl-3.0 GPLv3
* @link www.phraseanet.com
*/
/**
 * Shared state and read accessors for search engine adapters.
 *
 * Concrete adapters fill the protected properties while running a search;
 * this class only exposes them and derives the page count.
 */
abstract class searchEngine_adapter_abstract
{
  /**
   * Page number exposed by get_current_page().
   *
   * @var int
   */
  protected $current_page;

  /**
   * Number of results exposed by get_total_results().
   *
   * @var int
   */
  protected $total_results;

  /**
   * Page size exposed by get_per_page().
   *
   * @var int
   */
  protected $perPage;

  /**
   * Query string handled by the adapter.
   *
   * @var string
   */
  protected $query;

  /**
   * Last error message, empty when there is none.
   *
   * @var string
   */
  protected $error = '';

  /**
   * Last warning message, empty when there is none.
   *
   * @var string
   */
  protected $warning = '';

  /**
   * Number of results that can be paged through, see get_total_pages().
   *
   * @var int
   */
  protected $total_available;

  /**
   * Time exposed by get_time().
   *
   * @var float
   */
  protected $total_time;

  /**
   * Offset exposed by get_offset_start().
   *
   * @var int
   */
  protected $offset_start;

  /**
   *
   * @var boolean
   */
  protected $use_stemming = true;

  /**
   *
   * @var string
   */
  protected $locale;

  /**
   * Index description exposed by get_current_indexes().
   *
   * @var string
   */
  protected $current_index = '';

  /**
   *
   * @return int
   */
  public function get_available_results()
  {
    return $this->total_available;
  }

  /**
   *
   * @return float
   */
  public function get_time()
  {
    return $this->total_time;
  }

  /**
   *
   * @return string
   */
  public function get_error()
  {
    return $this->error;
  }

  /**
   *
   * @return string
   */
  public function get_warning()
  {
    return $this->warning;
  }

  /**
   * Default implementation: no propositions are available.
   *
   * @return string|null
   */
  public function get_propositions()
  {
    return null;
  }

  /**
   * Default implementation: nothing to reset.
   *
   * @return searchEngine_adapter_abstract
   */
  public function reset_cache()
  {
    return $this;
  }

  /**
   *
   * @return int
   */
  public function get_per_page()
  {
    return $this->perPage;
  }

  /**
   *
   * @return int
   */
  public function get_total_results()
  {
    return $this->total_results;
  }

  /**
   * Number of pages needed to display the available results.
   *
   * Returns 0 when the page size is not a positive number (for instance
   * before any search has been run) instead of dividing by zero.
   *
   * @return int
   */
  public function get_total_pages()
  {
    $per_page = $this->get_per_page();

    if ( ! $per_page || $per_page <= 0)
    {
      return 0;
    }

    return (int) ceil($this->get_available_results() / $per_page);
  }

  /**
   *
   * @return int
   */
  public function get_current_page()
  {
    return $this->current_page;
  }

  /**
   *
   * @return int
   */
  public function get_offset_start()
  {
    return $this->offset_start;
  }

  /**
   *
   * @return string
   */
  public function get_current_indexes()
  {
    return $this->current_index;
  }
}

View File

@@ -0,0 +1,57 @@
<?php
/*
* This file is part of Phraseanet
*
* (c) 2005-2010 Alchemy
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
/**
*
* @package searchEngine
* @license http://opensource.org/licenses/gpl-3.0 GPLv3
* @link www.phraseanet.com
*/
/**
 * Contract shared by the search engine adapters.
 *
 * The notes below describe what the two implementations shipped with this
 * interface (phrasea and sphinx) do; they are not enforced by the interface.
 */
interface searchEngine_adapter_interface
{
/**
 * Adapters are created without arguments.
 */
public function __construct();
/**
 * Applies the search options; both implementations return the adapter itself.
 */
public function set_options(searchEngine_options $options);
/**
 * Drops cached search state, if the adapter keeps any.
 */
public function reset_cache();
/**
 * Time reported for the last search.
 */
public function get_time();
/**
 * Number of pages for the last search.
 */
public function get_total_pages();
/**
 * Position of the first result of the current page.
 */
public function get_offset_start();
/**
 * Page number of the last search.
 */
public function get_current_page();
/**
 * Page size of the last search.
 */
public function get_per_page();
/**
 * Number of results found by the last search.
 */
public function get_total_results();
/**
 * Number of results that can be paged through.
 */
public function get_available_results();
/**
 * HTML propositions for the last query, or null when there are none.
 */
public function get_propositions();
/**
 * Query string as kept by the adapter.
 */
public function get_parsed_query();
/**
 * Alternative queries for the last search; may be an empty array.
 */
public function get_suggestions(Session_Handler $session);
/**
 * Last error message.
 */
public function get_error();
/**
 * Last warning message.
 */
public function get_warning();
/**
 * Description of the indexes used by the last search.
 */
public function get_current_indexes();
/**
 * Status information about the underlying engine.
 */
public function get_status();
/**
 * Runs a search and returns a searchEngine_results object.
 *
 * NOTE(review): the second parameter is named $page here, but both
 * implementations name it $offset and use it as the zero-based position of
 * the first wanted result (the page number is derived from it).
 */
public function results($query, $page, $perPage);
/**
 * Builds highlighted excerpts of the given fields for one record.
 */
public function build_excerpt($query, array $fields, record_adapter $record);
}

View File

@@ -0,0 +1,645 @@
<?php
/*
* This file is part of Phraseanet
*
* (c) 2005-2010 Alchemy
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
/**
*
* @package searchEngine
* @license http://opensource.org/licenses/gpl-3.0 GPLv3
* @link www.phraseanet.com
*/
class searchEngine_adapter_phrasea_engine extends searchEngine_adapter_abstract implements searchEngine_adapter_interface
{
/**
*
* @var array
*/
protected $queries = array();
/**
*
* @var array
*/
protected $colls = array();
/**
*
* @var array
*/
protected $qp = array();
/**
*
* @var array
*/
protected $answers = array();
/**
*
* @var array
*/
protected $needthesaurus = array();
/**
*
* @var array
*/
protected $indep_treeq = array();
/**
*
* @var boolean
*/
protected $options = false;
/**
*
* @var array
*/
protected $arrayq = array();
/**
*
* @var array
*/
protected $results = array();
/**
*
* @var boolean
*/
protected $reseted = false;
/**
*
* @var int
*/
protected $opt_search_type;
/**
*
* @var array
*/
protected $opt_bases = array();
/**
*
* @var array
*/
protected $opt_fields = array();
/**
*
* @var array
*/
protected $opt_status = array();
/**
*
* @var array
*/
protected $opt_date_field;
/**
*
* @var DateTime
*/
protected $opt_min_date;
/**
*
* @var DateTime
*/
protected $opt_max_date;
/**
*
* @var string
*/
protected $opt_record_type;
/**
 * Creates an adapter that relies on its property defaults only; search
 * options are provided later through set_options().
 *
 * @return searchEngine_adapter_phrasea_engine
 */
public function __construct()
{
  return $this;
}
/**
 * Copies the relevant search options onto the adapter.
 *
 * The record type is only taken over when it is one of the known types, and
 * the field list is emptied when it contains the "all fields" marker.
 *
 * @param searchEngine_options $options
 * @return searchEngine_adapter_phrasea_engine
 */
public function set_options(searchEngine_options $options)
{
  $known_types = array('image', 'video', 'audio', 'document', 'flash');

  $this->opt_search_type = (int) $options->get_search_type();
  $this->opt_bases = $options->get_bases();
  $this->opt_date_field = $options->get_date_fields();
  $this->opt_max_date = $options->get_max_date();
  $this->opt_min_date = $options->get_min_date();

  $record_type = $options->get_record_type();
  if (in_array($record_type, $known_types))
  {
    $this->opt_record_type = $record_type;
  }

  // the "all fields" marker means: do not restrict the search to any field
  $selected_fields = $options->get_fields();
  foreach ($selected_fields as $field_name)
  {
    if (trim($field_name) === 'phraseanet--all--fields')
    {
      $selected_fields = array();
      break;
    }
  }
  $this->opt_fields = $selected_fields;

  $this->opt_status = $options->get_status();

  return $this;
}
/**
 * Renders the query parser proposals as HTML.
 *
 * A heading per base is only emitted when several bases are present and the
 * base has at least one term; the first heading of each level gets a
 * zero top margin.
 *
 * @param array $proposals array with a "BASES" entry, each base holding
 *                         "NAME" and "TERMS" (each term: "TERM" and "HTML")
 * @return string
 */
protected static function proposalsToHTML($proposals)
{
  $no_margin = 'style="margin-top:0px;"';
  $markup = '';
  $first_base = true;

  foreach ($proposals["BASES"] as $base)
  {
    if (count($proposals["BASES"]) > 1 && count($base["TERMS"]) > 0)
    {
      $attr = $first_base ? $no_margin : '';
      $first_base = false;
      $markup .= "<h1 $attr>" . sprintf(_('reponses::propositions pour la base %s'), $base["NAME"]) . "</h1>";
    }

    $first_term = true;
    foreach ($base["TERMS"] as $term)
    {
      $attr = $first_term ? $no_margin : '';
      $first_term = false;
      $markup .= "<h2 $attr>" . sprintf(_('reponses::propositions pour le terme %s'), $term["TERM"]) . "</h2>";
      $markup .= $term["HTML"];
    }
  }

  return $markup;
}
/**
 * Returns the HTML propositions computed for the main query, or null when
 * no query was parsed or the parser proposed nothing.
 *
 * @return string|null
 */
public function get_propositions()
{
  if ( ! isset($this->qp['main']))
  {
    return null;
  }

  $parser = $this->qp['main'];
  $markup = self::proposalsToHTML($parser->proposals);

  if (trim($markup) === '')
  {
    return null;
  }

  return "<div style='height:0px; overflow:hidden'>" . $parser->proposals["QRY"]
    . "</div><div class='proposals'>" . $markup . "</div>";
}
/**
 * Runs (or reuses) a search and returns one page of records.
 *
 * The search is executed again when the cache was reset, when the query
 * differs from the one stored for the session, when that stored query is
 * older than ten minutes, or when no cache row exists for the session.
 * Otherwise the result count saved in the session preferences is reused.
 *
 * @param string $query   query string; an empty query is replaced by "all"
 * @param int    $offset  zero-based position of the first wanted result
 * @param int    $perPage page size, must be greater than zero
 * @return searchEngine_results
 */
function results($query, $offset, $perPage)
{
  assert(is_int($offset));
  assert($offset >= 0);
  assert(is_int($perPage));
  assert($perPage > 0);

  // floor() returns a float: cast so that the page number really is an int
  $this->current_page = (int) floor($offset / $perPage) + 1;
  $this->perPage = $perPage;

  if (trim($query) === '')
    $query = "all";

  if ($this->opt_record_type != '')
  {
    $query .= ' AND recordtype=' . $this->opt_record_type;
  }

  $appbox = appbox::get_instance();
  $session = $appbox->get_session();

  $sql = 'SELECT query, query_time FROM cache WHERE session_id = :ses_id';
  $stmt = $appbox->get_connection()->prepare($sql);
  $stmt->execute(array(':ses_id' => $session->get_ses_id()));
  $row = $stmt->fetch(PDO::FETCH_ASSOC);
  $stmt->closeCursor();

  $must_query = $this->reseted;

  if ( ! is_array($row))
  {
    // no cache row for this session: nothing can be reused
    $must_query = true;
  }
  else
  {
    $oldest_allowed = new DateTime('-10 min');
    $cached_on = new DateTime($row['query_time']);

    if ($query != $row['query'] || $oldest_allowed > $cached_on)
      $must_query = true;
  }

  if ($must_query)
  {
    $this->reset_cache();
    $this->addQuery($query);
    $this->query();
  }
  else
  {
    $this->total_available = $this->total_results = $session->get_session_prefs('phrasea_engine_n_results');
  }

  $results = new set_result();

  $perPage = $this->get_per_page();
  $page = $this->get_current_page();
  $this->offset_start = $position = (($page - 1) * $perPage);

  // phrasea_fetch_results() is called with a one-based start position
  $res = phrasea_fetch_results(
          $session->get_ses_id(), $position + 1, $perPage, false
  );

  $rs = array();
  if (isset($res['results']) && is_array($res['results']))
    $rs = $res['results'];

  foreach ($rs as $data)
  {
    try
    {
      $sbas_id = phrasea::sbasFromBas($data['base_id']);
      $record = new record_adapter(
                      $sbas_id,
                      $data['record_id'],
                      $position
      );
      $results->add_element($record);
    }
    catch (Exception $e)
    {
      // best effort: a record that cannot be loaded is skipped, but it still
      // consumes its position in the result list
    }
    $position++;
  }

  return new searchEngine_results($results, $this);
}
/**
 * Clears the phrasea cache of the current session and remembers that the
 * next call to results() has to run the query again.
 *
 * @return searchEngine_adapter_phrasea_engine
 */
public function reset_cache()
{
  $ses_id = appbox::get_instance()->get_session()->get_ses_id();

  phrasea_clear_cache($ses_id);
  $this->reseted = true;

  return $this;
}
/**
 * Returns the entries of phrasea_info() as a list of (key, value) pairs.
 *
 * @return array
 */
public function get_status()
{
  $pairs = array();

  foreach (phrasea_info() as $name => $value)
  {
    $pairs[] = array($name, $value);
  }

  return $pairs;
}
/**
 * This adapter does not provide query suggestions.
 *
 * @param Session_Handler $session unused
 * @return array always empty
 */
public function get_suggestions(Session_Handler $session)
{
  $none = array();

  return $none;
}
/**
 * Returns the query string kept by the adapter (null until one was set).
 *
 * @return string
 */
public function get_parsed_query()
{
  $current = $this->query;

  return $current;
}
/**
 * Executes the prepared queries on every selected databox.
 *
 * Stores the query in the session cache row, runs phrasea_query2() per
 * databox, writes one log_search line per databox, then saves the total
 * number of answers in the session preferences.
 *
 * @return searchEngine_adapter_phrasea_engine
 */
protected function query()
{
  $appbox = appbox::get_instance();
  $session = $appbox->get_session();
  $registry = $appbox->get_registry();

  $dateLog = date("Y-m-d H:i:s");
  $nbanswers = 0;
  $total_time = 0;

  $sql = 'UPDATE cache SET query = :query, query_time = NOW()
            WHERE session_id = :ses_id';

  $params = array(
      ':query' => $this->get_parsed_query()
      , ':ses_id' => $session->get_ses_id()
  );

  $stmt = $appbox->get_connection()->prepare($sql);
  $stmt->execute($params);
  $stmt->closeCursor();

  foreach ($this->queries as $sbas_id => $qry)
  {
    // search type 1 restricts the search to groupings, any other to documents
    $multidoc_mode = ($this->opt_search_type == 1) ? PHRASEA_MULTIDOC_REGONLY : PHRASEA_MULTIDOC_DOCONLY;

    $result = phrasea_query2(
            $session->get_ses_id()
            , $sbas_id
            , $this->colls[$sbas_id]
            , $this->arrayq[$sbas_id]
            , $registry->get('GV_sit')
            , (string) $session->get_usr_id()
            , false
            , $multidoc_mode
    );
    $this->results[$sbas_id] = $result;

    // a failed query yields no array: count it as zero answers
    $found = 0;
    if ($result)
    {
      if (isset($result['time_all']))
        $total_time += $result['time_all'];
      if (isset($result["nbanswers"]))
        $found = $result["nbanswers"];
    }
    $nbanswers += $found;

    $logger = $session->get_logger(databox::get_instance($sbas_id));

    $conn2 = connection::getPDOConnection($sbas_id);

    $sql3 = "INSERT INTO log_search
               (id, log_id, date, search, results, coll_id )
               VALUES
               (null, :log_id, :date, :query, :nbresults, :colls)";

    $params = array(
        ':log_id' => $logger->get_id()
        , ':date' => $dateLog
        , ':query' => $this->query
        , ':nbresults' => $found
        , ':colls' => implode(',', $this->colls[$sbas_id])
    );

    $stmt = $conn2->prepare($sql3);
    $stmt->execute($params);
    $stmt->closeCursor();
  }

  $this->total_time = $total_time;

  User_Adapter::saveQuery($this->query);

  $session->set_session_prefs('phrasea_engine_n_results', $nbanswers);

  $this->total_available = $this->total_results = $nbanswers;

  return $this;
}
/**
 * Parses one query into an independent tree.
 *
 * The special key 'main' parses the raw user query; any other key parses the
 * query prepared for that databox. The parser, the resulting tree and the
 * "needs thesaurus" flag are stored under the same key.
 *
 * @param int|string $sbas databox id, or the string 'main'
 * @return searchEngine_adapter_phrasea_engine
 */
protected function singleParse($sbas)
{
  $parser = new searchEngine_adapter_phrasea_queryParser(Session_Handler::get_locale());
  $parser->debug = false;
  $this->qp[$sbas] = $parser;

  // strict comparison: with "==" a numeric databox id could be taken for
  // 'main' on PHP versions that compare int and string numerically
  $source = ($sbas === 'main') ? $this->query : $this->queries[$sbas];

  $simple_treeq = $parser->parsequery($source);

  $parser->priority_opk($simple_treeq);
  $parser->distrib_opk($simple_treeq);

  $this->needthesaurus[$sbas] = false;

  $this->indep_treeq[$sbas] = $parser->extendThesaurusOnTerms($simple_treeq, true, true, false);
  $this->needthesaurus[$sbas] = $parser->containsColonOperator($this->indep_treeq[$sbas]);

  return $this;
}
/**
 * Prepares the query of every databox that owns a selected collection.
 *
 * For each of those databoxes the user query is completed with the status,
 * field and date restrictions taken from the options, parsed, expanded with
 * the thesaurus when needed and turned into the array later handed to
 * phrasea_query2() by query().
 *
 * @param string $query
 * @return searchEngine_adapter_phrasea_engine
 */
protected function addQuery($query)
{
  $qry = trim($query);

  $appbox = appbox::get_instance();

  // a databox takes part in the search as soon as one of its collections is selected
  foreach ($appbox->get_databoxes() as $databox)
  {
    foreach ($databox->get_collections() as $coll)
    {
      if (in_array($coll->get_base_id(), $this->opt_bases))
      {
        $this->queries[$databox->get_sbas_id()] = $qry;
        break;
      }
    }
  }

  $this->query = $qry;

  // opt_date_field is an array: comparing it to '' is always true, so test
  // its content instead to avoid appending an empty date clause
  $has_date_fields = is_array($this->opt_date_field) && count($this->opt_date_field) > 0;

  foreach ($this->queries as $sbas => $qs)
  {
    if ($sbas === 'main')
      continue;

    if (count($this->opt_status) > 0)
    {
      // status mask built from the highest bit down to bit 4; the four
      // lowest bits are always "x" (not constrained)
      $requestStat = 'xxxx';

      for ($i = 4; ($i <= 64); $i++)
      {
        $val = 'x';

        if (isset($this->opt_status[$i]) && isset($this->opt_status[$i][$sbas]))
        {
          if ($this->opt_status[$i][$sbas] == '0')
            $val = '0';
          elseif ($this->opt_status[$i][$sbas] == '1')
            $val = '1';
        }

        $requestStat = $val . $requestStat;
      }

      $requestStat = trim(ltrim($requestStat, 'x'));

      if ($requestStat !== '')
        $this->queries[$sbas] .= ' and (recordstatus=' . $requestStat . ')';
    }

    if (count($this->opt_fields) > 0)
    {
      $this->queries[$sbas] .= ' dans (' . implode(' ou ', $this->opt_fields) . ')';
    }

    if (($this->opt_min_date || $this->opt_max_date) && $has_date_fields)
    {
      if ($this->opt_min_date)
        $this->queries[$sbas] .= ' AND ( ' . implode(' >= ' . $this->opt_min_date->format('Y-m-d') . ' OR ', $this->opt_date_field) . ' >= ' . $this->opt_min_date->format('Y-m-d') . ' ) ';
      if ($this->opt_max_date)
        $this->queries[$sbas] .= ' AND ( ' . implode(' <= ' . $this->opt_max_date->format('Y-m-d') . ' OR ', $this->opt_date_field) . ' <= ' . $this->opt_max_date->format('Y-m-d') . ' ) ';
    }
  }

  // 'main' holds the raw user query; its parser is the one read by get_propositions()
  $this->singleParse('main');

  foreach ($this->queries as $sbas => $qryBas)
    $this->singleParse($sbas);

  foreach ($appbox->get_databoxes() as $databox)
  {
    if (!isset($this->queries[$databox->get_sbas_id()]))
      continue;

    $sbas_id = $databox->get_sbas_id();

    $this->colls[$sbas_id] = array();

    foreach ($databox->get_collections() as $coll)
    {
      if (in_array($coll->get_base_id(), $this->opt_bases))
        $this->colls[$sbas_id][] = (int) $coll->get_base_id();
    }

    if (count($this->colls[$sbas_id]) <= 0)
      continue;

    if ($this->needthesaurus[$sbas_id])
    {
      $domthesaurus = $databox->get_dom_thesaurus();

      if ($domthesaurus)
      {
        $this->qp[$sbas_id]->thesaurus2($this->indep_treeq[$sbas_id], $sbas_id, $databox->get_dbname(), $domthesaurus, true);
        $this->qp['main']->thesaurus2($this->indep_treeq['main'], $sbas_id, $databox->get_dbname(), $domthesaurus, true);
      }
    }

    if ($this->qp[$sbas_id]->errmsg != "")
    {
      $this->error .= ' ' . $this->qp[$sbas_id]->errmsg;
    }

    $emptyw = false;

    $this->qp[$sbas_id]->set_default($this->indep_treeq[$sbas_id], $emptyw);
    $this->qp[$sbas_id]->distrib_in($this->indep_treeq[$sbas_id]);
    $this->qp[$sbas_id]->factor_or($this->indep_treeq[$sbas_id]);
    $this->qp[$sbas_id]->setNumValue($this->indep_treeq[$sbas_id], $databox->get_sxml_structure());
    $this->qp[$sbas_id]->thesaurus2_apply($this->indep_treeq[$sbas_id], $sbas_id);

    $this->arrayq[$sbas_id] = $this->qp[$sbas_id]->makequery($this->indep_treeq[$sbas_id]);

    $this->results[$sbas_id] = NULL;
  }

  return $this;
}
/**
 * Builds highlighted excerpts for one record of the current answer.
 *
 * The record is fetched again from the session results with highlight
 * markers, then every requested field is read from the returned XML
 * description. A field missing from the XML falls back to the value given
 * in $fields.
 *
 * @param string         $query  unused by this adapter
 * @param array          $fields field values keyed by field name
 * @param record_adapter $record record whose position in the answer is used
 * @return array highlighted values, or an empty array when nothing usable
 *               could be fetched
 */
public function build_excerpt($query, array $fields, record_adapter $record)
{
  $session = appbox::get_instance()->get_session();

  $res = phrasea_fetch_results(
          $session->get_ses_id(), ($record->get_number() + 1), 1, true, "[[em]]", "[[/em]]"
  );

  if (!isset($res['results']) || !is_array($res['results']))
    return array();

  $rs = $res['results'];
  $first = array_shift($rs);

  if (!is_array($first) || empty($first['xml']))
    return array();

  $sxe = simplexml_load_string($first['xml']);

  // malformed XML: nothing can be highlighted
  if ($sxe === false)
    return array();

  $ret = array();

  foreach ($fields as $name => $field)
  {
    if ($sxe->description->$name)
      $ret[] = str_replace(array('[[em]]', '[[/em]]'), array('<em>', '</em>'), (string) $sxe->description->$name);
    else
      $ret[] = $field;
  }

  return $ret;
}
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,759 @@
<?php
require_once dirname(__FILE__) . '/../../../../vendor/sphinx/sphinxapi.php';
/*
* This file is part of Phraseanet
*
* (c) 2005-2010 Alchemy
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
/**
*
* @package searchEngine
* @license http://opensource.org/licenses/gpl-3.0 GPLv3
* @link www.phraseanet.com
*/
class searchEngine_adapter_sphinx_engine extends searchEngine_adapter_abstract implements searchEngine_adapter_interface
{
/**
*
* @var sphinxClient
*/
protected $sphinx;
/**
*
* @var array
*/
protected $distinct_sbas = array();
/**
*
* @var boolean
*/
protected $search_in_field = false;
/**
*
* @var searchEngine_options
*/
protected $options;
/**
*
* @var boolean
*/
protected $search_unique_record = false;
/**
 * Creates the Sphinx client from the host and port found in the registry.
 *
 * Results are requested as arrays and the connection timeout is one second.
 *
 * @return searchEngine_adapter_sphinx_engine
 */
public function __construct()
{
  $registry = registry::get_instance();

  $client = new SphinxClient();
  $client->SetArrayResult(true);
  $client->SetServer(
          $registry->get('GV_sphinx_host'),
          (int) $registry->get('GV_sphinx_port')
  );
  $client->SetConnectTimeout(1);

  $this->sphinx = $client;

  return $this;
}
/**
 * Translates the search options into Sphinx filters, grouping and sorting.
 *
 * The method starts from a clean client state so that it can be called
 * several times: filters set on the Sphinx client are cumulative, and the
 * databox list / field-search flag are derived from the options.
 *
 * @param searchEngine_options $options
 * @return searchEngine_adapter_sphinx_engine
 */
public function set_options(searchEngine_options $options)
{
  $this->options = $options;

  $this->use_stemming = $options->get_use_stemming();
  $this->locale = $options->get_locale();

  // drop everything a previous call may have left behind
  $this->distinct_sbas = array();
  $this->search_in_field = false;
  $this->sphinx->ResetFilters();
  $this->sphinx->ResetGroupBy();

  // restrict to the selected collections
  $sbas_ids = array();
  $coll_filters = array();

  foreach ($options->get_bases() as $bas)
  {
    $sbas_id = phrasea::sbasFromBas($bas);

    $this->distinct_sbas[$sbas_id] = true;
    $sbas_ids[$sbas_id] = $sbas_id;
    $coll_filters[] = crc32($sbas_id . '_' . phrasea::collFromBas($bas));
  }

  if ($coll_filters)
  {
    $this->sphinx->SetFilter('crc_sbas_coll', $coll_filters);
  }

  $this->sphinx->SetFilter('deleted', array(0));

  // restrict to the selected fields
  $wanted_fields = $options->get_fields();
  $field_filters = array();

  foreach ($sbas_ids as $sbas_id)
  {
    foreach (databox::get_instance($sbas_id)->get_meta_structure() as $field)
    {
      if (!in_array($field->get_id(), $wanted_fields))
        continue;

      $field_filters[] = crc32($sbas_id . '_' . $field->get_id());
      $this->search_in_field = true;
    }
  }

  if ($field_filters)
  {
    $this->sphinx->SetFilter('crc_struct_id', $field_filters);
  }

  /**
   * @todo : enhance : check status better
   */
  $status_opts = $options->get_status();

  foreach ($sbas_ids as $sbas_id)
  {
    foreach (databox::get_instance($sbas_id)->get_statusbits() as $n => $status)
    {
      if (!array_key_exists($n, $status_opts))
        continue;
      if (!array_key_exists($sbas_id, $status_opts[$n]))
        continue;

      // a status requested as '0' is expressed as an exclusion filter
      $crc = crc32($sbas_id . '_' . $n);
      $this->sphinx->SetFilter('status', array($crc), ($status_opts[$n][$sbas_id] == '0'));
    }
  }

  $this->sphinx->SetFilter('parent_record_id', array($options->get_search_type()));

  if ($options->get_record_type() != '')
  {
    $this->sphinx->SetFilter('crc_type', array(crc32($options->get_record_type())));
  }

  switch ($options->get_sortord())
  {
    case searchEngine_options::SORT_MODE_ASC:
      $ord = 'ASC';
      break;
    case searchEngine_options::SORT_MODE_DESC:
    default:
      $ord = 'DESC';
      break;
  }

  switch ($options->get_sortby())
  {
    case searchEngine_options::SORT_RANDOM:
      $sort = '@random';
      break;
    case searchEngine_options::SORT_CREATED_ON:
      $sort = 'created_on ' . $ord;
      break;
    case searchEngine_options::SORT_RELEVANCE:
    default:
      $sort = '@relevance ' . $ord . ', created_on ' . $ord;
      break;
  }

  $this->sphinx->SetGroupBy('crc_sbas_record', SPH_GROUPBY_ATTR, $sort);

  return $this;
}
/**
 * Returns the status reported by the Sphinx server.
 *
 * @return array
 * @throws Exception when the server does not answer
 */
public function get_status()
{
  $server_status = $this->sphinx->Status();

  if ($server_status !== false)
  {
    return $server_status;
  }

  throw new Exception(_('Sphinx server is offline'));
}
/**
 * Rewrites the user query into Sphinx extended syntax.
 *
 * - removes parentheses that wrap the whole query
 * - turns the keyword "all" into an empty query
 * - collapses runs of spaces
 * - "recordid=N" becomes a filter on record_id with an empty query
 * - otherwise a dash glued to the previous character becomes a space, and
 *   the or / except / and keywords (French and English) become |, - and +
 *
 * @return searchEngine_adapter_sphinx_engine
 */
protected function parse_query()
{
  $query = trim($this->query);

  // both delimiters are single-byte characters, so byte offsets are safe
  // even when the query contains multibyte text
  while (self::has_enclosing_parentheses($query))
    $query = substr($query, 1, -1);

  if ($query == 'all')
    $query = '';

  $query = preg_replace('/ {2,}/', ' ', $query);

  if (preg_match('/\s?recordid\s?=\s?([0-9]+)/i', $query, $matches) > 0)
  {
    $this->sphinx->SetFilter('record_id', array($matches[1]));
    $this->query = '';
    $this->search_unique_record = true;

    return $this;
  }

  $offset = 0;
  while (($pos = mb_strpos($query, '-', $offset)) !== false)
  {
    $offset = $pos + 1;
    if ($pos === 0)
    {
      continue;
    }
    // a dash preceded by a space is kept: it is the exclusion operator
    if (mb_substr($query, ($pos - 1), 1) !== ' ')
    {
      $query = mb_substr($query, 0, $pos) . ' ' . mb_substr($query, $pos + 1);
    }
  }

  $query = str_ireplace(array(' ou ', ' or '), '|', $query);
  $query = str_ireplace(array(' sauf ', ' except '), ' -', $query);
  $query = str_ireplace(array(' and ', ' et '), ' +', $query);

  $this->query = $query;

  return $this;
}

/**
 * Tells whether the opening parenthesis at the start of the query is closed
 * by the very last character, i.e. the pair wraps the whole query.
 *
 * @param string $query
 * @return boolean
 */
private static function has_enclosing_parentheses($query)
{
  $length = strlen($query);

  if ($length < 2 || $query[0] !== '(' || $query[$length - 1] !== ')')
    return false;

  $depth = 0;
  for ($i = 0; $i < $length; $i++)
  {
    if ($query[$i] === '(')
    {
      $depth++;
    }
    elseif ($query[$i] === ')')
    {
      $depth--;
      // the first parenthesis is closed before the end: "(a) or (b)"
      if ($depth === 0 && $i < $length - 1)
        return false;
    }
  }

  return $depth === 0;
}
/**
 * Runs the query against Sphinx and returns one window of records.
 *
 * The indexes searched are those of the databoxes selected through
 * set_options(): "documents" indexes for a plain search, "metadatas"
 * indexes when the search is restricted to fields, each completed with its
 * realtime counterpart. Stemmed indexes are used for French and English
 * sessions when stemming is enabled and the query is not empty. When no
 * databox is selected every index ("*") is searched.
 *
 * @param string $query
 * @param int    $offset  zero-based position of the first wanted result
 * @param int    $perPage page size, must be greater than zero
 * @return searchEngine_results
 */
public function results($query, $offset, $perPage)
{
  assert(is_int($offset));
  assert($offset >= 0);
  assert(is_int($perPage));
  assert($perPage > 0);

  $session = appbox::get_instance()->get_session();

  // floor, not ceil: offset 5 with 10 per page is still on page 1
  $this->current_page = (int) floor($offset / $perPage) + 1;
  $this->perPage = $perPage;
  $this->offset_start = $offset;
  $this->query = $query;

  $this->sphinx->SetLimits($offset, $this->perPage);
  $this->sphinx->SetMatchMode(SPH_MATCH_EXTENDED2);

  $this->parse_query();

  $index_keys = array();
  foreach (phrasea::sbas_params() as $sbas_id => $conn_params)
  {
    if (!array_key_exists($sbas_id, $this->distinct_sbas))
      continue;
    $index_keys[] = crc32(str_replace(array('.', '%'), '_', sprintf('%s_%s_%s_%s', $conn_params['host'], $conn_params['port'], $conn_params['user'], $conn_params['dbname'])));
  }

  $index = '*';

  if (count($index_keys) > 0)
  {
    if ($this->search_in_field === false)
    {
      $prefix = 'documents';
      $realtime = 'docs_realtime';
    }
    else
    {
      $prefix = 'metadatas';
      $realtime = 'metas_realtime';
    }

    $suffix = '';
    if ($this->query !== '' && $this->options->get_use_stemming())
    {
      $lang = $session->get_I18n();
      if ($lang == 'fr')
        $suffix = '_stemmed_fr';
      elseif ($lang == 'en')
        $suffix = '_stemmed_en';
    }

    $names = array();
    foreach ($index_keys as $key)
      $names[] = $prefix . $key . $suffix;
    foreach ($index_keys as $key)
      $names[] = $realtime . $key;

    $index = implode(', ', $names);
  }

  $this->current_index = $index;

  $res = $this->sphinx->Query($this->query, $this->current_index);
  $results = new set_result();

  if ($res === false)
  {
    if ($this->sphinx->IsConnectError() === true)
    {
      $this->error = _('Sphinx server is offline');
    }
    else
    {
      $this->error = $this->sphinx->GetLastError();
    }
    $this->warning = $this->sphinx->GetLastWarning();
  }
  else
  {
    $this->error = $res['error'];
    $this->warning = $res['warning'];

    $this->total_time = $res['time'];
    $this->total_results = $res['total_found'];
    $this->total_available = $res['total'];

    $position = $this->offset_start;

    if (isset($res['matches']))
    {
      foreach ($res['matches'] as $match)
      {
        try
        {
          $record =
                  new record_adapter(
                          $match['attrs']['sbas_id']
                          , $match['attrs']['record_id']
                          , $position
          );

          $results->add_element($record);
        }
        catch (Exception $e)
        {
          // best effort: a record that cannot be loaded is skipped, but it
          // still consumes its position in the result list
        }
        $position++;
      }
    }
  }

  return new searchEngine_results($results, $this);
}
/**
 * Splits a keyword, padded with two underscores on each side, into its
 * successive three-byte sequences.
 *
 * @param string $keyword
 * @return string the trigrams, each one followed by a space
 */
function BuildTrigrams($keyword)
{
  $padded = "__" . $keyword . "__";
  $last_start = strlen($padded) - 3;

  $parts = array();
  for ($start = 0; $start <= $last_start; $start++)
  {
    $parts[] = substr($padded, $start, 3);
  }

  return implode(" ", $parts) . " ";
}
// public function get_index_suggestions($keyword)
// {
// $trigrams = $this->BuildTrigrams($keyword);
// $query = "\"$trigrams\"/1";
// $len = strlen($keyword);
//
// $this->sphinx->SetArrayResult(true);
//
// $this->sphinx->SetMatchMode(SPH_MATCH_EXTENDED2);
// $this->sphinx->SetRankingMode(SPH_RANK_WORDCOUNT);
// $this->sphinx->SetFilterRange("len", $len - 2, $len + 2);
// $this->sphinx->SetSelect("*, @weight+2-abs(len-$len) AS myrank");
// $this->sphinx->SetSortMode(SPH_SORT_EXTENDED, "myrank DESC, freq DESC");
// $this->sphinx->SetLimits(0, 10);
//
// $params = phrasea::sbas_params();
//
// $index_keys = array();
// foreach ($params as $sbas_id => $p)
// {
// $index_keys[] = crc32(str_replace(array('.', '%'), '_', sprintf('%s_%s_%s_%s', $p['host'], $p['port'], $p['user'], $p['dbname'])));
// }
// $index = 'suggest' . implode(',suggest', $index_keys);
//
// $res = $this->sphinx->Query($query, $index);
//
// if ($this->sphinx->Status() === false)
// {
// return array();
// }
//
// if (!$res || !isset($res["matches"]))
// {
// return array();
// }
//
// $ret = array();
// foreach ($res["matches"] as $match)
// $ret[] = $match['attrs']['keyword'];
//
// return $ret;
// }
/**
 * Looks up keywords close to $word in the "suggest" indexes of the selected
 * databoxes, using a trigram query.
 *
 * The shared Sphinx client is reconfigured for the lookup; its filters,
 * grouping, ranking and sorting are put back whatever the outcome, so that
 * a failed lookup does not affect the queries that follow.
 *
 * @param string $word
 * @return array suggested keywords, empty when the lookup failed
 */
protected function get_sugg_trigrams($word)
{
  $trigrams = $this->BuildTrigrams($word);
  $query = "\"$trigrams\"/1";
  $len = strlen($word);

  $this->sphinx->ResetGroupBy();
  $this->sphinx->ResetFilters();

  $this->sphinx->SetMatchMode(SPH_MATCH_EXTENDED2);
  $this->sphinx->SetRankingMode(SPH_RANK_WORDCOUNT);
  $this->sphinx->SetFilterRange("len", $len - 2, $len + 4);

  $this->sphinx->SetSortMode(SPH_SORT_EXTENDED, "@weight DESC");
  $this->sphinx->SetLimits(0, 10);

  $index_keys = array();
  foreach (phrasea::sbas_params() as $sbas_id => $p)
  {
    if (!array_key_exists($sbas_id, $this->distinct_sbas))
      continue;
    $index_keys[] = crc32(str_replace(array('.', '%'), '_', sprintf('%s_%s_%s_%s', $p['host'], $p['port'], $p['user'], $p['dbname'])));
  }
  $index = 'suggest' . implode(',suggest', $index_keys);

  $res = $this->sphinx->Query($query, $index);
  $server_up = $this->sphinx->Status() !== false;

  // put the client back in its search configuration on every path
  $this->sphinx->ResetGroupBy();
  $this->sphinx->ResetFilters();
  $this->sphinx->SetRankingMode(SPH_RANK_PROXIMITY_BM25);
  $this->sphinx->SetSortMode(SPH_SORT_RELEVANCE);
  if ($this->options instanceof searchEngine_options)
  {
    $this->set_options($this->options);
  }

  if (!$server_up || !$res || !isset($res["matches"]))
  {
    return array();
  }

  $ret = array();
  foreach ($res["matches"] as $match)
    $ret[] = $match['attrs']['keyword'];

  return $ret;
}
/**
 * Proposes alternative queries for the last search.
 *
 * Each word of the query (operators and punctuation removed) is checked
 * with enchant when the extension and a dictionary for the session language
 * are available; with $only_last_word, trigram suggestions are added for
 * the last word. Every combination is then run against Sphinx and kept when
 * it finds something. Hit counts and the final list are cached for one hour.
 *
 * @param Session_Handler $session
 * @param boolean         $only_last_word only work on the last word typed
 * @return array list of array('value', 'current', 'hits'[, 'cache']) sorted
 *               by decreasing hits, the current query included
 */
public function get_suggestions(Session_Handler $session, $only_last_word = false)
{
  if (!$this->current_index)
    $this->current_index = '*';

  $appbox = appbox::get_instance();
  $supposed_qry = mb_strtolower($this->query);

  // punctuation is dropped everywhere, operators only as whole words
  // (a plain str_replace would turn "portrait" into "p trait")
  $cleaned = str_replace(array("'", '"', "(", ")", "_", "-"), ' ', $supposed_qry);
  $keywords = '/(?<![\p{L}\p{N}])(?:all|last|et|ou|sauf|and|or|except|in|dans)(?![\p{L}\p{N}])/u';
  $without_keywords = preg_replace($keywords, ' ', $cleaned);
  if ($without_keywords !== null)
  {
    // null means the query is not valid UTF-8: keep the keywords in that case
    $cleaned = $without_keywords;
  }
  $pieces = explode(" ", $cleaned);

  $clef = 'sph_sugg_' . crc32(serialize($this->options) . ' ' . $this->current_index . implode(' ', $pieces) . ' ' . ($only_last_word ? '1' : '0'));

  try
  {
    return $appbox->get_data_from_cache($clef);
  }
  catch (Exception $e)
  {
    // cache miss: compute the suggestions below
  }

  if ($only_last_word)
  {
    $pieces = array(array_pop($pieces));
  }

  $tag = $session->get_I18n();

  $suggestions = array();

  if (function_exists('enchant_broker_init'))
  {
    $r = enchant_broker_init();
    if (enchant_broker_dict_exists($r, $tag))
    {
      $d = enchant_broker_request_dict($r, $tag);

      foreach ($pieces as $piece)
      {
        if (trim($piece) === '')
          continue;

        $suggs = array($piece);
        if (enchant_dict_check($d, $piece) == false)
        {
          $suggs = array_unique(array_merge($suggs, enchant_dict_suggest($d, $piece)));
        }

        $suggestions[] = array('original' => $piece, 'suggs' => $suggs);
      }
      enchant_broker_free_dict($d);
    }
    enchant_broker_free($r);
  }

  if ($only_last_word)
  {
    foreach ($pieces as $piece)
    {
      foreach ($this->get_sugg_trigrams($piece) as $tri_sugg)
      {
        $suggestions[] = array('original' => $piece, 'suggs' => array($tri_sugg));
      }
    }
  }

  // combine the replacements word after word
  $q_todo = array($supposed_qry);

  foreach ($suggestions as $suggestion)
  {
    $tmp_qq = array();
    foreach ($suggestion['suggs'] as $sugg)
    {
      foreach ($q_todo as $q_td)
      {
        $tmp_qq[] = $q_td;
        $tmp_qq[] = str_replace($suggestion['original'], $sugg, $q_td);
      }
      $tmp_qq[] = str_replace($suggestion['original'], $sugg, $supposed_qry);
    }
    $q_todo = array_unique(array_merge($tmp_qq, array($supposed_qry)));
  }

  $propals = array(array('value' => $supposed_qry, 'current' => true, 'hits' => $this->get_total_results()));

  foreach ($q_todo as $f)
  {
    if ($f == $supposed_qry)
      continue;

    $clef_unique_datas = 'sph_sugg_' . crc32(serialize($this->options) . $this->current_index . $f);

    try
    {
      $datas = $appbox->get_data_from_cache($clef_unique_datas);
    }
    catch (Exception $e)
    {
      $datas = false;
    }

    if (is_int($datas))
    {
      $found = $datas;
      $cache = true;
    }
    else
    {
      $cache = false;
      $found = 0;

      $tmp_res = $this->sphinx->Query($f, $this->current_index);

      if ($tmp_res !== false && isset($tmp_res['total_found']))
      {
        $found = (int) $tmp_res['total_found'];
      }
      $appbox->set_data_to_cache($found, $clef_unique_datas, 3600);
    }

    if ($found > 0)
    {
      $propals[] = array('value' => $f, 'current' => false, 'hits' => $found, 'cache' => $cache);
    }
  }

  usort($propals, array(__CLASS__, 'suggestions_hit_sorter'));

  // drop proposals with less than 1% of the hits of the best one seen so far
  $max = 0;

  foreach ($propals as $key => $prop)
  {
    $max = max($max, $prop['hits'] * 1 / 100);
    if ($prop['hits'] < $max)
      unset($propals[$key]);
  }

  $appbox->set_data_to_cache($propals, $clef, 3600);

  return $propals;
}
/**
 * usort() callback ordering proposals by decreasing number of hits.
 *
 * @param array $a proposal with a 'hits' entry
 * @param array $b proposal with a 'hits' entry
 * @return int -1, 0 or 1
 */
protected static function suggestions_hit_sorter($a, $b)
{
  $left = $a['hits'];
  $right = $b['hits'];

  if ($left != $right)
  {
    return ($left > $right) ? -1 : 1;
  }

  return 0;
}
/**
 * Returns the query as it stands after the last parsing (null until a
 * search was run).
 *
 * @return string
 */
public function get_parsed_query()
{
  $current = $this->query;

  return $current;
}
/**
 * Asks Sphinx for highlighted excerpts of the given field values.
 *
 * The index used is the one of the record's databox: "documents" for a
 * plain search, "metadatas" when the search is restricted to fields, in its
 * stemmed variant for French and English sessions when stemming is enabled.
 *
 * @param string         $query
 * @param array          $fields field values to highlight
 * @param record_adapter $record record the fields belong to
 * @return array|false whatever SphinxClient::BuildExcerpts() returns
 */
public function build_excerpt($query, array $fields, record_adapter $record)
{
  $session = appbox::get_instance()->get_session();
  $selected_sbas_id = $record->get_sbas_id();

  $index_keys = array();
  foreach (phrasea::sbas_params() as $sbas_id => $conn_params)
  {
    if ($sbas_id != $selected_sbas_id)
      continue;
    $index_keys[] = crc32(str_replace(array('.', '%'), '_', sprintf('%s_%s_%s_%s', $conn_params['host'], $conn_params['port'], $conn_params['user'], $conn_params['dbname'])));
  }

  $index = '';

  if (count($index_keys) > 0)
  {
    $prefix = ($this->search_in_field === false) ? 'documents' : 'metadatas';

    $suffix = '';
    if ($this->options && $this->options->get_use_stemming())
    {
      $lang = $session->get_I18n();
      if ($lang == 'fr')
        $suffix = '_stemmed_fr';
      elseif ($lang == 'en')
        $suffix = '_stemmed_en';
    }

    $names = array();
    foreach ($index_keys as $key)
      $names[] = $prefix . $key . $suffix;

    $index = implode(', ', $names);
  }

  $opts = array(
      'before_match' => "<em>",
      'after_match' => "</em>"
  );

  return $this->sphinx->BuildExcerpts($fields, $index, $query, $opts);
}
}