mirror of
https://github.com/alchemy-fr/Phraseanet.git
synced 2025-10-12 04:23:19 +00:00
[SearchEngine] Add unit tests
This commit is contained in:
@@ -0,0 +1,14 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
namespace Alchemy\Phrasea\SearchEngine;
|
||||||
|
|
||||||
|
use Silex\Application;
|
||||||
|
use Symfony\Component\HttpFoundation\Request;
|
||||||
|
|
||||||
|
/**
 * Contract for the configuration panel of a search engine.
 *
 * Implemented by the ConfigurationPanel classes of the Phrasea and
 * SphinxSearch engines.
 */
interface ConfigurationPanelInterface
{
    /**
     * Receives the application and the current request.
     *
     * NOTE(review): presumably serves the panel for a GET request; the
     * return value is not constrained here - confirm against implementations.
     *
     * @param Application $app
     * @param Request     $request
     */
    public function get(Application $app, Request $request);

    /**
     * Receives the application and the current request.
     *
     * NOTE(review): presumably handles the panel submission (POST request);
     * the return value is not constrained here - confirm against implementations.
     *
     * @param Application $app
     * @param Request     $request
     */
    public function post(Application $app, Request $request);
}
|
@@ -2,9 +2,10 @@
|
|||||||
|
|
||||||
namespace Alchemy\Phrasea\SearchEngine\Phrasea;
|
namespace Alchemy\Phrasea\SearchEngine\Phrasea;
|
||||||
|
|
||||||
|
use Alchemy\Phrasea\SearchEngine\ConfigurationPanelInterface;
|
||||||
use Silex\Application;
|
use Silex\Application;
|
||||||
|
|
||||||
class ConfigurationPanel
|
class ConfigurationPanel implements ConfigurationPanelInterface
|
||||||
{
|
{
|
||||||
protected $charsets;
|
protected $charsets;
|
||||||
protected $searchEngine;
|
protected $searchEngine;
|
||||||
|
@@ -32,9 +32,7 @@ interface SearchEngineInterface
|
|||||||
*/
|
*/
|
||||||
public function status();
|
public function status();
|
||||||
|
|
||||||
public function getConfigurationPanel(Application $app, Request $request);
|
public function configurationPanel();
|
||||||
|
|
||||||
public function postConfigurationPanel(Application $app, Request $request);
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
*
|
||||||
|
@@ -2,11 +2,12 @@
|
|||||||
|
|
||||||
namespace Alchemy\Phrasea\SearchEngine\SphinxSearch;
|
namespace Alchemy\Phrasea\SearchEngine\SphinxSearch;
|
||||||
|
|
||||||
|
use Alchemy\Phrasea\SearchEngine\ConfigurationPanelInterface;
|
||||||
use Silex\Application;
|
use Silex\Application;
|
||||||
use Symfony\Component\Finder\Finder;
|
use Symfony\Component\Finder\Finder;
|
||||||
use Symfony\Component\HttpFoundation\Request;
|
use Symfony\Component\HttpFoundation\Request;
|
||||||
|
|
||||||
class ConfigurationPanel
|
class ConfigurationPanel implements ConfigurationPanelInterface
|
||||||
{
|
{
|
||||||
protected $charsets;
|
protected $charsets;
|
||||||
protected $searchEngine;
|
protected $searchEngine;
|
||||||
|
@@ -18,7 +18,9 @@ use Alchemy\Phrasea\SearchEngine\SearchEngineSuggestion;
|
|||||||
use Alchemy\Phrasea\Exception\RuntimeException;
|
use Alchemy\Phrasea\Exception\RuntimeException;
|
||||||
use Doctrine\Common\Collections\ArrayCollection;
|
use Doctrine\Common\Collections\ArrayCollection;
|
||||||
use Silex\Application;
|
use Silex\Application;
|
||||||
|
use Symfony\Component\Process\ExecutableFinder;
|
||||||
use Symfony\Component\HttpFoundation\Request;
|
use Symfony\Component\HttpFoundation\Request;
|
||||||
|
use Symfony\Component\Process\Process;
|
||||||
|
|
||||||
require_once __DIR__ . '/../../../../vendor/sphinx/sphinxapi.php';
|
require_once __DIR__ . '/../../../../vendor/sphinx/sphinxapi.php';
|
||||||
|
|
||||||
@@ -29,6 +31,11 @@ class SphinxSearchEngine implements SearchEngineInterface
|
|||||||
* @var \SphinxClient
|
* @var \SphinxClient
|
||||||
*/
|
*/
|
||||||
protected $sphinx;
|
protected $sphinx;
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
* @var \SphinxClient
|
||||||
|
*/
|
||||||
|
protected $suggestionClient;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
*
|
||||||
@@ -43,13 +50,18 @@ class SphinxSearchEngine implements SearchEngineInterface
|
|||||||
$this->options = new SearchEngineOptions();
|
$this->options = new SearchEngineOptions();
|
||||||
|
|
||||||
$this->sphinx = new \SphinxClient();
|
$this->sphinx = new \SphinxClient();
|
||||||
|
|
||||||
$this->sphinx->SetServer($host, $port);
|
$this->sphinx->SetServer($host, $port);
|
||||||
$this->sphinx->SetArrayResult(true);
|
$this->sphinx->SetArrayResult(true);
|
||||||
$this->sphinx->SetConnectTimeout(1);
|
$this->sphinx->SetConnectTimeout(1);
|
||||||
|
|
||||||
|
$this->suggestionClient = new \SphinxClient();
|
||||||
|
$this->suggestionClient->SetServer($host, $port);
|
||||||
|
$this->suggestionClient->SetArrayResult(true);
|
||||||
|
$this->suggestionClient->SetConnectTimeout(1);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
$this->rt_conn = @new \PDO(sprintf('mysql:host=%s;port=%s;', $rt_host, $rt_port));
|
$this->rt_conn = @new \PDO(sprintf('mysql:host=%s;port=%s;', $rt_host, $rt_port));
|
||||||
|
$this->rt_conn->setAttribute(\PDO::ATTR_ERRMODE, \PDO::ERRMODE_EXCEPTION);
|
||||||
} catch (\PDOException $e) {
|
} catch (\PDOException $e) {
|
||||||
$this->rt_conn = null;
|
$this->rt_conn = null;
|
||||||
}
|
}
|
||||||
@@ -59,9 +71,11 @@ class SphinxSearchEngine implements SearchEngineInterface
|
|||||||
|
|
||||||
public function status()
|
public function status()
|
||||||
{
|
{
|
||||||
$status = $this->sphinx->Status();
|
if (false === $this->sphinx->Status()) {
|
||||||
|
throw new RuntimeException(_('Sphinx server is offline'));
|
||||||
|
}
|
||||||
|
|
||||||
if (false === $status) {
|
if (false === $this->suggestionClient->Status()) {
|
||||||
throw new RuntimeException(_('Sphinx server is offline'));
|
throw new RuntimeException(_('Sphinx server is offline'));
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -69,17 +83,7 @@ class SphinxSearchEngine implements SearchEngineInterface
|
|||||||
throw new RuntimeException('Unable to connect to sphinx rt');
|
throw new RuntimeException('Unable to connect to sphinx rt');
|
||||||
}
|
}
|
||||||
|
|
||||||
return $status;
|
return $this->sphinx->Status();
|
||||||
}
|
|
||||||
|
|
||||||
public function getConfigurationPanel(Application $app, Request $request)
|
|
||||||
{
|
|
||||||
return $this->configurationPanel()->get($app, $request);
|
|
||||||
}
|
|
||||||
|
|
||||||
public function postConfigurationPanel(Application $app, Request $request)
|
|
||||||
{
|
|
||||||
return $this->configurationPanel()->post($app, $request);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -167,9 +171,7 @@ class SphinxSearchEngine implements SearchEngineInterface
|
|||||||
$this->sphinx->UpdateAttributes($index, array("deleted"), array($value->getId() => array(1)));
|
$this->sphinx->UpdateAttributes($index, array("deleted"), array($value->getId() => array(1)));
|
||||||
}
|
}
|
||||||
|
|
||||||
$stmt = $this->rt_conn->exec("DELETE FROM metas_realtime" . $CRCdatabox . " WHERE id = " . $value->getId());
|
$this->rt_conn->exec("DELETE FROM metas_realtime" . $CRCdatabox . " WHERE id = " . $value->getId());
|
||||||
$stmt->execute();
|
|
||||||
$stmt->closeCursor();
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -240,7 +242,6 @@ class SphinxSearchEngine implements SearchEngineInterface
|
|||||||
{
|
{
|
||||||
$this->sphinx->ResetGroupBy();
|
$this->sphinx->ResetGroupBy();
|
||||||
$this->sphinx->ResetFilters();
|
$this->sphinx->ResetFilters();
|
||||||
$this->sphinx->ResetOverrides();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public function query($query, $offset, $perPage)
|
public function query($query, $offset, $perPage)
|
||||||
@@ -251,10 +252,10 @@ class SphinxSearchEngine implements SearchEngineInterface
|
|||||||
|
|
||||||
$query = $this->parseQuery($query);
|
$query = $this->parseQuery($query);
|
||||||
|
|
||||||
$preg = preg_match('/\s?recordid\s?=\s?([0-9]+)/i', $query, $matches, 0, 0);
|
$preg = preg_match('/\s?(recordid|storyid)\s?=\s?([0-9]+)/i', $query, $matches, 0, 0);
|
||||||
|
|
||||||
if ($preg > 0) {
|
if ($preg > 0) {
|
||||||
$this->sphinx->SetFilter('record_id', array($matches[1]));
|
$this->sphinx->SetFilter('record_id', array($matches[2]));
|
||||||
$query = '';
|
$query = '';
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -520,6 +521,10 @@ class SphinxSearchEngine implements SearchEngineInterface
|
|||||||
|
|
||||||
$altVersions = array();
|
$altVersions = array();
|
||||||
|
|
||||||
|
foreach ($words as $word) {
|
||||||
|
$altVersions[$word] = array($word);
|
||||||
|
}
|
||||||
|
|
||||||
// As we got words, we look for alternate word for each of them
|
// As we got words, we look for alternate word for each of them
|
||||||
if (function_exists('enchant_broker_init') && $this->options->getLocale()) {
|
if (function_exists('enchant_broker_init') && $this->options->getLocale()) {
|
||||||
$broker = enchant_broker_init();
|
$broker = enchant_broker_init();
|
||||||
@@ -529,7 +534,7 @@ class SphinxSearchEngine implements SearchEngineInterface
|
|||||||
foreach ($words as $word) {
|
foreach ($words as $word) {
|
||||||
|
|
||||||
if (enchant_dict_check($dictionnary, $word) == false) {
|
if (enchant_dict_check($dictionnary, $word) == false) {
|
||||||
$suggs = array_merge(array($word), enchant_dict_suggest($dictionnary, $word));
|
$suggs = array_merge(enchant_dict_suggest($dictionnary, $word));
|
||||||
}
|
}
|
||||||
|
|
||||||
$altVersions[$word] = array_unique($suggs);
|
$altVersions[$word] = array_unique($suggs);
|
||||||
@@ -566,7 +571,6 @@ class SphinxSearchEngine implements SearchEngineInterface
|
|||||||
|
|
||||||
foreach ($queries as $alt_query) {
|
foreach ($queries as $alt_query) {
|
||||||
$results = $this->sphinx->Query($alt_query, $this->getQueryIndex($alt_query));
|
$results = $this->sphinx->Query($alt_query, $this->getQueryIndex($alt_query));
|
||||||
|
|
||||||
if ($results !== false && isset($results['total_found'])) {
|
if ($results !== false && isset($results['total_found'])) {
|
||||||
if ($results['total_found'] > 0) {
|
if ($results['total_found'] > 0) {
|
||||||
|
|
||||||
@@ -618,12 +622,12 @@ class SphinxSearchEngine implements SearchEngineInterface
|
|||||||
|
|
||||||
$this->resetSphinx();
|
$this->resetSphinx();
|
||||||
|
|
||||||
$this->sphinx->SetMatchMode(SPH_MATCH_EXTENDED2);
|
$this->suggestionClient->SetMatchMode(SPH_MATCH_EXTENDED2);
|
||||||
$this->sphinx->SetRankingMode(SPH_RANK_WORDCOUNT);
|
$this->suggestionClient->SetRankingMode(SPH_RANK_WORDCOUNT);
|
||||||
$this->sphinx->SetFilterRange("len", $len - 2, $len + 4);
|
$this->suggestionClient->SetFilterRange("len", $len - 2, $len + 4);
|
||||||
|
|
||||||
$this->sphinx->SetSortMode(SPH_SORT_EXTENDED, "@weight DESC");
|
$this->suggestionClient->SetSortMode(SPH_SORT_EXTENDED, "@weight DESC");
|
||||||
$this->sphinx->SetLimits(0, 10);
|
$this->suggestionClient->SetLimits(0, 10);
|
||||||
|
|
||||||
$indexes = array();
|
$indexes = array();
|
||||||
|
|
||||||
@@ -632,10 +636,9 @@ class SphinxSearchEngine implements SearchEngineInterface
|
|||||||
}
|
}
|
||||||
|
|
||||||
$index = implode(',', $indexes);
|
$index = implode(',', $indexes);
|
||||||
|
$res = $this->suggestionClient->Query($query, $index);
|
||||||
|
|
||||||
$res = $this->sphinx->Query($query, $index);
|
if ($this->suggestionClient->Status() === false) {
|
||||||
|
|
||||||
if ($this->sphinx->Status() === false) {
|
|
||||||
return array();
|
return array();
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -643,16 +646,11 @@ class SphinxSearchEngine implements SearchEngineInterface
|
|||||||
return array();
|
return array();
|
||||||
}
|
}
|
||||||
|
|
||||||
$this->sphinx->ResetGroupBy();
|
|
||||||
$this->sphinx->ResetFilters();
|
|
||||||
|
|
||||||
$words = array();
|
$words = array();
|
||||||
foreach ($res["matches"] as $match) {
|
foreach ($res["matches"] as $match) {
|
||||||
$words[] = $match['attrs']['keyword'];
|
$words[] = $match['attrs']['keyword'];
|
||||||
}
|
}
|
||||||
|
|
||||||
$this->applyOptions($this->options);
|
|
||||||
|
|
||||||
return $words;
|
return $words;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -669,14 +667,14 @@ class SphinxSearchEngine implements SearchEngineInterface
|
|||||||
if (count($index_keys) > 0) {
|
if (count($index_keys) > 0) {
|
||||||
if ($this->options->fields() || $this->options->businessFieldsOn()) {
|
if ($this->options->fields() || $this->options->businessFieldsOn()) {
|
||||||
if ($query !== '' && $this->options->stemmed() && $this->options->getLocale()) {
|
if ($query !== '' && $this->options->stemmed() && $this->options->getLocale()) {
|
||||||
$index = ', metadatas' . implode('_stemmed_' . $this->options->getLocale() . ', metadatas', $index_keys) . '_stemmed_' . $this->options->getLocale();
|
$index = 'metadatas' . implode('_stemmed_' . $this->options->getLocale() . ', metadatas', $index_keys) . '_stemmed_' . $this->options->getLocale();
|
||||||
} else {
|
} else {
|
||||||
$index = 'metadatas' . implode(',metadatas', $index_keys);
|
$index = 'metadatas' . implode(',metadatas', $index_keys);
|
||||||
}
|
}
|
||||||
$index .= ', metas_realtime' . implode(', metas_realtime', $index_keys);
|
$index .= ', metas_realtime' . implode(', metas_realtime', $index_keys);
|
||||||
} else {
|
} else {
|
||||||
if ($query !== '' && $this->options->stemmed() && $this->options->getLocale()) {
|
if ($query !== '' && $this->options->stemmed() && $this->options->getLocale()) {
|
||||||
$index = ', documents' . implode('_stemmed_' . $this->options->getLocale() . ', documents', $index_keys) . '_stemmed_' . $this->options->getLocale();
|
$index = 'documents' . implode('_stemmed_' . $this->options->getLocale() . ', documents', $index_keys) . '_stemmed_' . $this->options->getLocale();
|
||||||
} else {
|
} else {
|
||||||
$index = 'documents' . implode(', documents', $index_keys);
|
$index = 'documents' . implode(', documents', $index_keys);
|
||||||
}
|
}
|
||||||
@@ -721,5 +719,76 @@ class SphinxSearchEngine implements SearchEngineInterface
|
|||||||
|
|
||||||
return $query;
|
return $query;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Rebuilds the `suggest` table of every given databox from Sphinx keyword
 * frequencies.
 *
 * For each databox the Sphinx `indexer` binary is run with `--buildstops`
 * and `--buildfreqs` on the databox "metadatas" index, writing the keyword
 * dump to a temporary file. The `suggest` table is then truncated and
 * refilled with the statement returned by BuildDictionarySQL(), if any.
 *
 * @param array   $databoxes     Databoxes to process; each one must expose get_connection()
 * @param string  $configuration Path to the Sphinx configuration file passed to `indexer --config`
 * @param integer $threshold     Minimal frequency forwarded to BuildDictionarySQL()
 *
 * @return SphinxSearchEngine
 *
 * @throws RuntimeException When the indexer binary cannot be found or executed,
 *                          or when the temporary file cannot be created
 */
public function buildSuggestions(array $databoxes, $configuration, $threshold = 10)
{
    $executableFinder = new ExecutableFinder();
    $indexer = $executableFinder->find('indexer');

    // find() yields null when the binary is not available, handle it explicitly
    if (null === $indexer || ! is_executable($indexer)) {
        throw new RuntimeException('Indexer does not seem to be executable');
    }

    foreach ($databoxes as $databox) {
        $tmp_file = tempnam(sys_get_temp_dir(), 'sphinx_sugg');

        if (false === $tmp_file) {
            throw new RuntimeException('Unable to create a temporary file to build suggestions');
        }

        try {
            // Every argument is escaped so that paths containing spaces or
            // shell metacharacters can not alter the command line
            $cmd = escapeshellarg($indexer)
                . ' --config ' . escapeshellarg($configuration)
                . ' ' . escapeshellarg('metadatas' . $this->CRCdatabox($databox))
                . ' --buildstops ' . escapeshellarg($tmp_file) . ' 1000000 --buildfreqs';
            $process = new Process($cmd);
            $process->run();

            $stmt = $databox->get_connection()->prepare('TRUNCATE suggest');
            $stmt->execute();
            $stmt->closeCursor();

            // An unreadable dump is treated like an empty one
            $dump = file_get_contents($tmp_file);
            $sql = false === $dump ? null : $this->BuildDictionarySQL($dump, $threshold);

            if (null !== $sql) {
                $stmt = $databox->get_connection()->prepare($sql);
                $stmt->execute();
                $stmt->closeCursor();
            }
        } catch (\Exception $e) {
            // Do not leak the temporary file when a step fails
            if (is_file($tmp_file)) {
                unlink($tmp_file);
            }

            throw $e;
        }

        unlink($tmp_file);
    }

    return $this;
}
|
||||||
|
|
||||||
|
/**
 * Builds the INSERT statement filling the `suggest` table from a keyword
 * dump made of "<keyword> <frequency>" lines.
 *
 * A line is ignored when it is blank, when it does not provide a keyword
 * followed by a numeric frequency, when the frequency is below the
 * threshold, or when the keyword:
 *  - contains an underscore, a single quote or a backslash (the last two
 *    would break the quoted SQL literal),
 *  - is made of digits only,
 *  - is shorter than 3 characters.
 *
 * @param string  $dictionnary Raw content of the keyword dump
 * @param integer $threshold   Minimal frequency for a keyword to be kept
 *
 * @return string|null The INSERT statement, or null when no keyword qualifies
 */
protected function BuildDictionarySQL($dictionnary, $threshold)
{
    $out = array();
    $n = 1;

    foreach (explode("\n", $dictionnary) as $line) {
        $line = trim($line);

        if ($line === '') {
            continue;
        }

        // Split on any run of whitespace; a line without a frequency is skipped
        // instead of raising an undefined offset notice
        $parts = preg_split('/\s+/', $line);

        if (count($parts) < 2) {
            continue;
        }

        $keyword = $parts[0];

        // The frequency is written unquoted in the statement, it must be numeric
        if ( ! ctype_digit($parts[1])) {
            continue;
        }

        $freq = (int) $parts[1];

        if ($freq < $threshold) {
            continue;
        }

        if (false !== strpos($keyword, '_')
            || false !== strpos($keyword, "'")
            || false !== strpos($keyword, '\\')) {
            continue;
        }

        if (ctype_digit($keyword)) {
            continue;
        }

        if (mb_strlen($keyword) < 3) {
            continue;
        }

        $trigrams = $this->BuildTrigrams($keyword);

        $out[] = "( $n, '$keyword', '$trigrams', $freq )";
        $n ++;
    }

    if (count($out) > 0) {
        return "INSERT INTO suggest VALUES " . implode(",\n", $out) . ";";
    }

    return null;
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user