[SearchEngine] Add unit tests

This commit is contained in:
Romain Neutron
2012-08-28 11:42:30 +02:00
parent afdeb22673
commit b284cb2661
5 changed files with 125 additions and 42 deletions

View File

@@ -0,0 +1,14 @@
<?php
namespace Alchemy\Phrasea\SearchEngine;
use Silex\Application;
use Symfony\Component\HttpFoundation\Request;
/**
 * Contract implemented by the configuration panels of the search engines.
 *
 * A panel exposes two handlers, each receiving the Silex application and the
 * current HTTP request.
 */
interface ConfigurationPanelInterface
{
/**
 * Handler named after the HTTP GET verb.
 *
 * NOTE(review): presumably renders the configuration panel; the expected
 * return value is not visible here, confirm against the implementations.
 *
 * @param Application $app     The Silex application
 * @param Request     $request The current HTTP request
 */
public function get(Application $app, Request $request);
/**
 * Handler named after the HTTP POST verb.
 *
 * NOTE(review): presumably processes the submitted configuration; the
 * expected return value is not visible here, confirm against the
 * implementations.
 *
 * @param Application $app     The Silex application
 * @param Request     $request The current HTTP request
 */
public function post(Application $app, Request $request);
}

View File

@@ -2,9 +2,10 @@
namespace Alchemy\Phrasea\SearchEngine\Phrasea; namespace Alchemy\Phrasea\SearchEngine\Phrasea;
use Alchemy\Phrasea\SearchEngine\ConfigurationPanelInterface;
use Silex\Application; use Silex\Application;
class ConfigurationPanel class ConfigurationPanel implements ConfigurationPanelInterface
{ {
protected $charsets; protected $charsets;
protected $searchEngine; protected $searchEngine;

View File

@@ -32,9 +32,7 @@ interface SearchEngineInterface
*/ */
public function status(); public function status();
public function getConfigurationPanel(Application $app, Request $request); public function configurationPanel();
public function postConfigurationPanel(Application $app, Request $request);
/** /**
* *

View File

@@ -2,11 +2,12 @@
namespace Alchemy\Phrasea\SearchEngine\SphinxSearch; namespace Alchemy\Phrasea\SearchEngine\SphinxSearch;
use Alchemy\Phrasea\SearchEngine\ConfigurationPanelInterface;
use Silex\Application; use Silex\Application;
use Symfony\Component\Finder\Finder; use Symfony\Component\Finder\Finder;
use Symfony\Component\HttpFoundation\Request; use Symfony\Component\HttpFoundation\Request;
class ConfigurationPanel class ConfigurationPanel implements ConfigurationPanelInterface
{ {
protected $charsets; protected $charsets;
protected $searchEngine; protected $searchEngine;

View File

@@ -18,7 +18,9 @@ use Alchemy\Phrasea\SearchEngine\SearchEngineSuggestion;
use Alchemy\Phrasea\Exception\RuntimeException; use Alchemy\Phrasea\Exception\RuntimeException;
use Doctrine\Common\Collections\ArrayCollection; use Doctrine\Common\Collections\ArrayCollection;
use Silex\Application; use Silex\Application;
use Symfony\Component\Process\ExecutableFinder;
use Symfony\Component\HttpFoundation\Request; use Symfony\Component\HttpFoundation\Request;
use Symfony\Component\Process\Process;
require_once __DIR__ . '/../../../../vendor/sphinx/sphinxapi.php'; require_once __DIR__ . '/../../../../vendor/sphinx/sphinxapi.php';
@@ -29,6 +31,11 @@ class SphinxSearchEngine implements SearchEngineInterface
* @var \SphinxClient * @var \SphinxClient
*/ */
protected $sphinx; protected $sphinx;
/**
*
* @var \SphinxClient
*/
protected $suggestionClient;
/** /**
* *
@@ -43,13 +50,18 @@ class SphinxSearchEngine implements SearchEngineInterface
$this->options = new SearchEngineOptions(); $this->options = new SearchEngineOptions();
$this->sphinx = new \SphinxClient(); $this->sphinx = new \SphinxClient();
$this->sphinx->SetServer($host, $port); $this->sphinx->SetServer($host, $port);
$this->sphinx->SetArrayResult(true); $this->sphinx->SetArrayResult(true);
$this->sphinx->SetConnectTimeout(1); $this->sphinx->SetConnectTimeout(1);
$this->suggestionClient = new \SphinxClient();
$this->suggestionClient->SetServer($host, $port);
$this->suggestionClient->SetArrayResult(true);
$this->suggestionClient->SetConnectTimeout(1);
try { try {
$this->rt_conn = @new \PDO(sprintf('mysql:host=%s;port=%s;', $rt_host, $rt_port)); $this->rt_conn = @new \PDO(sprintf('mysql:host=%s;port=%s;', $rt_host, $rt_port));
$this->rt_conn->setAttribute(\PDO::ATTR_ERRMODE, \PDO::ERRMODE_EXCEPTION);
} catch (\PDOException $e) { } catch (\PDOException $e) {
$this->rt_conn = null; $this->rt_conn = null;
} }
@@ -59,9 +71,11 @@ class SphinxSearchEngine implements SearchEngineInterface
public function status() public function status()
{ {
$status = $this->sphinx->Status(); if (false === $this->sphinx->Status()) {
throw new RuntimeException(_('Sphinx server is offline'));
}
if (false === $status) { if (false === $this->suggestionClient->Status()) {
throw new RuntimeException(_('Sphinx server is offline')); throw new RuntimeException(_('Sphinx server is offline'));
} }
@@ -69,17 +83,7 @@ class SphinxSearchEngine implements SearchEngineInterface
throw new RuntimeException('Unable to connect to sphinx rt'); throw new RuntimeException('Unable to connect to sphinx rt');
} }
return $status; return $this->sphinx->Status();
}
public function getConfigurationPanel(Application $app, Request $request)
{
return $this->configurationPanel()->get($app, $request);
}
public function postConfigurationPanel(Application $app, Request $request)
{
return $this->configurationPanel()->post($app, $request);
} }
/** /**
@@ -167,9 +171,7 @@ class SphinxSearchEngine implements SearchEngineInterface
$this->sphinx->UpdateAttributes($index, array("deleted"), array($value->getId() => array(1))); $this->sphinx->UpdateAttributes($index, array("deleted"), array($value->getId() => array(1)));
} }
$stmt = $this->rt_conn->exec("DELETE FROM metas_realtime" . $CRCdatabox . " WHERE id = " . $value->getId()); $this->rt_conn->exec("DELETE FROM metas_realtime" . $CRCdatabox . " WHERE id = " . $value->getId());
$stmt->execute();
$stmt->closeCursor();
} }
} }
@@ -240,7 +242,6 @@ class SphinxSearchEngine implements SearchEngineInterface
{ {
$this->sphinx->ResetGroupBy(); $this->sphinx->ResetGroupBy();
$this->sphinx->ResetFilters(); $this->sphinx->ResetFilters();
$this->sphinx->ResetOverrides();
} }
public function query($query, $offset, $perPage) public function query($query, $offset, $perPage)
@@ -251,10 +252,10 @@ class SphinxSearchEngine implements SearchEngineInterface
$query = $this->parseQuery($query); $query = $this->parseQuery($query);
$preg = preg_match('/\s?recordid\s?=\s?([0-9]+)/i', $query, $matches, 0, 0); $preg = preg_match('/\s?(recordid|storyid)\s?=\s?([0-9]+)/i', $query, $matches, 0, 0);
if ($preg > 0) { if ($preg > 0) {
$this->sphinx->SetFilter('record_id', array($matches[1])); $this->sphinx->SetFilter('record_id', array($matches[2]));
$query = ''; $query = '';
} }
@@ -520,6 +521,10 @@ class SphinxSearchEngine implements SearchEngineInterface
$altVersions = array(); $altVersions = array();
foreach ($words as $word) {
$altVersions[$word] = array($word);
}
// As we got words, we look for alternate word for each of them // As we got words, we look for alternate word for each of them
if (function_exists('enchant_broker_init') && $this->options->getLocale()) { if (function_exists('enchant_broker_init') && $this->options->getLocale()) {
$broker = enchant_broker_init(); $broker = enchant_broker_init();
@@ -529,7 +534,7 @@ class SphinxSearchEngine implements SearchEngineInterface
foreach ($words as $word) { foreach ($words as $word) {
if (enchant_dict_check($dictionnary, $word) == false) { if (enchant_dict_check($dictionnary, $word) == false) {
$suggs = array_merge(array($word), enchant_dict_suggest($dictionnary, $word)); $suggs = array_merge(enchant_dict_suggest($dictionnary, $word));
} }
$altVersions[$word] = array_unique($suggs); $altVersions[$word] = array_unique($suggs);
@@ -566,7 +571,6 @@ class SphinxSearchEngine implements SearchEngineInterface
foreach ($queries as $alt_query) { foreach ($queries as $alt_query) {
$results = $this->sphinx->Query($alt_query, $this->getQueryIndex($alt_query)); $results = $this->sphinx->Query($alt_query, $this->getQueryIndex($alt_query));
if ($results !== false && isset($results['total_found'])) { if ($results !== false && isset($results['total_found'])) {
if ($results['total_found'] > 0) { if ($results['total_found'] > 0) {
@@ -618,12 +622,12 @@ class SphinxSearchEngine implements SearchEngineInterface
$this->resetSphinx(); $this->resetSphinx();
$this->sphinx->SetMatchMode(SPH_MATCH_EXTENDED2); $this->suggestionClient->SetMatchMode(SPH_MATCH_EXTENDED2);
$this->sphinx->SetRankingMode(SPH_RANK_WORDCOUNT); $this->suggestionClient->SetRankingMode(SPH_RANK_WORDCOUNT);
$this->sphinx->SetFilterRange("len", $len - 2, $len + 4); $this->suggestionClient->SetFilterRange("len", $len - 2, $len + 4);
$this->sphinx->SetSortMode(SPH_SORT_EXTENDED, "@weight DESC"); $this->suggestionClient->SetSortMode(SPH_SORT_EXTENDED, "@weight DESC");
$this->sphinx->SetLimits(0, 10); $this->suggestionClient->SetLimits(0, 10);
$indexes = array(); $indexes = array();
@@ -632,10 +636,9 @@ class SphinxSearchEngine implements SearchEngineInterface
} }
$index = implode(',', $indexes); $index = implode(',', $indexes);
$res = $this->suggestionClient->Query($query, $index);
$res = $this->sphinx->Query($query, $index); if ($this->suggestionClient->Status() === false) {
if ($this->sphinx->Status() === false) {
return array(); return array();
} }
@@ -643,16 +646,11 @@ class SphinxSearchEngine implements SearchEngineInterface
return array(); return array();
} }
$this->sphinx->ResetGroupBy();
$this->sphinx->ResetFilters();
$words = array(); $words = array();
foreach ($res["matches"] as $match) { foreach ($res["matches"] as $match) {
$words[] = $match['attrs']['keyword']; $words[] = $match['attrs']['keyword'];
} }
$this->applyOptions($this->options);
return $words; return $words;
} }
@@ -669,14 +667,14 @@ class SphinxSearchEngine implements SearchEngineInterface
if (count($index_keys) > 0) { if (count($index_keys) > 0) {
if ($this->options->fields() || $this->options->businessFieldsOn()) { if ($this->options->fields() || $this->options->businessFieldsOn()) {
if ($query !== '' && $this->options->stemmed() && $this->options->getLocale()) { if ($query !== '' && $this->options->stemmed() && $this->options->getLocale()) {
$index = ', metadatas' . implode('_stemmed_' . $this->options->getLocale() . ', metadatas', $index_keys) . '_stemmed_' . $this->options->getLocale(); $index = 'metadatas' . implode('_stemmed_' . $this->options->getLocale() . ', metadatas', $index_keys) . '_stemmed_' . $this->options->getLocale();
} else { } else {
$index = 'metadatas' . implode(',metadatas', $index_keys); $index = 'metadatas' . implode(',metadatas', $index_keys);
} }
$index .= ', metas_realtime' . implode(', metas_realtime', $index_keys); $index .= ', metas_realtime' . implode(', metas_realtime', $index_keys);
} else { } else {
if ($query !== '' && $this->options->stemmed() && $this->options->getLocale()) { if ($query !== '' && $this->options->stemmed() && $this->options->getLocale()) {
$index = ', documents' . implode('_stemmed_' . $this->options->getLocale() . ', documents', $index_keys) . '_stemmed_' . $this->options->getLocale(); $index = 'documents' . implode('_stemmed_' . $this->options->getLocale() . ', documents', $index_keys) . '_stemmed_' . $this->options->getLocale();
} else { } else {
$index = 'documents' . implode(', documents', $index_keys); $index = 'documents' . implode(', documents', $index_keys);
} }
@@ -721,5 +719,76 @@ class SphinxSearchEngine implements SearchEngineInterface
return $query; return $query;
} }
/**
 * Rebuilds the `suggest` table of every given databox from Sphinx keyword
 * frequencies.
 *
 * For each databox, the Sphinx `indexer` binary is run with `--buildstops`
 * and `--buildfreqs` against the databox "metadatas" index, dumping up to
 * 1000000 keywords and their frequencies into a temporary file. The
 * `suggest` table is then truncated and refilled with the statement
 * produced by BuildDictionarySQL(). The temporary file is always removed.
 *
 * @param array   $databoxes     Databoxes to process; each one must expose get_connection()
 * @param string  $configuration Path of the Sphinx configuration file passed to `indexer --config`
 * @param integer $threshold     Minimal frequency a keyword needs to be kept
 *
 * @return SphinxSearchEngine
 *
 * @throws RuntimeException When the indexer binary is missing or not executable,
 *                          when the temporary file can not be created or read,
 *                          or when the indexer fails without producing a dump
 */
public function buildSuggestions(array $databoxes, $configuration, $threshold = 10)
{
    $executableFinder = new ExecutableFinder();
    $indexer = $executableFinder->find('indexer');

    // ExecutableFinder::find() returns null when no binary is found
    if (null === $indexer || ! is_executable($indexer)) {
        throw new RuntimeException('Indexer does not seem to be executable');
    }

    foreach ($databoxes as $databox) {
        $tmp_file = tempnam(sys_get_temp_dir(), 'sphinx_sugg');

        if (false === $tmp_file) {
            throw new RuntimeException('Unable to create a temporary file for the sphinx dictionary');
        }

        try {
            // Every argument is escaped so that paths containing spaces or
            // shell metacharacters can not break (or hijack) the command.
            $cmd = escapeshellarg($indexer)
                . ' --config ' . escapeshellarg($configuration)
                . ' ' . escapeshellarg('metadatas' . $this->CRCdatabox($databox))
                . ' --buildstops ' . escapeshellarg($tmp_file) . ' 1000000 --buildfreqs';

            $process = new Process($cmd);
            $process->run();

            $dictionary = file_get_contents($tmp_file);

            if (false === $dictionary) {
                throw new RuntimeException('Unable to read the sphinx dictionary dump');
            }

            // A non-zero exit code alone is not treated as fatal, as the
            // dump may still have been written; a failed run that produced
            // nothing must not wipe the existing suggestions.
            if ( ! $process->isSuccessful() && '' === trim($dictionary)) {
                throw new RuntimeException(sprintf('Sphinx indexer failed : %s', $process->getErrorOutput()));
            }

            $connection = $databox->get_connection();

            $stmt = $connection->prepare('TRUNCATE suggest');
            $stmt->execute();
            $stmt->closeCursor();

            $sql = $this->BuildDictionarySQL($dictionary, $threshold);

            if (null !== $sql) {
                $stmt = $connection->prepare($sql);
                $stmt->execute();
                $stmt->closeCursor();
            }
        } catch (\Exception $e) {
            // Do not leak the temporary file when anything above fails
            if (file_exists($tmp_file)) {
                unlink($tmp_file);
            }

            throw $e;
        }

        unlink($tmp_file);
    }

    return $this;
}
/**
 * Builds the INSERT statement that fills the `suggest` table from an
 * indexer keyword dump.
 *
 * Every non-empty line of the dump is read as "<keyword> <frequency>".
 * A line is ignored when :
 *  - it has no frequency column, or the frequency is not made of digits only
 *  - the frequency is lower than the threshold
 *  - the keyword contains an underscore, a single quote or a backslash
 *  - the keyword is made of digits only
 *  - the keyword is shorter than 3 characters
 *
 * Kept keywords are numbered from 1 in dump order and stored along with
 * the value returned by BuildTrigrams() and their frequency.
 *
 * @param string  $dictionnary The content of the keyword dump
 * @param integer $threshold   Minimal frequency a keyword needs to be kept
 *
 * @return string|null The INSERT statement, or null when no keyword is kept
 */
protected function BuildDictionarySQL($dictionnary, $threshold)
{
    $out = array();
    $n = 1;

    foreach (explode("\n", $dictionnary) as $line) {
        $line = trim($line);

        if ($line === '') {
            continue;
        }

        // Split on any run of whitespace so that repeated separators do not
        // shift the frequency column.
        $parts = preg_split('/\s+/', $line);

        // Malformed line : the frequency ends up unquoted in the statement,
        // so it has to be a plain unsigned integer.
        if (count($parts) < 2 || ! ctype_digit($parts[1])) {
            continue;
        }

        $keyword = $parts[0];
        $freq = (int) $parts[1];

        if ($freq < $threshold) {
            continue;
        }

        // The keyword is embedded in a single-quoted SQL literal : anything
        // that could close or escape that literal is discarded.
        if (strpos($keyword, "_") !== false || strpos($keyword, "'") !== false || strpos($keyword, "\\") !== false) {
            continue;
        }

        if (ctype_digit($keyword)) {
            continue;
        }

        if (mb_strlen($keyword) < 3) {
            continue;
        }

        $trigrams = $this->BuildTrigrams($keyword);

        $out[] = "( $n, '$keyword', '$trigrams', $freq )";
        $n ++;
    }

    if ($out) {
        return "INSERT INTO suggest VALUES " . implode(",\n", $out) . ";";
    }

    return null;
}
} }