setDescription('Generate suggestions for Sphinx Search Engine');
return $this;
}
/**
 * Tells the caller whether this command requires setup.
 *
 * Always answers yes. NOTE(review): presumably consumed by the base command
 * class to refuse running on a non-installed application - confirm there.
 *
 * @return boolean Always true.
 */
public function requireSetup()
{
    return true;
}
/**
 * Rebuilds the `suggest` table of every databox from a Sphinx dictionary.
 *
 * For each entry returned by phrasea::sbas_params() this method:
 *  - derives an index suffix from the CRC32 of the connection settings;
 *  - runs the Sphinx indexer with --buildstops / --buildfreqs against
 *    <GV_RootPath>tmp/dict<index>.txt (the file must already exist);
 *  - turns the resulting dictionary into one INSERT statement through
 *    BuildDictionarySQL();
 *  - empties the databox `suggest` table and inserts the new rows;
 *  - deletes the dictionary file.
 *
 * A databox is skipped (with a message) when the indexer reports a failure,
 * when the databox connection cannot be opened, or when the dictionary file
 * cannot be read; in those cases its `suggest` table is left untouched.
 *
 * @param InputInterface  $input  Not used by this command.
 * @param OutputInterface $output Receives progress and error messages.
 *
 * @return integer 0 when the loop completes, 1 when the indexer binary is not
 *                 executable or a dictionary file does not exist.
 */
protected function doExecute(InputInterface $input, OutputInterface $output)
{
    // Guarded: define() on an existing constant raises a notice/warning when
    // the command is executed more than once in the same process.
    if ( ! defined('FREQ_THRESHOLD')) {
        define('FREQ_THRESHOLD', 10);
    }
    if ( ! defined('SUGGEST_DEBUG')) {
        define('SUGGEST_DEBUG', 0);
    }

    $indexer = '/usr/local/bin/indexer';
    $params = phrasea::sbas_params($this->container);

    foreach ($params as $sbas_id => $p) {
        $index = crc32(
            str_replace(
                array('.', '%'),
                '_',
                sprintf('%s_%s_%s_%s', $p['host'], $p['port'], $p['user'], $p['dbname'])
            )
        );

        $tmp_file = $this->container['phraseanet.registry']->get('GV_RootPath') . 'tmp/dict' . $index . '.txt';

        $databox = $this->getService('phraseanet.appbox')->get_databox($sbas_id);

        $output->writeln("process Databox " . $databox->get_viewname() . " / $index\n");

        if ( ! is_executable($indexer)) {
            $output->writeln("'" . $indexer . "' is not executable");

            return 1;
        }

        if ( ! file_exists($tmp_file)) {
            $output->writeln(" file '" . $tmp_file . "' does not exist");

            return 1;
        }

        // Every argument is shell-escaped: the path comes from configuration
        // and may contain spaces, and crc32() can be negative on 32-bit builds.
        $cmd = sprintf(
            '%s %s --buildstops %s 1000000 --buildfreqs',
            escapeshellarg($indexer),
            escapeshellarg('metadatas' . $index),
            escapeshellarg($tmp_file)
        );

        $cmdOutput = array();
        $status = 0;
        exec($cmd, $cmdOutput, $status);

        if ($status !== 0) {
            // Do not wipe the existing suggestions with a stale dictionary.
            $output->writeln(sprintf(" indexer exited with status %d, databox skipped", $status));
            continue;
        }

        try {
            $connbas = connection::getPDOConnection($this->container, $sbas_id);
        } catch (Exception $e) {
            $output->writeln(" unable to connect to databox : " . $e->getMessage());
            continue;
        }

        // Read and convert the dictionary BEFORE truncating, so an unreadable
        // file cannot leave the table empty.
        $dictionary = file_get_contents($tmp_file);

        if ($dictionary === false) {
            $output->writeln(" file '" . $tmp_file . "' could not be read");
            continue;
        }

        $sql = $this->BuildDictionarySQL($output, $dictionary);

        $stmt = $connbas->prepare('TRUNCATE suggest');
        $stmt->execute();
        $stmt->closeCursor();

        if (trim($sql) !== '') {
            $stmt = $connbas->prepare($sql);
            $stmt->execute();
            $stmt->closeCursor();
        }

        unlink($tmp_file);
    }

    return 0;
}
/**
 * Splits a keyword into its space-terminated list of trigrams.
 *
 * The keyword is padded with two underscores on each side, then every
 * 3-byte window of the padded string is emitted, each followed by a space
 * (so the result ends with a trailing space).
 *
 * The windows are byte-based (strlen/substr): a multi-byte character is
 * spread over several trigrams.
 *
 * @param string $keyword Word to decompose.
 *
 * @return string Trigrams separated (and terminated) by a single space.
 */
protected function BuildTrigrams($keyword)
{
    $padded = "__" . $keyword . "__";
    $lastOffset = strlen($padded) - 3;

    $pieces = array();
    for ($offset = 0; $offset <= $lastOffset; $offset ++ ) {
        $pieces[] = substr($padded, $offset, 3);
    }

    if (count($pieces) === 0) {
        return "";
    }

    return implode(" ", $pieces) . " ";
}
/**
 * Converts a keyword/frequency dictionary into one multi-row INSERT
 * statement for the `suggest` table.
 *
 * Each non-blank line of $in is read as "<keyword> <frequency>", split on
 * single spaces; fields after the second are ignored. A line is skipped
 * when:
 *  - it has fewer than two fields;
 *  - the frequency is not a plain unsigned integer, or is below
 *    FREQ_THRESHOLD (10 when the constant is not defined);
 *  - the keyword contains an underscore, a single quote or a backslash;
 *  - the keyword is made of digits only;
 *  - the keyword is shorter than 3 characters.
 *
 * Rows are numbered from 1 and carry the keyword, its trigrams (see
 * BuildTrigrams()) and its frequency. The number of generated rows is
 * written to $output.
 *
 * @param OutputInterface $output Receives the "Generated N suggestions" line.
 * @param string          $in     Raw dictionary content.
 *
 * @return string The INSERT statement, or an empty string when no keyword
 *                qualifies.
 */
protected function BuildDictionarySQL(OutputInterface $output, $in)
{
    // The constant is declared by doExecute(); fall back to the same value
    // so this method does not fail when invoked on its own.
    $threshold = defined('FREQ_THRESHOLD') ? FREQ_THRESHOLD : 10;

    $rows = array();
    $n = 0;

    foreach (explode("\n", (string) $in) as $line) {
        $line = trim($line);

        if ($line === '') {
            continue;
        }

        $fields = explode(" ", $line);

        // A line without a frequency column would make list() read a
        // missing offset.
        if (count($fields) < 2) {
            continue;
        }

        list($keyword, $freq) = $fields;

        // The frequency is written unquoted into the statement, so only
        // accept pure digit strings and normalise them to an integer.
        if ( ! ctype_digit($freq)) {
            continue;
        }

        $freq = (int) $freq;

        if ($freq < $threshold) {
            continue;
        }

        // Quote and backslash would break out of / escape the quoted SQL
        // literal (backslash is an escape character in e.g. MySQL's default
        // mode); underscore is the trigram padding character.
        if (strpbrk($keyword, "_'\\") !== false) {
            continue;
        }

        if (ctype_digit($keyword)) {
            continue;
        }

        if (mb_strlen($keyword) < 3) {
            continue;
        }

        $n ++;
        $rows[] = sprintf("( %d, '%s', '%s', %d )", $n, $keyword, $this->BuildTrigrams($keyword), $freq);
    }

    $out = '';

    if (count($rows) > 0) {
        $out = "INSERT INTO suggest VALUES " . implode(",\n", $rows) . ";";
    }

    $output->writeln(sprintf("Generated %d suggestions", $n));

    return $out;
}
}