Files
Phraseanet/lib/classes/module/console/sphinxGenerateSuggestion.php
Romain Neutron e233e5afa6 Merge branch '3.8'
Conflicts:
	lib/Alchemy/Phrasea/Command/Developer/JavascriptBuilder.php
	lib/Alchemy/Phrasea/Controller/Prod/Basket.php
	lib/Alchemy/Phrasea/Core/Provider/TaskManagerServiceProvider.php
	lib/classes/Exception/Feed/ItemNotFound.php
	lib/classes/Exception/Feed/PublisherNotFound.php
	lib/classes/Feed/Abstract.php
	lib/classes/Feed/Adapter.php
	lib/classes/Feed/Aggregate.php
	lib/classes/Feed/Collection.php
	lib/classes/Feed/CollectionInterface.php
	lib/classes/Feed/Entry/Adapter.php
	lib/classes/Feed/Entry/Collection.php
	lib/classes/Feed/Entry/Interface.php
	lib/classes/Feed/Entry/Item.php
	lib/classes/Feed/Entry/ItemInterface.php
	lib/classes/Feed/Interface.php
	lib/classes/Feed/Link.php
	lib/classes/Feed/LinkInterface.php
	lib/classes/Feed/Publisher/Adapter.php
	lib/classes/Feed/Publisher/Interface.php
	lib/classes/Feed/Token.php
	lib/classes/Feed/TokenAggregate.php
	lib/classes/Feed/XML/Abstract.php
	lib/classes/Feed/XML/Atom.php
	lib/classes/Feed/XML/Cooliris.php
	lib/classes/Feed/XML/Interface.php
	lib/classes/Feed/XML/RSS.php
	lib/classes/Feed/XML/RSS/ImageInterface.php
	lib/classes/http/request.php
	lib/classes/module/console/schedulerStart.php
	lib/classes/module/console/schedulerState.php
	lib/classes/module/console/schedulerStop.php
	lib/classes/module/console/taskState.php
	lib/classes/module/console/tasklist.php
	lib/classes/module/console/taskrun.php
	lib/classes/registry.php
	lib/classes/registryInterface.php
	lib/classes/set/order.php
	lib/classes/system/url.php
	lib/classes/task/Scheduler.php
	lib/classes/task/appboxAbstract.php
	lib/classes/task/databoxAbstract.php
	lib/classes/task/manager.php
	lib/classes/task/period/RecordMover.php
	lib/classes/task/period/apibridge.php
	lib/classes/task/period/archive.php
	lib/classes/task/period/cindexer.php
	lib/classes/task/period/emptyColl.php
	lib/classes/task/period/ftp.php
	lib/classes/task/period/ftpPull.php
	lib/classes/task/period/subdef.php
	lib/classes/task/period/test.php
	lib/classes/task/period/writemeta.php
	lib/conf.d/PhraseaFixture/AbstractWZ.php
	lib/conf.d/PhraseaFixture/Basket/LoadFiveBaskets.php
	lib/conf.d/PhraseaFixture/Basket/LoadOneBasket.php
	lib/conf.d/PhraseaFixture/Basket/LoadOneBasketEnv.php
	lib/conf.d/PhraseaFixture/Lazaret/LoadOneFile.php
	lib/conf.d/PhraseaFixture/Story/LoadOneStory.php
	lib/conf.d/PhraseaFixture/UsrLists/ListAbstract.php
	lib/conf.d/PhraseaFixture/UsrLists/UsrList.php
	lib/conf.d/PhraseaFixture/UsrLists/UsrListEntry.php
	lib/conf.d/PhraseaFixture/UsrLists/UsrListOwner.php
	lib/conf.d/PhraseaFixture/ValidationParticipant/LoadOneParticipant.php
	lib/conf.d/PhraseaFixture/ValidationParticipant/LoadParticipantWithSession.php
	lib/conf.d/PhraseaFixture/ValidationSession/LoadOneValidationSession.php
2014-01-06 15:38:14 +01:00

145 lines
4.0 KiB
PHP

<?php
/*
* This file is part of Phraseanet
*
* (c) 2005-2014 Alchemy
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
use Alchemy\Phrasea\Command\Command;
use Symfony\Component\Console\Input\InputInterface;
use Symfony\Component\Console\Output\OutputInterface;
use Symfony\Component\Process\ProcessBuilder;
/**
 * Console command that rebuilds the `suggest` table of every databox from the
 * keyword dictionary produced by the Sphinx `indexer` binary.
 *
 * For each databox the command runs `indexer --buildstops ... --buildfreqs`
 * into a temporary file, empties the `suggest` table and re-inserts one row
 * per accepted keyword (id, keyword, trigrams, frequency).
 */
class module_console_sphinxGenerateSuggestion extends Command
{
    /**
     * Default minimum frequency a keyword must reach to be stored as a suggestion.
     */
    const FREQ_THRESHOLD = 10;

    /**
     * Default value of the legacy global SUGGEST_DEBUG constant (not read by this class).
     */
    const SUGGEST_DEBUG = 0;

    /**
     * @param string|null $name Command name, forwarded to the parent constructor.
     */
    public function __construct($name = null)
    {
        parent::__construct($name);

        $this->setDescription('Generates suggestions for Sphinx Search Engine, useful for query auto-completion');

        return $this;
    }

    /**
     * Regenerates the suggestions of every databox.
     *
     * @param InputInterface  $input  Unused.
     * @param OutputInterface $output Receives progress and error messages.
     *
     * @return int 0 on success, 1 when the indexer is not executable or its
     *             dictionary file is missing or unreadable.
     */
    protected function doExecute(InputInterface $input, OutputInterface $output)
    {
        // The global constants are kept for backward compatibility, but guarded:
        // an unconditional define() complains when the command runs twice in one process.
        if (!defined('FREQ_THRESHOLD')) {
            define('FREQ_THRESHOLD', self::FREQ_THRESHOLD);
        }
        if (!defined('SUGGEST_DEBUG')) {
            define('SUGGEST_DEBUG', self::SUGGEST_DEBUG);
        }

        $indexer = '/usr/local/bin/indexer';
        $params = phrasea::sbas_params($this->container);

        foreach ($params as $sbas_id => $p) {
            // Index suffix: unsigned CRC32 of "host_port_user_dbname" with '.' and '%' replaced by '_'.
            $index = sprintf("%u", crc32(
                str_replace(
                    ['.', '%'],
                    '_',
                    sprintf('%s_%s_%s_%s', $p['host'], $p['port'], $p['user'], $p['dbname'])
                )
            ));

            $tmp_file = $this->container['root.path'] . '/tmp/dict' . $index . '.txt';

            $databox = $this->getService('phraseanet.appbox')->get_databox($sbas_id);

            $output->writeln("process Databox " . $databox->get_label($this->container['locale']) . " / $index\n");

            if (!is_executable($indexer)) {
                $output->writeln("<error>'" . $indexer . "' is not executable</error>");

                return 1;
            }

            $builder = ProcessBuilder::create([$indexer]);
            $builder->add('metadatas' . $index)
                ->add('--buildstops')
                ->add($tmp_file)
                ->add(1000000)
                ->add('--buildfreqs');
            $builder->getProcess()->run();

            // The process exit status is not inspected; success is judged by the dictionary file.
            if (!file_exists($tmp_file)) {
                $output->writeln("<error> file '" . $tmp_file . "' does not exist</error>");

                return 1;
            }

            // Read the dictionary and remove the temporary file right away, so that it is
            // not left behind when the connection fails or a statement throws below.
            $dictionary = file_get_contents($tmp_file);
            unlink($tmp_file);

            if ($dictionary === false) {
                // Do not wipe the existing suggestions when there is nothing to replace them with.
                $output->writeln("<error> file '" . $tmp_file . "' could not be read</error>");

                return 1;
            }

            try {
                $connbas = connection::getPDOConnection($this->container, $sbas_id);
            } catch (Exception $e) {
                // Best effort: a databox that cannot be reached is skipped.
                continue;
            }

            $stmt = $connbas->prepare('TRUNCATE suggest');
            $stmt->execute();
            $stmt->closeCursor();

            $sql = $this->BuildDictionarySQL($output, $dictionary);

            if (trim($sql) !== '') {
                $stmt = $connbas->prepare($sql);
                $stmt->execute();
                $stmt->closeCursor();
            }
        }

        return 0;
    }

    /**
     * Builds the space-separated list of 3-byte windows of "__<keyword>__".
     *
     * The windows are taken on bytes (strlen/substr), not on characters, and
     * every trigram - including the last one - is followed by a space.
     * NOTE(review): left byte-based on purpose; whatever builds trigrams on the
     * query side presumably has to produce the same strings - confirm before
     * switching to mb_* functions.
     *
     * @param string $keyword
     *
     * @return string
     */
    protected function BuildTrigrams($keyword)
    {
        $padded = "__" . $keyword . "__";
        $last = strlen($padded) - 2;
        $trigrams = "";

        for ($i = 0; $i < $last; $i++) {
            $trigrams .= substr($padded, $i, 3) . " ";
        }

        return $trigrams;
    }

    /**
     * Turns the indexer dictionary ("<keyword> <frequency>" per line) into a
     * single multi-row "INSERT INTO suggest VALUES ...;" statement.
     *
     * A line is ignored when it is blank or has no frequency column, when the
     * frequency is not a plain non-negative integer or is below the threshold,
     * or when the keyword contains '_', a single quote or a backslash, is made
     * of digits only, or is shorter than 3 characters.
     *
     * @param OutputInterface $output Receives the number of generated suggestions.
     * @param string          $in     Raw content of the dictionary file.
     *
     * @return string The INSERT statement, or an empty string when no keyword qualifies.
     */
    protected function BuildDictionarySQL(OutputInterface $output, $in)
    {
        // Honour the legacy global when it exists; fall back to the class default so
        // this method also works when called before doExecute() defined it.
        $threshold = defined('FREQ_THRESHOLD') ? FREQ_THRESHOLD : self::FREQ_THRESHOLD;

        $rows = [];
        $n = 0;

        foreach (explode("\n", (string) $in) as $line) {
            $parts = explode(" ", trim($line));

            if (count($parts) < 2) {
                // Blank line, or a keyword without a frequency column.
                continue;
            }

            list($keyword, $freq) = $parts;

            // The frequency is written unquoted into the statement, so it must be a
            // plain integer; a loose "<" comparison alone does not guarantee that.
            if (!ctype_digit($freq) || (int) $freq < $threshold) {
                continue;
            }

            // The keyword (and its trigrams) are written inside single-quoted literals:
            // reject anything that could terminate or escape the literal.
            if (strpbrk($keyword, "_'\\") !== false) {
                continue;
            }

            if (ctype_digit($keyword)) {
                continue;
            }

            if (mb_strlen($keyword) < 3) {
                continue;
            }

            $trigrams = $this->BuildTrigrams($keyword);
            $n++;

            // Row ids are 1-based and follow the order of accepted keywords.
            $rows[] = "( $n, '$keyword', '$trigrams', " . (int) $freq . " )";
        }

        $out = '';

        if ($rows) {
            $out = "INSERT INTO suggest VALUES " . implode(",\n", $rows) . ";";
        }

        $output->writeln(sprintf("Generated <info>%d</info> suggestions", $n));

        return $out;
    }
}