Files
Phraseanet/lib/classes/module/console/sphinxGenerateSuggestion.php
Nicolas Le Goff 949bf06cac Merge branch '3.8'
Conflicts:
	CHANGELOG.md
	bin/console
	bin/developer
	bin/setup
	bower.json
	composer.json
	composer.lock
	features/bootstrap/FeatureContext.php
	features/bootstrap/GuiContext.php
	lib/Alchemy/Phrasea/Authentication/Token/TokenValidator.php
	lib/Alchemy/Phrasea/Command/BuildMissingSubdefs.php
	lib/Alchemy/Phrasea/Command/CreateCollection.php
	lib/Alchemy/Phrasea/Command/Developer/JavascriptBuilder.php
	lib/Alchemy/Phrasea/Controller/Admin/Collection.php
	lib/Alchemy/Phrasea/Controller/Admin/Databoxes.php
	lib/Alchemy/Phrasea/Controller/Admin/TaskManager.php
	lib/Alchemy/Phrasea/Controller/Api/V1.php
	lib/Alchemy/Phrasea/Controller/Client/Baskets.php
	lib/Alchemy/Phrasea/Controller/Client/Root.php
	lib/Alchemy/Phrasea/Controller/Prod/Basket.php
	lib/Alchemy/Phrasea/Controller/Prod/Export.php
	lib/Alchemy/Phrasea/Controller/Prod/Property.php
	lib/Alchemy/Phrasea/Controller/Prod/Records.php
	lib/Alchemy/Phrasea/Controller/Prod/Tools.php
	lib/Alchemy/Phrasea/Controller/Prod/Upload.php
	lib/Alchemy/Phrasea/Controller/Root/Login.php
	lib/Alchemy/Phrasea/Controller/Thesaurus/Thesaurus.php
	lib/Alchemy/Phrasea/Core/Event/ApiLoadEndEvent.php
	lib/Alchemy/Phrasea/Core/Event/ApiLoadStartEvent.php
	lib/Alchemy/Phrasea/Core/Provider/TaskManagerServiceProvider.php
	lib/Alchemy/Phrasea/Core/Version.php
	lib/Alchemy/Phrasea/Exception/XMLParseErrorException.php
	lib/Alchemy/Phrasea/Helper/DatabaseHelper.php
	lib/Alchemy/Phrasea/Helper/User/Edit.php
	lib/Alchemy/Phrasea/SearchEngine/Phrasea/PhraseaEngine.php
	lib/Alchemy/Phrasea/SearchEngine/SearchEngineOptions.php
	lib/Doctrine/Entities/AuthFailure.php
	lib/Doctrine/Entities/Basket.php
	lib/Doctrine/Entities/BasketElement.php
	lib/Doctrine/Entities/LazaretAttribute.php
	lib/Doctrine/Entities/LazaretCheck.php
	lib/Doctrine/Entities/LazaretFile.php
	lib/Doctrine/Entities/LazaretSession.php
	lib/Doctrine/Entities/Session.php
	lib/Doctrine/Entities/SessionModule.php
	lib/Doctrine/Entities/StoryWZ.php
	lib/Doctrine/Entities/UsrList.php
	lib/Doctrine/Entities/UsrListEntry.php
	lib/Doctrine/Entities/UsrListOwner.php
	lib/Doctrine/Entities/ValidationData.php
	lib/Doctrine/Entities/ValidationParticipant.php
	lib/Doctrine/Entities/ValidationSession.php
	lib/Doctrine/Logger/MonologSQLLogger.php
	lib/Doctrine/Repositories/BasketRepository.php
	lib/Doctrine/Repositories/ValidationParticipantRepository.php
	lib/Doctrine/Types/Binary.php
	lib/Doctrine/Types/Blob.php
	lib/Doctrine/Types/Enum.php
	lib/Doctrine/Types/LongBlob.php
	lib/Doctrine/Types/VarBinary.php
	lib/classes/API/OAuth2/Account.php
	lib/classes/API/OAuth2/Application.php
	lib/classes/API/OAuth2/Application/OfficePlugin.php
	lib/classes/API/OAuth2/AuthCode.php
	lib/classes/API/OAuth2/RefreshToken.php
	lib/classes/API/OAuth2/Token.php
	lib/classes/API/V1/Abstract.php
	lib/classes/API/V1/Interface.php
	lib/classes/API/V1/adapter.php
	lib/classes/API/V1/exception/abstract.php
	lib/classes/API/V1/exception/badrequest.php
	lib/classes/API/V1/exception/forbidden.php
	lib/classes/API/V1/exception/internalservererror.php
	lib/classes/API/V1/exception/maintenance.php
	lib/classes/API/V1/exception/methodnotallowed.php
	lib/classes/API/V1/exception/notfound.php
	lib/classes/API/V1/exception/unauthorized.php
	lib/classes/API/V1/result.php
	lib/classes/Exception/Feed/EntryNotFound.php
	lib/classes/Exception/Feed/ItemNotFound.php
	lib/classes/Exception/Feed/PublisherNotFound.php
	lib/classes/Feed/Abstract.php
	lib/classes/Feed/Adapter.php
	lib/classes/Feed/Aggregate.php
	lib/classes/Feed/Collection.php
	lib/classes/Feed/CollectionInterface.php
	lib/classes/Feed/Entry/Adapter.php
	lib/classes/Feed/Entry/Collection.php
	lib/classes/Feed/Entry/CollectionInterface.php
	lib/classes/Feed/Entry/Interface.php
	lib/classes/Feed/Entry/Item.php
	lib/classes/Feed/Entry/ItemInterface.php
	lib/classes/Feed/Interface.php
	lib/classes/Feed/Link.php
	lib/classes/Feed/LinkInterface.php
	lib/classes/Feed/Publisher/Adapter.php
	lib/classes/Feed/Publisher/Interface.php
	lib/classes/Feed/Token.php
	lib/classes/Feed/TokenAggregate.php
	lib/classes/Feed/XML/Abstract.php
	lib/classes/Feed/XML/Atom.php
	lib/classes/Feed/XML/Cooliris.php
	lib/classes/Feed/XML/Interface.php
	lib/classes/Feed/XML/RSS.php
	lib/classes/Feed/XML/RSS/Image.php
	lib/classes/Feed/XML/RSS/ImageInterface.php
	lib/classes/User/Adapter.php
	lib/classes/User/Interface.php
	lib/classes/appbox/register.php
	lib/classes/connection.php
	lib/classes/connection/abstract.php
	lib/classes/connection/interface.php
	lib/classes/connection/pdo.php
	lib/classes/connection/pdoStatementDebugger.php
	lib/classes/deprecated/countries.php
	lib/classes/deprecated/inscript.api.php
	lib/classes/eventsmanager/event/test.php
	lib/classes/ftpclient.php
	lib/classes/http/request.php
	lib/classes/media/subdef.php
	lib/classes/module/console/schedulerStart.php
	lib/classes/module/console/schedulerState.php
	lib/classes/module/console/schedulerStop.php
	lib/classes/module/console/taskState.php
	lib/classes/module/console/tasklist.php
	lib/classes/module/console/taskrun.php
	lib/classes/patch/320alpha4b.php
	lib/classes/patch/3715alpha1a.php
	lib/classes/patch/379alpha1a.php
	lib/classes/patch/380alpha10a.php
	lib/classes/patch/380alpha11a.php
	lib/classes/patch/380alpha13a.php
	lib/classes/patch/380alpha14a.php
	lib/classes/patch/380alpha15a.php
	lib/classes/patch/380alpha16a.php
	lib/classes/patch/380alpha17a.php
	lib/classes/patch/380alpha18a.php
	lib/classes/patch/380alpha3a.php
	lib/classes/patch/380alpha4a.php
	lib/classes/patch/380alpha6a.php
	lib/classes/patch/380alpha8a.php
	lib/classes/patch/380alpha9a.php
	lib/classes/patch/381alpha1b.php
	lib/classes/patch/381alpha2a.php
	lib/classes/patch/381alpha3a.php
	lib/classes/patch/381alpha4a.php
	lib/classes/patch/383alpha1a.php
	lib/classes/patch/383alpha2a.php
	lib/classes/patch/383alpha3a.php
	lib/classes/patch/383alpha4a.php
	lib/classes/record/adapter.php
	lib/classes/record/preview.php
	lib/classes/recordutils.php
	lib/classes/recordutils/audio.php
	lib/classes/recordutils/document.php
	lib/classes/recordutils/map.php
	lib/classes/recordutils/video.php
	lib/classes/registry.php
	lib/classes/registryInterface.php
	lib/classes/set/order.php
	lib/classes/system/url.php
	lib/classes/task/Scheduler.php
	lib/classes/task/appboxAbstract.php
	lib/classes/task/databoxAbstract.php
	lib/classes/task/manager.php
	lib/classes/task/period/RecordMover.php
	lib/classes/task/period/apibridge.php
	lib/classes/task/period/apiwebhooks.php
	lib/classes/task/period/archive.php
	lib/classes/task/period/cindexer.php
	lib/classes/task/period/emptyColl.php
	lib/classes/task/period/ftp.php
	lib/classes/task/period/ftpPull.php
	lib/classes/task/period/subdef.php
	lib/classes/task/period/test.php
	lib/classes/task/period/writemeta.php
	lib/conf.d/PhraseaFixture/AbstractWZ.php
	lib/conf.d/PhraseaFixture/Basket/LoadFiveBaskets.php
	lib/conf.d/PhraseaFixture/Basket/LoadOneBasket.php
	lib/conf.d/PhraseaFixture/Basket/LoadOneBasketEnv.php
	lib/conf.d/PhraseaFixture/Lazaret/LoadOneFile.php
	lib/conf.d/PhraseaFixture/Story/LoadOneStory.php
	lib/conf.d/PhraseaFixture/UsrLists/ListAbstract.php
	lib/conf.d/PhraseaFixture/UsrLists/UsrList.php
	lib/conf.d/PhraseaFixture/UsrLists/UsrListEntry.php
	lib/conf.d/PhraseaFixture/UsrLists/UsrListOwner.php
	lib/conf.d/PhraseaFixture/ValidationParticipant/LoadOneParticipant.php
	lib/conf.d/PhraseaFixture/ValidationParticipant/LoadParticipantWithSession.php
	lib/conf.d/PhraseaFixture/ValidationSession/LoadOneValidationSession.php
	templates/web/admin/collection/collection.html.twig
	templates/web/common/dialog_export.html.twig
	templates/web/common/menubar.html.twig
	templates/web/prod/actions/Tools/index.html.twig
	templates/web/prod/index.html.twig
	templates/web/prod/upload/upload-flash.html.twig
	templates/web/prod/upload/upload.html.twig
	templates/web/report/report_layout_child.html.twig
	templates/web/setup/step2.html.twig
	templates/web/thesaurus/new-synonym-dialog.html.twig
	templates/web/thesaurus/properties.html.twig
	templates/web/thesaurus/search.html.twig
	tests/Alchemy/Tests/Phrasea/Application/ApiAbstract.php
	tests/Alchemy/Tests/Phrasea/Cache/FactoryTest.php
	tests/Alchemy/Tests/Phrasea/Controller/Admin/AdminCollectionTest.php
	tests/Alchemy/Tests/Phrasea/Controller/Client/RootTest.php
2015-02-05 18:38:49 +01:00

145 lines
4.0 KiB
PHP

<?php
/*
* This file is part of Phraseanet
*
* (c) 2005-2015 Alchemy
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
use Alchemy\Phrasea\Command\Command;
use Symfony\Component\Console\Input\InputInterface;
use Symfony\Component\Console\Output\OutputInterface;
use Symfony\Component\Process\ProcessBuilder;
class module_console_sphinxGenerateSuggestion extends Command
{
public function __construct($name = null)
{
parent::__construct($name);
$this->setDescription('Generates suggestions for Sphinx Search Engine, useful for query auto-completion');
return $this;
}
protected function doExecute(InputInterface $input, OutputInterface $output)
{
define('FREQ_THRESHOLD', 10);
define('SUGGEST_DEBUG', 0);
$params = phrasea::sbas_params($this->container);
foreach ($params as $sbas_id => $p) {
$index = sprintf("%u", crc32(
str_replace(
['.', '%']
, '_'
, sprintf('%s_%s_%s_%s', $p['host'], $p['port'], $p['user'], $p['dbname'])
)
));
$tmp_file = sys_get_temp_dir().'/dict' . $index . '.txt';
$databox = $this->getService('phraseanet.appbox')->get_databox($sbas_id);
$output->writeln("process Databox " . $databox->get_label($this->container['locale']) . " / $index\n");
if ( ! is_executable("/usr/local/bin/indexer")) {
$output->writeln("<error>'/usr/local/bin/indexer' is not executable</error>");
return 1;
}
$builder = ProcessBuilder::create(['/usr/local/bin/indexer']);
$builder->add('metadatas' . $index)
->add('--buildstops')
->add($tmp_file)
->add(1000000)
->add('--buildfreqs');
$builder->getProcess()->run();
if ( ! file_exists($tmp_file)) {
$output->writeln("<error> file '" . $tmp_file . "' does not exist</error>");
return 1;
}
try {
$connbas = $databox->get_connection()->connect();
} catch (\Exception $e) {
continue;
}
$sql = 'TRUNCATE suggest';
$stmt = $connbas->prepare($sql);
$stmt->execute();
$stmt->closeCursor();
$sql = $this->BuildDictionarySQL($output, file_get_contents($tmp_file));
if (trim($sql) !== '') {
$stmt = $connbas->prepare($sql);
$stmt->execute();
$stmt->closeCursor();
}
unlink($tmp_file);
}
return 0;
}
protected function BuildTrigrams($keyword)
{
$t = "__" . $keyword . "__";
$trigrams = "";
for ($i = 0; $i < strlen($t) - 2; $i ++ )
$trigrams .= substr($t, $i, 3) . " ";
return $trigrams;
}
protected function BuildDictionarySQL(OutputInterface $output, $in)
{
$out = '';
$n = 0;
$lines = explode("\n", $in);
foreach ($lines as $line) {
if (trim($line) === '')
continue;
list ( $keyword, $freq ) = explode(" ", trim($line));
if ($freq < FREQ_THRESHOLD || strstr($keyword, "_") !== false || strstr($keyword, "'") !== false)
continue;
if (ctype_digit($keyword)) {
continue;
}
if (mb_strlen($keyword) < 3) {
continue;
}
$trigrams = $this->BuildTrigrams($keyword);
if ($n ++)
$out .= ",\n";
$out .= "( $n, '$keyword', '$trigrams', $freq )";
}
if (trim($out) !== '') {
$out = "INSERT INTO suggest VALUES " . $out . ";";
}
$output->writeln(sprintf("Generated <info>%d</info> suggestions", $n));
return $out;
}
}