From 02695e1006f34dd3eb29e3f70d7bbcefab1e5854 Mon Sep 17 00:00:00 2001 From: Jean-Yves Gaulier Date: Mon, 30 Jul 2018 20:29:03 +0200 Subject: [PATCH] PHRAS-2161_coll-id-in-log-docs_MASTER moved the compute/fix "log_docs.coll_id" from patch to cli. --- bin/developer | 2 + .../Command/Developer/FixLogCollId.php | 373 ++++++++++++++++++ 2 files changed, 375 insertions(+) create mode 100644 lib/Alchemy/Phrasea/Command/Developer/FixLogCollId.php diff --git a/bin/developer b/bin/developer index 7a1f0888ae..15963da3f5 100755 --- a/bin/developer +++ b/bin/developer @@ -22,6 +22,7 @@ use Alchemy\Phrasea\Command\Developer\RoutesDumper; use Alchemy\Phrasea\Command\Developer\SetupTestsDbs; use Alchemy\Phrasea\Command\Developer\Uninstaller; use Alchemy\Phrasea\Command\Developer\TranslationDumper; +use Alchemy\Phrasea\Command\Developer\FixLogCollId; use Doctrine\DBAL\Tools\Console\Helper\ConnectionHelper; use Doctrine\ORM\Tools\Console\Helper\EntityManagerHelper; @@ -78,6 +79,7 @@ $cli->command(new Uninstaller()); $cli->command(new JsFixtures()); $cli->command(new IniReset()); $cli->command(new SetupTestsDbs()); +$cli->command(new FixLogCollId()); $cli->command(new \module_console_systemTemplateGenerator('system:generate-templates')); diff --git a/lib/Alchemy/Phrasea/Command/Developer/FixLogCollId.php b/lib/Alchemy/Phrasea/Command/Developer/FixLogCollId.php new file mode 100644 index 0000000000..ae29eebd31 --- /dev/null +++ b/lib/Alchemy/Phrasea/Command/Developer/FixLogCollId.php @@ -0,0 +1,373 @@ +setDescription('Fix empty (null) coll_id in "log_docs" and "log_view" tables.'); + $this->addOption('databox', null, InputOption::VALUE_OPTIONAL, 'Mandatory : The id (or dbname or viewname) of the databox'); + $this->addOption('batchsize', null, InputOption::VALUE_OPTIONAL, 'work on a batch of n entries (default=100000)'); + $this->addOption('dry', null, InputOption::VALUE_NONE, 'dry run, list but don\'t act'); + $this->addOption('show_sql', null, InputOption::VALUE_NONE, 'show sql pre-selecting records'); + $this->addOption('keep_tmp_table', null, InputOption::VALUE_NONE, 'keep the working "tmp_coll" table (help debug)'); + + $this->setHelp("help"); + } + + /** + * merge options so one can mix csv-option and/or multiple options + * ex. with keepUnique = false : --opt=a,b --opt=c --opt=b ==> [a,b,c,b] + * ex. with keepUnique = true : --opt=a,b --opt=c --opt=b ==> [a,b,c] + * + * @param InputInterface $input + * @param string $optionName + * @param int $option + * @return array + */ + private function getOptionAsArray(InputInterface $input, $optionName, $option) + { + $ret = []; + foreach($input->getOption($optionName) as $v0) { + foreach(explode(',', $v0) as $v) { + $v = trim($v); + if($option & self::OPTION_ALL_VALUES || !in_array($v, $ret)) { + $ret[] = $v; + } + } + } + + return $ret; + } + + /** + * print a string if verbosity >= verbose (-v) + * @param string $s + */ + private function verbose($s) + { + if($this->output->getVerbosity() >= OutputInterface::VERBOSITY_VERBOSE) { + $this->output->write($s); + } + } + + /** + * sanity check the cmd line options + * + * @param InputInterface $input + * @param OutputInterface $output + * @return bool + */ + protected function sanitizeArgs(InputInterface $input, OutputInterface $output) + { + $argsOK = true; + + // find the databox / collection by id or by name + $this->databoxes = []; + if(!is_null($d = $input->getOption('databox'))) { + $d = trim($d); + } + foreach ($this->container->getDataboxes() as $db) { + if(is_null($d)){ + $this->databoxes[] = $db; + } + else { + if ($db->get_sbas_id() == (int)$d || $db->get_viewname() == $d || $db->get_dbname() == $d) { + $this->databoxes[] = $db; + } + } + } + if (empty($this->databoxes)) { + if(is_null($d)) { + $output->writeln(sprintf("No databox found", $d)); + } + else { + $output->writeln(sprintf("Unknown databox \"%s\"", $d)); + } + $argsOK = false; + } + + // get options + $this->batchsize = $input->getOption('batchsize'); + $this->show_sql = $input->getOption('show_sql') ? true : false; + $this->dry = $input->getOption('dry') ? true : false; + $this->keep_tmp_table = $input->getOption('keep_tmp_table') ? true : false; + + if(is_null($this->batchsize)) { + $this->batchsize = 100000; + } + if($this->batchsize < 1) { + $output->writeln(sprintf('batchsize must be > 0')); + $argsOK = false; + } + + return $argsOK; + } + + /** + * {@inheritdoc} + */ + protected function doExecute(InputInterface $input, OutputInterface $output) + { + $time_start = new \DateTime(); + + if(!$this->sanitizeArgs($input, $output)) { + return -1; + } + + $this->input = $input; + $this->output = $output; + + $tsql = [ + 'count' => [ + 'msg' => "Count work to do", + 'sql' => "SELECT\n" + . " SUM(IF(ISNULL(`coll_id`), 0, 1)) AS `n`,\n" + . " COUNT(*) AS `t`,\n" + . " SUM(IF(`coll_id`>0, 1, 0)) AS `p`,\n" + . " SUM(IF(`coll_id`=0, 1, 0)) AS `z`\n" + . " FROM `log_docs`", + 'fetch' => false, + 'code' => function($stmt) { + $row = $stmt->fetch(); + if(is_null($row['n'])) { + // no coll_id ? + $this->output->writeln(sprintf("The \"log_docs\" table has no \"coll_id\" column ? Please apply patch 410alpha12a")); + $this->again = false; + } + $this->output->writeln(""); + $this->output->writeln(sprintf("done: %s / %s (fixed: %s ; can't fix: %s)", $row['n'], $row['t'], $row['p'], $row['z'])); + }, + 'playdry' => self::PLAYDRY_SQL | self::PLAYDRY_CODE, + ], + + 'minmax' => [ + 'msg' => "Get a batch", + 'sql' => "SELECT MIN(`id`) AS `minid`, MAX(`id`) AS `maxid` FROM\n" + . " (SELECT `id` FROM `log_docs` WHERE ISNULL(`coll_id`) ORDER BY `id` DESC LIMIT " . $this->batchsize . ") AS `t", + 'fetch' => false, + 'code' => function($stmt) { + $row = $stmt->fetch(); + $this->output->writeln(""); + $this->output->writeln(sprintf("minid: %s ; maxid : %s\n", is_null($row['minid']) ? 'null' : $row['minid'], is_null($row['maxid']) ? 'null' : $row['maxid'])); + if (is_null($row['minid']) || is_null($row['maxid'])) { + $this->again = false; + } + $this->parm_v = [(int)$row['minid'], (int)$row['maxid']]; + }, + 'playdry' => self::PLAYDRY_SQL | self::PLAYDRY_CODE, + ], + + 'trunc' => [ + 'msg' => "Empty working table", + 'sql' => "TRUNCATE TABLE `tmp_colls`", + 'fetch' => false, + 'code' => function($row) {}, + 'playdry' => self::PLAYDRY_NONE, + ], + + 'offset' => [ + 'msg' => "Make room for \"collection_from\" actions", + 'sql' => "UPDATE `log_docs` SET `id`=`id` * 2 WHERE `id` >= :minid AND `id` <= :maxid ORDER BY `id` DESC", + 'fetch' => false, + 'code' => function($stmt) { + // fix new minmax values since id was changed + $this->parm_v = [$this->parm_v[0] << 1, $this->parm_v[1] << 1]; + }, + 'playdry' => self::PLAYDRY_CODE, + ], + + 'compute' => [ + 'msg' => "Compute coll_id to working table", + 'sql' => "INSERT INTO `tmp_colls`\n" + . " SELECT `record_id`, `r1_id` AS `from_id`, `r1_date` AS `from_date`, MIN(`r2_id`) AS `to_id`, MIN(`r2_date`) AS `to_date`, `r1_final` AS `coll_id` FROM\n" + . " (\n" + . " SELECT `r1`.`record_id`, `r1`.`id` AS `r1_id`, `r1`.`date` AS `r1_date`, `r1`.`final` AS `r1_final`, `r2`.`id` AS `r2_id`, `r2`.`date` AS `r2_date` FROM\n" + . " (SELECT `id`, `date`, `record_id`, `action`, `final` FROM `log_docs` WHERE `action` IN('add', 'collection') AND `id` >= :minid AND `id` <= :maxid) AS `r1`\n" + . " LEFT JOIN `log_docs` AS `r2`\n" + . " ON `r2`.`record_id`=`r1`.`record_id` AND `r2`.`action`='collection' AND `r2`.`id`>`r1`.`id`\n" + . " )\n" + . " AS `t` GROUP BY `r1_id` ORDER BY `record_id` ASC, `from_id` ASC", + 'fetch' => false, + 'code' => function($stmt) {}, + 'playdry' => self::PLAYDRY_NONE, + ], + + 'copy_result' => [ + 'msg' => "Copy result back to \"log_docs\"", + 'sql' => "UPDATE `tmp_colls` INNER JOIN `log_docs`\n" + . " ON `tmp_colls`.`record_id` = `log_docs`.`record_id`\n" + . " AND `log_docs`.`id` >= `tmp_colls`.`from_id`\n" + . " AND (`log_docs`.`id` < `tmp_colls`.`to_id` OR ISNULL(`tmp_colls`.`to_id`))\n" + . " SET `log_docs`.`coll_id` = `tmp_colls`.`coll_id", + 'fetch' => false, + 'code' => function($stmt) {}, + 'playdry' => self::PLAYDRY_NONE, + ], + + 'collection_from' => [ + 'msg' => "Insert \"collection_from\" actions", + 'sql' => "INSERT INTO `log_docs` (`id`, `log_id`, `date`, `record_id`, `action`, `final`, `coll_id`)\n" + . " SELECT `r1`.`id`-1 AS `id`, `r1`.`log_id`, `r1`.`date`, `r1`.`record_id`, 'collection_from' AS `action`, `r1`.`final`,\n" + . " SUBSTRING_INDEX(GROUP_CONCAT(`r2`.`coll_id` ORDER BY `r1`.`id` DESC), ',', 1) AS `coll_id`\n" + . " FROM `log_docs` AS `r1` LEFT JOIN `log_docs` AS `r2`\n" + . " ON `r2`.`record_id`=`r1`.`record_id` AND `r2`.`id` < `r1`.`id` AND `r2`.`action` IN('collection', 'add')\n" + . " WHERE `r1`.`action` = 'collection' AND `r1`.`id` >= :minid AND `r1`.`id` <= :maxid\n" + . " GROUP BY `r1`.`id`", + 'fetch' => false, + 'code' => function($stmt) {}, + 'playdry' => self::PLAYDRY_NONE, + ], + + 'fix_unfound' => [ + 'msg' => "Set missing coll_id to 0", + 'sql' => "UPDATE `log_docs` SET `coll_id`=0 WHERE `id` >= :minid AND `id` <= :maxid AND ISNULL(`coll_id`)", + 'fetch' => false, + 'code' => function($stmt) {}, + 'playdry' => self::PLAYDRY_NONE, + ], + ]; + + + + foreach($this->databoxes as $databox) { + + $this->output->writeln(""); + $this->output->writeln(sprintf("================================ Working on databox %s (id:%s) ================================", $databox->get_dbname(), $databox->get_sbas_id())); + + $sql = "CREATE " . ($this->keep_tmp_table ? "TABLE IF NOT EXISTS" : "TEMPORARY TABLE") . " `tmp_colls` (\n" + . " `record_id` int(11) unsigned NOT NULL,\n" + . " `from_id` int(11) unsigned NOT NULL,\n" + . " `from_date` datetime NOT NULL,\n" + . " `to_id` int(11) unsigned DEFAULT NULL,\n" + . " `to_date` datetime DEFAULT NULL,\n" + . " `coll_id` int(10) unsigned NOT NULL,\n" + . " KEY `record_id` (`record_id`),\n" + . " KEY `from_id` (`from_id`),\n" + . " KEY `from_date` (`from_date`),\n" + . " KEY `to_id` (`to_id`),\n" + . " KEY `to_date` (`to_date`)\n" + . ") ENGINE=InnoDB;"; + + $this->output->writeln(""); + $this->output->writeln(sprintf(" ----------------- Creating working %s table -----------------", ($this->keep_tmp_table ? "(temporary)" : ""))); + $this->output->writeln(""); + if($this->show_sql) { + $this->output->writeln($sql); + } + $stmt = $databox->get_connection()->prepare($sql); + $stmt->execute(); + $stmt->closeCursor(); + + for ($this->again=true; $this->again; ) { + + foreach($tsql as $work) { + if(!$this->again) { + break; + } + $this->output->writeln(""); + $this->output->writeln(sprintf(" ----------------- %s ----------------- %s", + $work['msg'], + $this->dry && !($work['playdry'] & self::PLAYDRY_SQL) ? " -- NOT PLAYED IN DRY MODE --" : "" + ) + ); + $this->output->writeln(""); + + $sql = str_replace($this->parm_k, $this->parm_v, $work['sql']); + if ($this->show_sql) { + $this->output->writeln($sql); + } + $stmt = null; + if(!$this->dry || ($work['playdry'] & self::PLAYDRY_SQL)) { + $stmt = $databox->get_connection()->prepare($sql); + $stmt->execute(); + } + if(!$this->dry || ($work['playdry'] & self::PLAYDRY_CODE)) { + $code = $work['code']; + $code($stmt); + } + if(!$this->dry || ($work['playdry'] & self::PLAYDRY_SQL)) { + $stmt->closeCursor(); + } + } + + if($this->dry) { + // since there is no change it may loop forever + $this->again = false; + } + + } + + if (!$this->keep_tmp_table) { + $this->output->writeln(sprintf(" ----------------- Drop working table -----------------")); + $sql = "DROP TABLE `tmp_colls`"; + if($this->show_sql) { + $this->output->writeln($sql); + } + $stmt = $databox->get_connection()->prepare($sql); + $stmt->execute(); + $stmt->closeCursor(); + } + + $this->output->writeln(""); + } + + return 0; + } + + private function runSQL($work) + { + + } +}