globalConfiguration = $globalConfiguration;
$this->job_conf = $job_conf;
$this->unicode = $unicode;
$this->output = $output;
if (array_key_exists('active', $job_conf) && $job_conf['active'] === false) {
$this->active = false;
return;
}
$this->errors = [];
foreach (['active', 'databox', 'source_field', 'destination_fields'] as $mandatory) {
if (!isset($job_conf[$mandatory])) {
$this->errors[] = sprintf("Missing mandatory setting (%s).", $mandatory);
}
}
if (!empty($this->errors)) {
return;
}
if (!($this->databox = $globalConfiguration->getDatabox($job_conf['databox']))) {
$this->errors[] = sprintf("unknown databox (%s).", $job_conf['databox']);
return;
}
if(array_key_exists('set_collection', $job_conf)) {
if(!($this->setCollection = $globalConfiguration->getCollection($this->databox->get_sbas_id(), $job_conf['set_collection']))) {
$this->errors[] = sprintf("unknown setCollection (%s).", $job_conf['set_collection']);
return;
}
}
if(array_key_exists('set_status', $job_conf)) {
$this->setStatus = $job_conf['set_status'];
}
$cnx = $this->databox->get_connection();
// get infos about the "source_field"
//
$sql = "SELECT `id`, `tbranch` FROM `metadatas_structure` WHERE `name` = :name AND `tbranch` != ''";
$stmt = $cnx->executeQuery($sql, [':name' => $job_conf['source_field']]);
$this->source_field = $stmt->fetch(PDO::FETCH_ASSOC);
$stmt->closeCursor();
if (!$this->source_field) {
$this->errors[] = sprintf("field (%s) not found or not linked to thesaurus.", $job_conf['source_field']);
return;
}
$this->source_field['lng'] = array_key_exists('source_lng', $job_conf) ? $job_conf['source_lng'] : null;
$this->selectRecordFieldIds[] = $this->source_field['id'];
$this->xpathTh = $this->databox->get_xpath_thesaurus();
$this->tbranches = $this->xpathTh->query($this->source_field['tbranch']);
if (!$this->tbranches || $this->tbranches->length <= 0) {
$this->errors[] = sprintf("thesaurus branch(es) (%s) not found.", $this->source_field['tbranch']);
return;
}
// get infos about the "destination_fields"
//
$this->destination_fields = [];
$sql = "SELECT `id`, `name` FROM `metadatas_structure` WHERE `name` = :name ";
$stmt = $cnx->prepare($sql);
foreach ($job_conf['destination_fields'] as $tf) {
list($lng, $fname) = explode(':', $tf);
$stmt->execute([':name' => $fname]);
if (!($row = $stmt->fetch(PDO::FETCH_ASSOC))) {
$this->output->writeln(sprintf("undefined field (%s) (ignored).", $fname));
continue;
}
$this->destination_fields[$lng] = $row;
$stmt->closeCursor();
$this->selectRecordFieldIds[] = $row['id'];
}
if (empty($this->destination_fields)) {
$this->errors[] = sprintf("no \"destination_field\" found.");
return;
}
// misc settings
$this->cleanupDestination = array_key_exists('cleanup_destination', $job_conf) && $job_conf['cleanup_destination'] === true;
$this->cleanupSource = array_key_exists('cleanup_source', $job_conf) ? $job_conf['cleanup_source'] : self::NEVER_CLEANUP_SOURCE;
// build records select sql
//
$selectRecordClauses = [];
$this->selectRecordParams = [];
if (array_key_exists('if_collection', $job_conf)) {
if (!($coll = $globalConfiguration->getCollection($job_conf['databox'], $job_conf['if_collection']))) {
$this->errors[] = sprintf("unknown collection (%s)", $job_conf['if_collection']);
return;
}
$selectRecordClauses[] = "`coll_id` = :coll_id";
$this->selectRecordParams[':coll_id'] = $coll->get_coll_id();
}
if (array_key_exists('if_status', $job_conf)) {
$selectRecordClauses[] = "`status` & b:sb_and = b:sb_equ";
$this->selectRecordParams[':sb_and'] = str_replace(['0', 'x'], ['1', '0'], $job_conf['if_status']);
$this->selectRecordParams[':sb_equ'] = str_replace('x', '0', $job_conf['if_status']);
}
$selectRecordClauses[] = "`meta_struct_id` IN ("
. join(
',',
array_map(function ($id) use ($cnx) {
return $cnx->quote($id);
}, $this->selectRecordFieldIds)
)
. ")";
$sql = "SELECT `record_id`, `meta_struct_id`, `metadatas`.`id` AS meta_id, `value` FROM";
$sql .= " `record` INNER JOIN `metadatas` USING(`record_id`)";
$sql .= " WHERE " . join(" AND ", $selectRecordClauses);
$sql .= " ORDER BY `record_id` ASC";
$this->selectRecordsSql = $sql;
}
/**
 * Executes the job.
 *
 * Reads the rows returned by the prepared "select records" query, groups the
 * metadata values by record and passes each record to doRecord(). The grouping
 * relies on the rows being ordered by record_id. When the report format is
 * 'condensed', a summary of the (not / incompletely / fully) translated terms
 * is written once all records are done.
 *
 * @return void
 */
public function run()
{
    $cnx = $this->databox->get_connection();
    $stmt = $cnx->executeQuery($this->selectRecordsSql, $this->selectRecordParams);

    $this->recordsDone = 0;
    $this->notTranslated = [];
    $this->incompletelyTranslated = [];
    $this->fullyTranslated = [];

    // one empty bucket per selected field id : [meta_struct_id => [meta_id => value]]
    $emptyValues = array_fill_keys($this->selectRecordFieldIds, []);
    $metas = $emptyValues;

    // null means "no record seen yet"; a typed sentinel avoids any loose comparison with a record id
    $currentRid = null;
    while ($row = $stmt->fetch(PDO::FETCH_ASSOC)) {
        if ($currentRid !== null && $row['record_id'] !== $currentRid) {
            // record change : flush the previous record and start with fresh buckets
            $this->doRecord($currentRid, $metas);
            $metas = $emptyValues;
        }
        $currentRid = $row['record_id'];
        $metas[$row['meta_struct_id']][$row['meta_id']] = $row['value'];
    }
    if ($currentRid !== null) {
        // flush the last record
        $this->doRecord($currentRid, $metas);
    }
    $stmt->closeCursor();

    // condensed report
    //
    if ($this->globalConfiguration->getReportFormat() !== 'condensed') {
        return;
    }

    $this->output->writeln(sprintf("%d records done.", $this->recordsDone));

    // writes one section of the report; the list is sorted in place (case-insensitive) before being printed
    $writeSection = function ($title, array &$terms) {
        if (empty($terms)) {
            return;
        }
        ksort($terms, SORT_STRING | SORT_FLAG_CASE);
        $this->output->writeln($title);
        foreach ($terms as $term => $n) {
            $this->output->writeln(sprintf(" - \"%s\" (%d times)", $term, $n));
        }
    };

    $writeSection("Not translated terms:", $this->notTranslated);
    $writeSection("Incompletely translated terms:", $this->incompletelyTranslated);
    $writeSection("Fully translated terms:", $this->fullyTranslated);
}
/**
 * Translates the "source_field" values of one record into the destination fields.
 *
 * For every source value, the thesaurus branches linked to the source field are
 * searched for a matching synonym; the first synonym found for each destination
 * language is the translation. The resulting changes (metadata to add / delete,
 * optional collection and status changes) are assembled as an "actions" structure
 * and applied to the record, unless the global configuration is in dry-run mode.
 *
 * @param int|string $record_id id of the record to process
 * @param array      $metas     values of the record, as [meta_struct_id => [meta_id => value]]
 *
 * @return void
 */
private function doRecord($record_id, $metas)
{
    $reportFormat = $this->globalConfiguration->getReportFormat();
    $reportDetails = in_array($reportFormat, ['all', 'translated'], true);

    if ($reportFormat !== 'condensed') {
        $this->output->writeln(sprintf("record id: %s", $record_id));
    }

    $source_field_id = $this->source_field['id'];

    $meta_to_delete = [];       // key = meta id, to easily keep unique
    $meta_to_add = [];          // key = meta_struct_id, value = list of values to add

    if ($this->cleanupDestination) {
        // start by scheduling every existing destination value for deletion;
        // values that turn out to be valid translations are un-scheduled below
        foreach ($this->destination_fields as $destination_field) {
            foreach ($metas[$destination_field['id']] as $meta_id => $value) {
                $meta_to_delete[$meta_id] = $value;
            }
        }
    }

    // loop on every value of the "source_field"
    //
    foreach ($metas[$source_field_id] as $source_meta_id => $source_value) {

        list($term, $context) = $this->splitTermAndContext($source_value);

        $q = '@w=\'' . \thesaurus::xquery_escape($this->unicode->remove_indexer_chars($term)) . '\'';
        // compare with '' (not truthiness) so that a context of "0" is not silently dropped
        if ($context !== '') {
            $q .= ' and @k=\'' . \thesaurus::xquery_escape($this->unicode->remove_indexer_chars($context)) . '\'';
        }
        if (!is_null($this->source_field['lng'])) {
            $q .= ' and @lng=\'' . \thesaurus::xquery_escape($this->source_field['lng']) . '\'';
        }
        // the leading "." makes the search relative to the branch passed as context node;
        // a bare "//sy" always starts from the document root and would ignore the branch
        $q = './/sy[' . $q . ']/../sy';

        // loop on every tbranch (one field may be linked to many branches)
        //
        $translations = [];     // ONE translation per lng (first found in th)

        /** @var DOMNode $tbranch */
        foreach ($this->tbranches as $tbranch) {
            if (!($nodes = $this->xpathTh->query($q, $tbranch))) {
                $this->output->writeln(sprintf(" - \"%s\" xpath error on (%s), ignored.", $source_value, $q));
                continue;
            }

            // loop on every synonym
            //
            /** @var DOMElement $node */
            foreach ($nodes as $node) {
                $lng = $node->getAttribute('lng');

                // ignore synonyms not in one of the "destination_field" languages,
                // and languages for which a translation was already found
                //
                if (!array_key_exists($lng, $this->destination_fields) || array_key_exists($lng, $translations)) {
                    continue;
                }

                $translated_value = $node->getAttribute('v');
                $destination_field_id = $this->destination_fields[$lng]['id'];
                $destination_field_name = $this->destination_fields[$lng]['name'];

                // strict search : numeric-looking values such as "1" and "1.0" must not be seen as equal
                $destination_meta_id = array_search($translated_value, $metas[$destination_field_id], true);

                if ($destination_meta_id === false) {
                    // the translation is not in the destination field yet : add it
                    $translations[$lng] = [
                        'val' => $translated_value,
                        'id'  => null,
                        'msg' => sprintf(" --> %s", $destination_field_name)
                    ];
                    $meta_to_add[$destination_field_id][] = $translated_value;
                }
                else {
                    // the translation already exists as destination : keep it (cancel a possible deletion)
                    $translations[$lng] = [
                        'val' => $translated_value,
                        'id'  => $destination_meta_id,
                        'msg' => sprintf("already in %s", $destination_field_name)
                    ];
                    unset($meta_to_delete[$destination_meta_id]);
                }
            }
        }

        // report
        //
        $fullyTranslated = false;
        if (empty($translations)) {
            if ($reportFormat === 'all') {
                $this->output->writeln(sprintf(" - \"%s\" : no translation found.", $source_value));
            }
            $this->addToCondensedReport($source_value, $this->notTranslated);
        }
        else if (count($translations) < count($this->destination_fields)) {
            if ($reportDetails) {
                $this->output->writeln(sprintf(" - \"%s\" : incomplete translation.", $source_value));
            }
            $this->addToCondensedReport($source_value, $this->incompletelyTranslated);
        }
        else {
            // complete translation (all target lng)
            if ($reportDetails) {
                $this->output->writeln(sprintf(" - \"%s\" :", $source_value));
            }
            $this->addToCondensedReport($source_value, $this->fullyTranslated);
            $fullyTranslated = true;
        }

        if ($reportDetails) {
            foreach ($translations as $lng => $translation) {
                $this->output->writeln(sprintf(" - [%s] \"%s\" %s", $lng, $translation['val'], $translation['msg']));
            }
        }

        // cleanup source
        //
        $mustCleanupSource = $this->cleanupSource === self::ALWAYS_CLEANUP_SOURCE
            || ($fullyTranslated && $this->cleanupSource === self::CLEANUP_SOURCE_IF_TRANSLATED);

        if ($mustCleanupSource) {
            // do NOT delete the source term if one translation found it as already present
            // as destination (possible if source=destination)
            $usedAsDestination = false;
            foreach ($translations as $translation) {
                if ($translation['id'] === $source_meta_id) {
                    $usedAsDestination = true;
                    break;
                }
            }
            if (!$usedAsDestination) {
                $meta_to_delete[$source_meta_id] = $source_value;
            }
        }
    }

    // build the actions
    //
    $actions = [];

    $metadatas = [];
    foreach ($meta_to_delete as $id => $value) {
        $metadatas[] = [
            'action'  => "delete",
            'meta_id' => $id,
            '_value_' => $value
        ];
    }
    foreach ($meta_to_add as $struct_id => $values) {
        $metadatas[] = [
            'action'         => "add",
            'meta_struct_id' => $struct_id,
            'value'          => $values
        ];
    }
    if (!empty($metadatas)) {
        $actions['metadatas'] = $metadatas;
    }

    if (!is_null($this->setCollection)) {
        $actions['base_id'] = $this->setCollection->get_base_id();
    }

    if (!is_null($this->setStatus)) {
        // the status mask is read right to left (rightmost char = bit 0);
        // any char other than '0' or '1' leaves the bit unchanged
        $status = [];
        foreach (str_split(strrev($this->setStatus), 1) as $bit => $v) {
            if ($v === '0' || $v === '1') {
                $status[] = [
                    'bit'   => $bit,
                    'state' => $v === '1'
                ];
            }
        }
        if (!empty($status)) {
            $actions['status'] = $status;
        }
    }

    // the json round-trip turns the nested arrays into the objects passed to setMetadatasByActions()
    $jsActions = json_encode($actions, JSON_PRETTY_PRINT);
    if ($jsActions === false) {
        // e.g. a value that is not valid utf-8 : report and skip instead of applying a broken payload
        $this->output->writeln(sprintf(" - record %s : can't encode actions (%s), ignored.", $record_id, json_last_error_msg()));
        return;
    }

    if ($this->output->getVerbosity() >= OutputInterface::VERBOSITY_VERY_VERBOSE) {
        $this->output->writeln(sprintf("JS : %s", $jsActions));
    }

    if (!$this->globalConfiguration->isDryRun()) {
        $record = $this->getDatabox()->getRecordRepository()->find($record_id);
        if (!$record) {
            // NOTE(review): assumes find() yields a falsy value for a missing record (e.g. deleted
            // since the select was executed) — confirm against the repository implementation
            $this->output->writeln(sprintf(" - record %s not found, ignored.", $record_id));
            return;
        }
        $record->setMetadatasByActions(json_decode($jsActions));
    }

    $this->recordsDone++;
}
/**
 * Counts one more occurrence of a term in one of the condensed-report lists.
 *
 * Nothing is recorded unless the report format is 'condensed'.
 *
 * @param string $term  the term to count
 * @param array  $where the list to update (passed by reference), as [term => number of occurrences]
 *
 * @return void
 */
private function addToCondensedReport($term, &$where)
{
    if ($this->globalConfiguration->getReportFormat() === 'condensed') {
        // first occurrence starts the counter at 1, later ones increment it
        $where[$term] = array_key_exists($term, $where) ? $where[$term] + 1 : 1;
    }
}
/**
 * Splits a value written as "term (context)" into its two parts.
 *
 * The term is what precedes the first "(". The context is what lies between
 * that "(" and the next ")", or everything after the "(" when there is no ")".
 * Both parts are trimmed; the context is '' when the value has no "(".
 *
 * @param string $word the raw value
 *
 * @return string[] [term, context]
 */
private function splitTermAndContext($word)
{
    $trimmed = trim($word);

    $open = strpos($trimmed, '(');
    if ($open === false) {
        // no context at all
        return [$trimmed, ''];
    }

    $close = strpos($trimmed, ')', $open);
    $rawContext = ($close === false)
        ? substr($trimmed, $open + 1)                       // unclosed parenthesis : take the rest
        : substr($trimmed, $open + 1, $close - $open - 1);  // text between the parentheses

    return [trim(substr($trimmed, 0, $open)), trim($rawContext)];
}
/**
 * Returns the error messages collected while the job configuration was checked.
 *
 * The initialisation code returns early for an inactive job before it assigns
 * the error list, so a missing list is reported as "no error" here.
 * NOTE(review): harmless no-op if the property is declared with a [] default — confirm.
 *
 * @return string[]
 */
public function getErrors(): array
{
    return $this->errors ?? [];
}
/**
 * Tells whether the job configuration was accepted, i.e. no error was recorded.
 *
 * @return bool
 */
public function isValid(): bool
{
    if (!empty($this->errors)) {
        return false;
    }

    return true;
}
/**
 * Gives access to the databox this job works on.
 *
 * @return databox|null
 */
public function getDatabox()
{
    $databox = $this->databox;

    return $databox;
}
/**
 * Tells whether the job is active (the 'active' setting was not explicitly false).
 *
 * @return bool
 */
public function isActive(): bool
{
    $active = $this->active;

    return $active;
}
}