778 lines
50 KiB
PHP
778 lines
50 KiB
PHP
<?php
|
|
|
|
// *******************************************************************************
|
|
//
|
|
// Included within do_search() for keywords processing and assembly into SQL
|
|
//
|
|
// *******************************************************************************
|
|
|
|
// Candidate for replacement function including the variables needed.
|
|
// function search_process_keyword($keyword, &$n, &$c, &$keywords, &$hidden_indexed_fields, &$search, &$searchidmatch, &$restypenames, &$ignore_filters, &$node_bucket, &$stats_logging, &$go, &$sql_keyword_union, &$sql_keyword_union_aggregation, &$sql_keyword_union_criteria, &$sql_keyword_union_or, &$omit, &$fullmatch, &$suggested, &$sql_join, &$sql_filter)
|
|
|
|
$keywords_used = [];
|
|
|
|
if ($keysearch) {
|
|
$date_parts = [];
|
|
for ($n = 0; $n < count($keywords); $n++) {
|
|
$canskip = false;
|
|
$search_field_restrict = "";
|
|
$keyword = $keywords[$n];
|
|
debug("do_search(): \$keyword = {$keyword}");
|
|
$quoted_string = (substr($keyword, 0, 1) == "\"" || substr($keyword, 0, 2) == "-\"" ) && substr($keyword, -1, 1) == "\"";
|
|
|
|
$quoted_field_match = false;
|
|
$field_short_name_specified = false;
|
|
|
|
// Extra sql to search non-field data that used to be stored in resource_keyword e.g. resource type/resource contributor
|
|
$non_field_keyword_sql = new PreparedStatementQuery();
|
|
|
|
if ($quoted_string && substr($keyword, 1, strlen(FULLTEXT_SEARCH_PREFIX)) == FULLTEXT_SEARCH_PREFIX) {
|
|
// Full text search
|
|
$fulltext_string = str_replace(FULLTEXT_SEARCH_QUOTES_PLACEHOLDER, "\"", substr($keyword, strlen(FULLTEXT_SEARCH_PREFIX) + 2, -1));
|
|
if (strpos($fulltext_string, "@") !== false) {
|
|
// There's an @ character in the fulltext search which InnoDB does not permit, so quote-wrap the search string
|
|
$fulltext_string = "'\"" . $fulltext_string . "\"'";
|
|
}
|
|
|
|
$freetextunion = new PreparedStatementQuery();
|
|
$freetextunion->sql = " SELECT resource, [bit_or_condition] 1 AS score FROM resource_node rn LEFT JOIN node n ON n.ref=rn.node WHERE MATCH(name) AGAINST (? IN BOOLEAN MODE)";
|
|
$freetextunion->parameters = ["s",$fulltext_string];
|
|
if (count($hidden_indexed_fields) > 0) {
|
|
$freetextunion->sql .= " AND n.resource_type_field NOT IN (" . ps_param_insert(count($hidden_indexed_fields)) . ")";
|
|
$freetextunion->parameters = array_merge($freetextunion->parameters, ps_param_fill($hidden_indexed_fields, "i"));
|
|
}
|
|
|
|
$sql_keyword_union[] = $freetextunion;
|
|
$sql_keyword_union_aggregation[] = "BIT_OR(`keyword_[union_index]_found`) AS `keyword_[union_index]_found`";
|
|
$sql_keyword_union_or[] = false;
|
|
$sql_keyword_union_criteria[] = "`h`.`keyword_[union_index]_found`";
|
|
continue;
|
|
}
|
|
|
|
if ($keyword == $search && is_int_loose($keyword) && $searchidmatch) {
|
|
// Resource ID is no longer indexed, if search is just for a single integer then include this
|
|
$non_field_keyword_sql->sql .= " UNION (SELECT " . (int)$keyword . " AS resource, [bit_or_condition] 1 AS score)";
|
|
$canskip = true;
|
|
} elseif (in_array(mb_strtolower($keyword), array_map("mb_strtolower", array_column($restypenames, "name")))) {
|
|
// Resource type is no longer actually indexed but this will still honour the config by including in search
|
|
$non_field_keyword_sql->sql .= " UNION (SELECT r.ref AS resource, [bit_or_condition] 1 AS score FROM resource r LEFT JOIN resource_type rt ON r.resource_type=rt.ref WHERE r.ref > 0 AND rt.name LIKE ?)";
|
|
array_push($non_field_keyword_sql->parameters, "s", $keyword);
|
|
$canskip = true;
|
|
}
|
|
if ($index_contributed_by) {
|
|
// Resource type is no longer actually indexed but this will still honour the config by including in search
|
|
$matchusers = get_users(0, $keyword, "u.username", true);
|
|
if (count($matchusers) > 0) {
|
|
$user_sql = get_users(0, $keyword, "u.username", true, -1, "", true);
|
|
$non_field_keyword_sql->sql .= " UNION (
|
|
SELECT r.ref AS resource, [bit_or_condition] 1 AS score
|
|
FROM resource r
|
|
WHERE
|
|
r.created_by IN (SELECT u.ref FROM (" . $user_sql->sql . " ) as u)
|
|
AND r.ref >0
|
|
)";
|
|
$non_field_keyword_sql->parameters = array_merge(
|
|
$non_field_keyword_sql->parameters,
|
|
$user_sql->parameters
|
|
);
|
|
$canskip = true;
|
|
}
|
|
}
|
|
|
|
if (
|
|
(
|
|
!$quoted_string
|
|
|| ($quoted_string // If quoted string with a field specified we first need to try and resolve it to a node instead of working out keyword positions etc.
|
|
&& strpos($keyword, ":") !== false)
|
|
)
|
|
&& (substr($keyword, 0, 1) != "!"
|
|
|| substr($keyword, 0, 6) == "!empty")
|
|
) {
|
|
$keywordprocessed = false;
|
|
|
|
if (strpos($keyword, ":") !== false) {
|
|
$field_short_name_specified = true;
|
|
$kw = explode(":", ($quoted_string ? substr($keyword, 1, -1) : $keyword), 2);
|
|
# Fetch field info
|
|
global $fieldinfo_cache;
|
|
$fieldname = $kw[0];
|
|
$keystring = $kw[1];
|
|
debug("do_search(): \$fieldname = {$fieldname}");
|
|
debug("do_search(): \$keystring = {$keystring}");
|
|
if (isset($fieldinfo_cache[$fieldname])) {
|
|
$fieldinfo = $fieldinfo_cache[$fieldname];
|
|
} else {
|
|
$fieldinfo = ps_query("SELECT ref, `type` FROM resource_type_field WHERE name = ?", ["s",$fieldname], "schema");
|
|
|
|
// Checking for date from Simple Search will result with a fieldname like 'year' which obviously does not exist
|
|
if (0 === count($fieldinfo) && ('basicyear' == $kw[0] || 'basicmonth' == $kw[0] || 'basicday' == $kw[0])) {
|
|
$fieldinfo = ps_query("SELECT ref, `type` FROM resource_type_field WHERE ref = ?", ["i",$date_field], "schema");
|
|
}
|
|
if (0 === count($fieldinfo)) {
|
|
// Search may just happen to include a colon - treat the colon as a space and add to $keywords array to process separately
|
|
$addedkeywords = explode(":", $keyword);
|
|
$keywords = array_merge($keywords, $addedkeywords);
|
|
continue;
|
|
} else {
|
|
$fieldinfo = $fieldinfo[0];
|
|
$fieldinfo_cache[$fieldname] = $fieldinfo;
|
|
}
|
|
}
|
|
if (!metadata_field_view_access($fieldinfo['ref'])) {
|
|
// User can't search against a metadata field they don't have access to
|
|
return false;
|
|
}
|
|
}
|
|
|
|
//First try and process special keyword types
|
|
if ($field_short_name_specified && !$quoted_string && !$ignore_filters && isset($fieldinfo['type']) && in_array($fieldinfo['type'], $DATE_FIELD_TYPES)) {
|
|
// ********************************************************************************
|
|
// Date field keyword
|
|
// ********************************************************************************
|
|
|
|
global $datefieldinfo_cache;
|
|
if (isset($datefieldinfo_cache[$fieldname])) {
|
|
$datefieldinfo = $datefieldinfo_cache[$fieldname];
|
|
} else {
|
|
$datefieldinfo = ps_query("SELECT ref FROM resource_type_field WHERE name = ? AND type IN (" . FIELD_TYPE_DATE_AND_OPTIONAL_TIME . "," . FIELD_TYPE_EXPIRY_DATE . "," . FIELD_TYPE_DATE . "," . FIELD_TYPE_DATE_RANGE . ")", ["s",$fieldname], "schema");
|
|
$datefieldinfo_cache[$fieldname] = $datefieldinfo;
|
|
}
|
|
|
|
if (count($datefieldinfo) && substr($keystring, 0, 5) != "range") {
|
|
$c++;
|
|
$datefieldinfo = $datefieldinfo[0];
|
|
$datefield = $datefieldinfo["ref"];
|
|
|
|
$val = str_replace("n", "_", $keystring);
|
|
$val = str_replace("|", "-", $val);
|
|
|
|
if ($fieldinfo['type'] == FIELD_TYPE_DATE_RANGE) {
|
|
// Find where the searched value is between the range values
|
|
$sql_join->sql .= " JOIN resource_node drrn" . $c . "s ON drrn" . $c . "s.resource=r.ref JOIN node drn" . $c . "s ON drn" . $c . "s.ref=drrn" . $c . "s.node AND drn" . $c . "s.resource_type_field = ? AND drn" . $c . "s.name >= ? JOIN resource_node drrn" . $c . "e ON drrn" . $c . "e.resource=r.ref JOIN node drn" . $c . "e ON drn" . $c . "e.ref=drrn" . $c . "e.node AND drn" . $c . "e.resource_type_field = ? AND DATE(drn" . $c . "e.name) <= ?";
|
|
array_push($sql_join->parameters, "i", $datefield, "i", $datefield, "s", $val, "s", $val);
|
|
} else {
|
|
$sql_join->sql .= " JOIN resource_node rnd" . $c . " ON rnd" . $c . ".resource=r.ref JOIN node dn" . $c . " ON dn" . $c . ".ref=rnd" . $c . ".node AND dn" . $c . ".resource_type_field = ?";
|
|
array_push($sql_join->parameters, "i", $datefield);
|
|
|
|
$sql_filter->sql .= ($sql_filter->sql != "" ? " AND " : "") . "dn" . $c . ".name like ?";
|
|
array_push($sql_filter->parameters, "s", $val . "%");
|
|
}
|
|
|
|
|
|
// Find where the searched value is LIKE the range values
|
|
} elseif (in_array($kw[0], array("basicday","basicmonth","basicyear"))) {
|
|
$c++;
|
|
if (!isset($datefieldjoin)) {
|
|
// We only want to join once to the date_field
|
|
$sql_join->sql .= " JOIN resource_node rdnf" . $c . " ON rdnf" . $c . ".resource=r.ref JOIN node rdn" . $c . " ON rdnf" . $c . ".node=rdn" . $c . ".ref AND rdn" . $c . ".resource_type_field = ?";
|
|
$datefieldjoin = $c;
|
|
array_push($sql_join->parameters, "i", $date_field);
|
|
}
|
|
|
|
if ('basicday' == $kw[0]) {
|
|
$date_parts['day'] = $keystring;
|
|
} elseif ('basicmonth' == $kw[0]) {
|
|
$date_parts['month'] = $keystring;
|
|
} elseif ('basicyear' == $kw[0]) {
|
|
$date_parts['year'] = $keystring;
|
|
}
|
|
} elseif (count($datefieldinfo) && substr($keystring, 0, 5) == "range") {
|
|
# Additional date range filtering
|
|
$c++;
|
|
$rangestring = substr($keystring, 5);
|
|
if (strpos($rangestring, "start") !== false) {
|
|
$rangestartpos = strpos($rangestring, "start") + 5;
|
|
$rangestart = str_replace(" ", "-", substr($rangestring, $rangestartpos, strpos($rangestring, "end") ? strpos($rangestring, "end") - $rangestartpos : 10));
|
|
}
|
|
if (strpos($keystring, "end") !== false) {
|
|
$rangeend = str_replace(" ", "-", $rangestring);
|
|
$rangeend = substr($rangeend, strpos($rangeend, "end") + 3, 10) . " 23:59:59";
|
|
}
|
|
|
|
// Find where the start value or the end value is between the range values
|
|
if (isset($rangestart)) {
|
|
// Need to check for a date greater than the start date
|
|
$sql_join->sql .= " JOIN resource_node drrn" . $c . "s ON drrn" . $c . "s.resource=r.ref JOIN node drn" . $c . "s ON drn" . $c . "s.ref=drrn" . $c . "s.node AND drn" . $c . "s.resource_type_field = ? AND drn" . $c . "s.name>= ? ";
|
|
array_push($sql_join->parameters, "i", $fieldinfo['ref'], "s", $rangestart);
|
|
}
|
|
if (isset($rangeend)) {
|
|
// Need to check for a date earlier than the end date
|
|
$sql_join->sql .= " JOIN resource_node drrn" . $c . "e ON drrn" . $c . "e.resource=r.ref JOIN node drn" . $c . "e ON drn" . $c . "e.ref=drrn" . $c . "e.node AND drn" . $c . "e.resource_type_field = ? AND drn" . $c . "e.name <= ? ";
|
|
array_push($sql_join->parameters, "i", $fieldinfo['ref'], "s", $rangeend);
|
|
}
|
|
}
|
|
$keywordprocessed = true;
|
|
} elseif ($field_short_name_specified && substr($keystring, 0, 8) == "numrange" && !$quoted_string && !$ignore_filters && isset($fieldinfo['type']) && $fieldinfo['type'] == 0) {
|
|
// Text field numrange search ie mynumberfield:numrange1|1234 indicates that mynumberfield needs a numrange search for 1 to 1234.
|
|
$c++;
|
|
$rangefieldinfo = ps_query("SELECT ref FROM resource_type_field WHERE name = ? AND type IN (0)", ["s",$fieldname], "schema");
|
|
$rangefieldinfo = $rangefieldinfo[0];
|
|
$rangefield = $rangefieldinfo["ref"];
|
|
$rangestring = substr($keystring, 8);
|
|
$minmax = explode("|", $rangestring);
|
|
$min = str_replace("neg", "-", $minmax[0]);
|
|
if (isset($minmax[1])) {
|
|
$max = str_replace("neg", "-", $minmax[1]);
|
|
} else {
|
|
$max = '';
|
|
}
|
|
if ($max != '' || $min != '') {
|
|
// At least the min or max should be set
|
|
if ($max == '' || $min == '') {
|
|
// if only one number is entered, do a direct search
|
|
if ($sql_filter->sql != "") {
|
|
$sql_filter->sql .= " AND ";
|
|
}
|
|
$sql_filter->sql .= "rnn" . $c . ".name = ? ";
|
|
array_push($sql_filter->parameters, "d", max($min, $max));
|
|
} else {
|
|
// else use min and max values as a range search
|
|
if ($sql_filter->sql != "") {
|
|
$sql_filter->sql .= " AND ";
|
|
}
|
|
$sql_filter->sql .= "rnn" . $c . ".name >= ? ";
|
|
array_push($sql_filter->parameters, "d", $min);
|
|
if ($sql_filter->sql != "") {
|
|
$sql_filter->sql .= " AND ";
|
|
}
|
|
$sql_filter->sql .= "rnn" . $c . ".name <= ? ";
|
|
array_push($sql_filter->parameters, "d", $max);
|
|
}
|
|
}
|
|
|
|
$sql_join->sql .= " JOIN resource_node rrn" . $c . " ON rrn" . $c . ".resource=r.ref LEFT JOIN node rnn" . $c . " ON rnn" . $c . ".ref=rrn" . $c . ".node AND rnn" . $c . ".resource_type_field = ?";
|
|
|
|
array_push($sql_join->parameters, "i", $rangefield);
|
|
$keywordprocessed = true;
|
|
} elseif (
|
|
$field_short_name_specified
|
|
&& !$ignore_filters
|
|
&& isset($fieldinfo['type'])
|
|
&& in_array($fieldinfo['type'], $FIXED_LIST_FIELD_TYPES)
|
|
) {
|
|
// Convert legacy fixed list field search to new format for nodes (@@NodeID)
|
|
// We've searched using a legacy format (ie. fieldShortName:keyword), try and convert it to @@NodeID
|
|
$field_nodes = get_nodes($fieldinfo['ref'], null, false, true);
|
|
|
|
// Check if multiple nodes have been specified for an OR search
|
|
$keywords_expanded = explode(';', $keystring);
|
|
$nodeorcount = count($node_bucket);
|
|
foreach ($keywords_expanded as $keyword_expanded) {
|
|
debug("keyword_expanded: " . $keyword_expanded);
|
|
$field_node_index = array_search(mb_strtolower(i18n_get_translated($keyword_expanded)), array_map('i18n_get_translated', array_map('mb_strtolower', array_column($field_nodes, 'name'))));
|
|
// Take the ref of the node and add it to the node_bucket as an OR
|
|
if (false !== $field_node_index) {
|
|
$node_bucket[$nodeorcount][] = $field_nodes[$field_node_index]['ref'];
|
|
$quoted_field_match = true; // String has been resolved to a node so even if it is quoted we don't need to process it as a quoted string now
|
|
$keywordprocessed = true;
|
|
}
|
|
}
|
|
} elseif (
|
|
$field_short_name_specified
|
|
&& !$quoted_string
|
|
&& preg_match(
|
|
'/^(?=[^ \\t\\n]*[!@#$%^&()_+\\-=\\[\\]{}\'\"\\\\,.<>\\/?])[^ \\t\\n]*$/',
|
|
$keyword
|
|
) === 1
|
|
) {
|
|
// Quote field specific keywords that contain special characters and no spaces
|
|
$keyword = "\"$keyword\"";
|
|
$quoted_string = true;
|
|
}
|
|
|
|
if ($field_short_name_specified) { // Need this also for string matching in a named text field
|
|
$keyword = $keystring;
|
|
$search_field_restrict = $fieldinfo['ref'];
|
|
}
|
|
|
|
if (!$quoted_string && !$keywordprocessed && !($field_short_name_specified && hook('customsearchkeywordfilter', null, array($kw)))) { // Need this also for string matching in a named text field
|
|
// Normal keyword
|
|
//
|
|
// Searches all fields that the user has access to
|
|
// If ignoring field specifications then remove them.
|
|
$keywords_expanded = explode(';', $keyword);
|
|
$keywords_expanded_or = count($keywords_expanded) > 1;
|
|
# Omit resources containing this keyword?
|
|
$omit = false;
|
|
if (substr($keyword, 0, 1) == "-") {
|
|
$omit = true;
|
|
$keyword = substr($keyword, 1);
|
|
}
|
|
|
|
# Search for resources with an empty field, ex: !empty18 or !emptycaption
|
|
$empty = false;
|
|
if (substr($keyword, 0, 6) == "!empty") {
|
|
$nodatafield = str_replace("!empty", "", $keyword);
|
|
|
|
if (!is_numeric($nodatafield)) {
|
|
$nodatafield = ps_value("SELECT ref value FROM resource_type_field WHERE name = ?", ["i",$nodatafield], "", "schema");
|
|
}
|
|
|
|
if ($nodatafield == "" || !is_numeric($nodatafield)) {
|
|
exit('invalid !empty search');
|
|
}
|
|
$empty = true;
|
|
}
|
|
|
|
if (in_array($keyword, $noadd)) { # skip common words that are excluded from indexing
|
|
debug("do_search(): skipped common word: {$keyword}");
|
|
} else {
|
|
// ********************************************************************************
|
|
// Handle wildcards
|
|
// ********************************************************************************
|
|
$wildcards = false;
|
|
if (strpos($keyword, "*") !== false || $wildcard_always_applied) {
|
|
if ($wildcard_always_applied && strpos($keyword, "*") === false) {
|
|
# Suffix asterisk if none supplied and using $wildcard_always_applied mode.
|
|
$keyword = $keyword . "*";
|
|
}
|
|
$wildcards = true;
|
|
}
|
|
|
|
$keyref = resolve_keyword(str_replace('*', '', $keyword), false, true, !$quoted_string); # Resolve keyword. Ignore any wildcards when resolving. We need wildcards to be present later but not here.
|
|
if ($keyref === false) {
|
|
if ($stemming) {
|
|
// Attempt to find match for original keyword
|
|
$keyref = resolve_keyword(str_replace('*', '', $keyword), false, true, false);
|
|
}
|
|
|
|
if ($keyref === false) {
|
|
if ($keywords_expanded_or) {
|
|
$alternative_keywords = array();
|
|
foreach ($keywords_expanded as $keyword_expanded) {
|
|
$alternative_keyword_keyref = resolve_keyword($keyword_expanded, false, true, true);
|
|
if ($alternative_keyword_keyref === false) {
|
|
continue;
|
|
}
|
|
|
|
$alternative_keywords[] = $alternative_keyword_keyref;
|
|
}
|
|
|
|
if (count($alternative_keywords) > 0) {
|
|
// Multiple alternative keywords
|
|
$alternative_keywords_sql = new PreparedStatementQuery();
|
|
$alternative_keywords_sql->sql = " OR nk[union_index].keyword IN (" . ps_param_insert(count($alternative_keywords)) . ")";
|
|
$alternative_keywords_sql->parameters = ps_param_fill($alternative_keywords, "i");
|
|
debug("do_search(): \$alternative_keywords_sql = {$alternative_keywords_sql->sql}, parameters = " . implode(",", $alternative_keywords_sql->parameters));
|
|
}
|
|
} elseif (strpos($keyword, "*") === false && preg_match('/\\s/', $search) !== 1) {
|
|
// Check keyword for defined separators and if found each part of the value is added as a keyword for checking. Not for wildcards with no spaces
|
|
$contains_separators = false;
|
|
foreach ($config_separators as $separator) {
|
|
if (strpos($keyword, $separator) !== false) {
|
|
$contains_separators = true;
|
|
}
|
|
}
|
|
if ($contains_separators === true) {
|
|
$keyword_split = split_keywords($keyword);
|
|
|
|
if ($field_short_name_specified) {
|
|
$keyword_split = array_map(prefix_value($fieldname . ":"), $keyword_split);
|
|
}
|
|
$keywords = array_merge($keywords, $keyword_split);
|
|
continue;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if ($keyref === false && !$omit && !$empty && !$wildcards && !$field_short_name_specified && !$canskip) {
|
|
// ********************************************************************************
|
|
// No wildcards
|
|
// ********************************************************************************
|
|
|
|
$fullmatch = false;
|
|
$soundex = resolve_soundex($keyword);
|
|
if ($soundex === false) {
|
|
# No keyword match, and no keywords sound like this word. Suggest dropping this word.
|
|
$suggested[$n] = "";
|
|
} else {
|
|
# No keyword match, but there's a word that sounds like this word. Suggest this word instead.
|
|
$suggested[$n] = "<i>" . $soundex . "</i>";
|
|
}
|
|
} else {
|
|
// ********************************************************************************
|
|
// Found wildcards
|
|
// ********************************************************************************
|
|
|
|
// Multiple alternative keywords
|
|
$alternative_keywords_sql = new PreparedStatementQuery();
|
|
$alternative_keywords = array();
|
|
if ($keywords_expanded_or) {
|
|
foreach ($keywords_expanded as $keyword_expanded) {
|
|
$alternative_keyword_keyref = resolve_keyword($keyword_expanded, false, true, true);
|
|
|
|
if ($alternative_keyword_keyref === false) {
|
|
continue;
|
|
}
|
|
|
|
$alternative_keywords[] = $alternative_keyword_keyref;
|
|
}
|
|
|
|
if (count($alternative_keywords) > 0) {
|
|
$alternative_keywords_sql->sql = " OR nk[union_index].keyword IN (" . ps_param_insert(count($alternative_keywords)) . ")";
|
|
$alternative_keywords_sql->parameters = ps_param_fill($alternative_keywords, "i");
|
|
debug("do_search(): \$alternative_keywords_sql = {$alternative_keywords_sql->sql}, parameters = " . implode(",", $alternative_keywords_sql->parameters));
|
|
}
|
|
}
|
|
|
|
if ($keyref === false) {
|
|
# make a new keyword
|
|
$keyref = resolve_keyword(str_replace('*', '', $keyword), true, true, false);
|
|
}
|
|
# Key match, add to query.
|
|
$c++;
|
|
|
|
$relatedsql = new PreparedStatementQuery();
|
|
|
|
# Add related keywords
|
|
$related = get_related_keywords($keyref);
|
|
if ($stemming) {
|
|
# Need to ensure we include related keywords for original string
|
|
$original_keyref = resolve_keyword(str_replace('*', '', $keyword), false, true, false);
|
|
if ($original_keyref && $original_keyref !== $keyref) {
|
|
$original_related = get_related_keywords($original_keyref);
|
|
if (count($original_related) > 0) {
|
|
$original_related_kws = ps_array("SELECT keyword AS `value` FROM keyword WHERE ref IN (" . ps_param_insert(count($original_related)) . ")", ps_param_fill($original_related, "i"));
|
|
|
|
$extra_related = array();
|
|
foreach ($original_related_kws as $orig_related_kw) {
|
|
$extrakeyword = GetStem(trim($orig_related_kw));
|
|
$extra_related[] = resolve_keyword($extrakeyword, true, false, false);
|
|
}
|
|
$related = array_merge($related, $extra_related);
|
|
}
|
|
}
|
|
}
|
|
if (count($related) > 0) {
|
|
$relatedsql->sql .= " OR (nk[union_index].keyword IN (" . ps_param_insert(count($related)) . ")";
|
|
$relatedsql->parameters = array_merge($relatedsql->parameters, ps_param_fill($related, "i"));
|
|
|
|
if ($field_short_name_specified && isset($fieldinfo['ref'])) {
|
|
$relatedsql->sql .= " AND nk[union_index].node IN (SELECT ref FROM node WHERE resource_type_field = ? )";
|
|
$relatedsql->parameters[] = "i";
|
|
$relatedsql->parameters[] = $fieldinfo['ref'];
|
|
}
|
|
$relatedsql->sql .= ")";
|
|
}
|
|
|
|
# Form join
|
|
$sql_exclude_fields = hook("excludefieldsfromkeywordsearch");
|
|
|
|
if ($omit) {
|
|
# Exclude matching resources from query (omit feature)
|
|
if ($sql_filter->sql != "") {
|
|
$sql_filter->sql .= " AND ";
|
|
}
|
|
|
|
// ----- check that keyword does not exist via resource_node->node_keyword relationship -----
|
|
|
|
$sql_filter->sql .= "`r`.`ref` NOT IN (SELECT `resource` FROM `resource_node` JOIN `node_keyword` ON `resource_node`.`node`=`node_keyword`.`node`" .
|
|
" WHERE `resource_node`.`resource`=`r`.`ref` AND `node_keyword`.`keyword` = ?)";
|
|
array_push($sql_filter->parameters, "i", $keyref);
|
|
} else # Include in query
|
|
{
|
|
// --------------------------------------------------------------------------------
|
|
// Start of normal union for resource keywords
|
|
// --------------------------------------------------------------------------------
|
|
|
|
// // these restrictions apply to both !empty searches as well as normal keyword searches (i.e. both branches of next if statement)
|
|
$union_restriction_clause = new PreparedStatementQuery();
|
|
$skipfields = array();
|
|
|
|
if (!empty($sql_exclude_fields)) {
|
|
if ($wildcards) {
|
|
$union_restriction_clause->sql = " AND resource_type_field NOT IN (" . ps_param_insert(count($sql_exclude_fields)) . ")";
|
|
} else {
|
|
$union_restriction_clause->sql .= " AND nk[union_index].node NOT IN (SELECT ref FROM node WHERE resource_type_field IN (" . ps_param_insert(count($sql_exclude_fields)) . "))";
|
|
}
|
|
$union_restriction_clause->parameters = array_merge($union_restriction_clause->parameters, ps_param_fill($sql_exclude_fields, "i"));
|
|
|
|
$skipfields = explode(",", str_replace(array("'","\""), "", $sql_exclude_fields));
|
|
}
|
|
|
|
if (count($hidden_indexed_fields) > 0) {
|
|
if ($wildcards) {
|
|
$union_restriction_clause->sql = " AND resource_type_field NOT IN (" . ps_param_insert(count($hidden_indexed_fields)) . ")";
|
|
} else {
|
|
$union_restriction_clause->sql .= " AND nk[union_index].node NOT IN (SELECT ref FROM node WHERE node.resource_type_field IN (" . ps_param_insert(count($hidden_indexed_fields)) . "))";
|
|
}
|
|
$union_restriction_clause->parameters = array_merge($union_restriction_clause->parameters, ps_param_fill($hidden_indexed_fields, "i"));
|
|
$skipfields = array_merge($skipfields, $hidden_indexed_fields);
|
|
}
|
|
if (isset($search_field_restrict) && $search_field_restrict != "") {
|
|
// Search is looking for a keyword in a specified field
|
|
if ($wildcards) {
|
|
$union_restriction_clause->sql .= " AND resource_type_field = ?";
|
|
} else {
|
|
$union_restriction_clause->sql .= " AND nk[union_index].node IN (SELECT ref FROM node WHERE node.resource_type_field = ?)";
|
|
}
|
|
$union_restriction_clause->parameters = array_merge($union_restriction_clause->parameters, ["i",$search_field_restrict]);
|
|
}
|
|
if ($empty) { // we are dealing with a special search checking if a field is empty
|
|
// First check user can see this field
|
|
if (in_array($nodatafield, $skipfields)) {
|
|
// Not permitted to check this field, return false
|
|
return false;
|
|
}
|
|
|
|
$selectedrestypes = [];
|
|
$restypes = trim((string)($restypes));
|
|
if ($restypes != "") {
|
|
$selectedrestypes = explode(",", $restypes);
|
|
}
|
|
|
|
$restypesql = new PreparedStatementQuery();
|
|
|
|
$nodatafieldinfo = get_resource_type_field($nodatafield);
|
|
$nodatarestypes = trim((string)$nodatafieldinfo["resource_types"]);
|
|
if ($nodatarestypes != "") {
|
|
$nodatarestypes = explode(",", $nodatarestypes);
|
|
} else {
|
|
$nodatarestypes = [];
|
|
}
|
|
|
|
if ($nodatafieldinfo["global"] === 1) {
|
|
// Global field empty search
|
|
// Candidate resources are those which exist in selected resource types
|
|
if (count($selectedrestypes) > 0) {
|
|
$restypesql->sql = " AND r[union_index].resource_type IN (" . ps_param_insert(count($selectedrestypes)) . ") ";
|
|
$restypesql->parameters = ps_param_fill($selectedrestypes, "i");
|
|
}
|
|
} else {
|
|
// Non-global field empty search
|
|
// Candidate resources are those whose resource type is linked to the field and which exists in selected resource types
|
|
if (count($selectedrestypes) > 0) {
|
|
$candidaterestypes = array_intersect($nodatarestypes, $selectedrestypes);
|
|
} else {
|
|
$candidaterestypes = $nodatarestypes;
|
|
}
|
|
$restypesql->sql = " AND r[union_index].resource_type IN (" . ps_param_insert(count($candidaterestypes)) . ") ";
|
|
$restypesql->parameters = ps_param_fill($candidaterestypes, "i");
|
|
}
|
|
|
|
// Check that nodes are empty
|
|
$union = new PreparedStatementQuery();
|
|
$union->sql = "SELECT ref AS resource, [bit_or_condition] 1 AS score FROM resource r[union_index] WHERE r[union_index].ref NOT IN
|
|
(
|
|
SELECT rn.resource FROM
|
|
node n
|
|
RIGHT JOIN resource_node rn ON rn.node=n.ref
|
|
WHERE n.resource_type_field = ? $restypesql->sql
|
|
GROUP BY rn.resource
|
|
)";
|
|
$union->parameters = array_merge(["i",$nodatafield], $restypesql->parameters);
|
|
$sql_keyword_union[] = $union;
|
|
$sql_keyword_union_criteria[] = "`h`.`keyword_[union_index]_found`";
|
|
$sql_keyword_union_aggregation[] = "BIT_OR(`keyword_[union_index]_found`) AS `keyword_[union_index]_found`";
|
|
$sql_keyword_union_or[] = false;
|
|
} elseif ($wildcards) {
|
|
$union = new PreparedStatementQuery();
|
|
if (
|
|
substr($keyword, 0, 1) != "*"
|
|
&& strlen(trim($keyword, '*')) >= 3
|
|
&& preg_match('/[-+@<>().]/', $keyword) !== 1
|
|
) {
|
|
// Use fulltext search as a preference, but not if
|
|
// there is a leading wildcard
|
|
// or the search is too short
|
|
// or the search contains any of +,-,@,<,>,(,),.
|
|
$union->sql = "
|
|
SELECT resource, [bit_or_condition] hit_count AS score
|
|
FROM resource_node rn[union_index]
|
|
WHERE rn[union_index].node IN
|
|
(SELECT ref FROM `node` WHERE MATCH(name) AGAINST (? IN BOOLEAN MODE) "
|
|
. $union_restriction_clause->sql . ")
|
|
GROUP BY resource " .
|
|
($non_field_keyword_sql->sql != "" ? $non_field_keyword_sql->sql : "");
|
|
$keyword = "+" . $keyword;
|
|
} elseif (
|
|
preg_match('/\W/', str_replace("*", "", $keyword)) !== 1
|
|
) {
|
|
// Use a LIKE search, unless there is any whitespace
|
|
$keyword = str_replace("*", "%", $keyword);
|
|
$union->sql = "
|
|
SELECT resource, [bit_or_condition] hit_count AS score
|
|
FROM resource_node rn[union_index]
|
|
WHERE rn[union_index].node IN
|
|
(SELECT n.ref
|
|
FROM keyword k
|
|
JOIN node_keyword nk ON nk.keyword=k.ref
|
|
JOIN `node` n ON n.ref=nk.node
|
|
WHERE k.keyword LIKE ? "
|
|
. $union_restriction_clause->sql . ")
|
|
GROUP BY resource ";
|
|
} else {
|
|
// Use RLIKE to search between word boundaries in the node names
|
|
$keyword = preg_quote($keyword);
|
|
$keyword = str_replace('\*', '.*?', $keyword);
|
|
if (preg_match('/\w/', $keyword) === 0) {
|
|
// Use lookaheads/lookbehinds for boundary-like behavior when the keyword consists of non-word characters (e.g. ,, &, -)
|
|
// because \b only works for transitions between word characters (letters, digits, _) and non-word characters.
|
|
$keyword = "(?<!\w)" . $keyword . "(?!\w)";
|
|
} else {
|
|
$keyword = '\\b' . $keyword . '\\b';
|
|
}
|
|
|
|
$union->sql = "
|
|
SELECT resource, [bit_or_condition] hit_count AS score
|
|
FROM resource_node rn[union_index]
|
|
WHERE rn[union_index].node IN
|
|
(SELECT ref FROM `node` WHERE name RLIKE ? "
|
|
. $union_restriction_clause->sql . ")
|
|
GROUP BY resource " .
|
|
($non_field_keyword_sql->sql != "" ? $non_field_keyword_sql->sql : "");
|
|
}
|
|
$union->parameters = array_merge(["s",$keyword], $union_restriction_clause->parameters);
|
|
if ($non_field_keyword_sql->sql != "") {
|
|
$union->parameters = array_merge($union->parameters, $non_field_keyword_sql->parameters);
|
|
}
|
|
$sql_keyword_union[] = $union;
|
|
$sql_keyword_union_criteria[] = "`h`.`keyword_[union_index]_found`";
|
|
$sql_keyword_union_or[] = "";
|
|
$sql_keyword_union_aggregation[] = "BIT_OR(`keyword_[union_index]_found`) AS `keyword_[union_index]_found`";
|
|
} else // we are dealing with a standard keyword match
|
|
{
|
|
// ----- resource_node -> node_keyword sub query -----
|
|
$union = new PreparedStatementQuery();
|
|
|
|
$union->sql = " SELECT resource, [bit_or_condition] hit_count AS score
|
|
FROM resource_node rn[union_index]
|
|
WHERE rn[union_index].node IN
|
|
(SELECT node
|
|
FROM `node_keyword` nk[union_index]
|
|
WHERE ((nk[union_index].keyword = ? " . $relatedsql->sql . ") " . $union_restriction_clause->sql . ")" .
|
|
($alternative_keywords_sql->sql != "" ? ($alternative_keywords_sql->sql . $union_restriction_clause->sql) : "" ) .
|
|
") GROUP BY resource " .
|
|
($non_field_keyword_sql->sql != "" ? $non_field_keyword_sql->sql : "") ;
|
|
|
|
$union->parameters = array_merge(["i",$keyref], $relatedsql->parameters, $union_restriction_clause->parameters);
|
|
if ($alternative_keywords_sql->sql != "") {
|
|
$union->parameters = array_merge($union->parameters, $alternative_keywords_sql->parameters, $union_restriction_clause->parameters);
|
|
}
|
|
if ($non_field_keyword_sql->sql != "") {
|
|
$union->parameters = array_merge($union->parameters, $non_field_keyword_sql->parameters);
|
|
}
|
|
|
|
$sql_keyword_union[] = $union;
|
|
|
|
// ---- end of resource_node -> node_keyword sub query -----
|
|
$sql_keyword_union_criteria[] = "`h`.`keyword_[union_index]_found`";
|
|
$sql_keyword_union_aggregation[] = "BIT_OR(`keyword_[union_index]_found`) AS `keyword_[union_index]_found`";
|
|
|
|
$sql_keyword_union_or[] = $keywords_expanded_or;
|
|
|
|
// Log this
|
|
if ($stats_logging && !$go) {
|
|
$keywords_used[] = $keyref;
|
|
}
|
|
} // End of standard keyword match
|
|
} // end if not omit
|
|
} // end found wildcards
|
|
} // end handle wildcards
|
|
} // end normal keyword
|
|
} // End of if not quoted string and end of check if special search
|
|
if ($quoted_string && !$quoted_field_match) {
|
|
$quotedfieldid = "";
|
|
// This keyword is a quoted string, split into keywords but don't preserve quotes this time
|
|
if ($field_short_name_specified && isset($fieldinfo['ref'])) {
|
|
// We have already parsed the keyword when looking for a node, get string and then filter on this field
|
|
$quotedkeywords = split_keywords($keystring);
|
|
$quotedfieldid = $fieldinfo['ref'];
|
|
} else {
|
|
$quotedkeywords = split_keywords(substr($keyword, 1, -1));
|
|
}
|
|
$omit = false;
|
|
if (substr($keyword, 0, 1) == "-") {
|
|
$omit = true;
|
|
$keyword = substr($keyword, 1);
|
|
}
|
|
|
|
$qk = 1; // Set the counter to the first keyword
|
|
$last_key_offset = 1;
|
|
$fixedunion = new PreparedStatementQuery();
|
|
$fixedunioncondition = new PreparedStatementQuery();
|
|
foreach ($quotedkeywords as $quotedkeyword) {
|
|
global $noadd, $wildcard_always_applied;
|
|
if (in_array($quotedkeyword, $noadd)) { # skip common words that are excluded from indexing
|
|
# Support skipped keywords - if the last keyword was skipped (listed in $noadd), increase the allowed position from the previous keyword. Useful for quoted searches that contain $noadd words, e.g. "black and white" where "and" is a skipped keyword.
|
|
++$last_key_offset;
|
|
} else {
|
|
$keyref = resolve_keyword($quotedkeyword, false, true, false); # Resolve keyword.
|
|
if ($keyref === false) {
|
|
# make a new keyword
|
|
$keyref = resolve_keyword($quotedkeyword, true, true, false);
|
|
}
|
|
|
|
$union_restriction_clause = new PreparedStatementQuery();
|
|
|
|
if (!empty($sql_exclude_fields)) {
|
|
$union_restriction_clause->sql .= " AND nk_[union_index]_" . $qk . ".node NOT IN (SELECT ref FROM node WHERE resource_type_field IN (" . ps_param_insert(count($sql_exclude_fields)) . "))";
|
|
$union_restriction_clause->parameters = array_merge($union_restriction_clause->parameters, ps_param_fill($sql_exclude_fields, "i"));
|
|
}
|
|
|
|
if (count($hidden_indexed_fields) > 0) {
|
|
$union_restriction_clause->sql .= " AND nk_[union_index]_" . $qk . ".node NOT IN (SELECT ref FROM node WHERE resource_type_field IN (" . ps_param_insert(count($hidden_indexed_fields)) . "))";
|
|
$union_restriction_clause->parameters = array_merge($union_restriction_clause->parameters, ps_param_fill($hidden_indexed_fields, "i"));
|
|
}
|
|
|
|
if ($quotedfieldid != "") {
|
|
$union_restriction_clause->sql .= " AND nk_[union_index]_" . $qk . ".node IN (SELECT ref FROM node WHERE resource_type_field = ? )";
|
|
$union_restriction_clause->parameters = array_merge($union_restriction_clause->parameters, ["i",$quotedfieldid]);
|
|
}
|
|
|
|
if ($qk == 1) {
|
|
// Add code to find matching nodes in resource_node
|
|
$fixedunion->sql = " SELECT rn_[union_index]_" . $qk . ".resource, [bit_or_condition] rn_[union_index]_" . $qk . ".hit_count AS score FROM resource_node rn_[union_index]_" . $qk .
|
|
" LEFT OUTER JOIN `node_keyword` nk_[union_index]_" . $qk . " ON rn_[union_index]_" . $qk . ".node=nk_[union_index]_" . $qk . ".node AND (nk_[union_index]_" . $qk . ".keyword = ? " . ")";
|
|
$fixedunion->parameters = array_merge($fixedunion->parameters, ["i",$keyref]);
|
|
|
|
$fixedunioncondition->sql = "nk_[union_index]_" . $qk . ".keyword = ? " . $union_restriction_clause->sql;
|
|
$fixedunioncondition->parameters = array_merge(["i",$keyref], $union_restriction_clause->parameters);
|
|
} else {
|
|
# For keywords other than the first one, check the position is next to the previous keyword.
|
|
# Also check these occurances are within the same field.
|
|
$fixedunion->sql .= " JOIN `node_keyword` nk_[union_index]_" . $qk . " ON nk_[union_index]_" . $qk . ".node = nk_[union_index]_" . ($qk - 1) . ".node AND nk_[union_index]_" . $qk . ".keyword = ? AND nk_[union_index]_" . $qk . ".position=nk_[union_index]_" . ($qk - 1) . ".position+" . $last_key_offset ;
|
|
array_push($fixedunion->parameters, "i", $keyref);
|
|
}
|
|
|
|
$last_key_offset = 1;
|
|
$qk++;
|
|
} // End of if keyword not excluded (not in $noadd array)
|
|
} // End of each keyword in quoted string
|
|
|
|
if (trim($fixedunioncondition->sql) != "") {
|
|
if ($omit) {# Exclude matching resources from query (omit feature)
|
|
if ($sql_filter->sql != "") {
|
|
$sql_filter->sql .= " AND ";
|
|
}
|
|
|
|
$sql_filter->sql .= str_replace("[bit_or_condition]", "", " r.ref NOT IN (SELECT resource FROM (" . $fixedunion->sql . " WHERE " . $fixedunioncondition->sql . ") qfilter[union_index]) "); # Instead of adding to the union, filter out resources that do contain the quoted string.
|
|
|
|
|
|
$sql_filter->parameters = array_merge($sql_filter->parameters, $fixedunion->parameters, $fixedunioncondition->parameters);
|
|
} elseif (is_a($fixedunion, "PreparedStatementQuery")) {
|
|
$addunion = new PreparedStatementQuery();
|
|
$addunion->sql = $fixedunion->sql . " WHERE " . $fixedunioncondition->sql . " GROUP BY resource ";
|
|
$addunion->parameters = array_merge($fixedunion->parameters, $fixedunioncondition->parameters);
|
|
$sql_keyword_union[] = $addunion;
|
|
$sql_keyword_union_aggregation[] = "BIT_OR(`keyword_[union_index]_found`) AS `keyword_[union_index]_found` ";
|
|
$sql_keyword_union_or[] = false;
|
|
$sql_keyword_union_criteria[] = "`h`.`keyword_[union_index]_found`";
|
|
}
|
|
}
|
|
$c++;
|
|
}
|
|
} // end keywords expanded loop
|
|
if (isset($datefieldjoin)) {
|
|
$date_string = sprintf(
|
|
"%s-%s-%s",
|
|
$date_parts['year'] ?? '____',
|
|
$date_parts['month'] ?? '__',
|
|
$date_parts['day'] ?? '__'
|
|
) . '%';
|
|
$sql_filter->sql .= ($sql_filter->sql != "" ? " AND " : "") . "rdn" . $datefieldjoin . ".name like ? ";
|
|
array_push($sql_filter->parameters, "s", $date_string);
|
|
}
|
|
} // end keysearch if
|