DS-4440 GDPR - Anonymize statistics feature - feedback: default configuration & spelling

This commit is contained in:
Samuel
2020-04-09 14:52:51 +02:00
committed by Samuel
parent 10f38d3fe5
commit d71472fcc3
4 changed files with 38 additions and 21 deletions

View File

@@ -51,7 +51,7 @@ public class AnonymizeStatistics {
private static Logger log = getLogger(AnonymizeStatistics.class);
private static Context context = new Context();
private static String action = "anonymise_statistics";
private static String action = "anonymize_statistics";
private static final String HELP_OPTION = "h";
private static final String SLEEP_OPTION = "s";
@@ -68,14 +68,14 @@ public class AnonymizeStatistics {
private static int batchSize = 100;
private static int threads = 2;
private static final Object ANONYMISED =
configurationService.getProperty("anonymise_statistics.dns_mask", "anonymised");
private static final Object ANONYMIZE =
configurationService.getProperty("anonymize_statistics.dns_mask", "anonymized");
private static final String TIME_LIMIT;
static {
Calendar calendar = Calendar.getInstance();
calendar.add(DAY_OF_YEAR, -configurationService.getIntProperty("anonymise_statistics.time_limit", 90));
calendar.add(DAY_OF_YEAR, -configurationService.getIntProperty("anonymize_statistics.time_threshold", 90));
TIME_LIMIT = format(calendar, DATE_FORMAT_8601);
}
@@ -87,7 +87,7 @@ public class AnonymizeStatistics {
public static void main(String... args) throws ParseException {
parseCommandLineOptions(createCommandLineOptions(), args);
anonymiseStatistics();
anonymizeStatistics();
}
private static Options createCommandLineOptions() {
@@ -170,7 +170,7 @@ public class AnonymizeStatistics {
}
private static void anonymiseStatistics() {
private static void anonymizeStatistics() {
try {
long updated = 0;
long total = getDocuments().getResults().getNumFound();
@@ -233,7 +233,7 @@ public class AnonymizeStatistics {
return solrLoggerService.query(
"ip:*",
"time:[* TO " + TIME_LIMIT + "] AND -dns:" + ANONYMISED,
"time:[* TO " + TIME_LIMIT + "] AND -dns:" + ANONYMIZE,
null, batchSize, -1, null, null, null, null, null, false, false, true
);
}
@@ -258,8 +258,8 @@ public class AnonymizeStatistics {
"dns"
),
asList(
singletonList(solrLoggerService.anonymiseIp(document.getFieldValue("ip").toString())),
singletonList(ANONYMISED)
singletonList(solrLoggerService.anonymizeIp(document.getFieldValue("ip").toString())),
singletonList(ANONYMIZE)
),
false
);

View File

@@ -339,9 +339,9 @@ public class SolrLoggerServiceImpl implements SolrLoggerService, InitializingBea
if (request != null) {
String ip = clientInfoService.getClientIp(request);
if (configurationService.getBooleanProperty("anonymise_statistics.anonymise_on_log", false)) {
if (configurationService.getBooleanProperty("anonymize_statistics.anonymize_on_log", false)) {
try {
doc1.addField("ip", anonymiseIp(ip));
doc1.addField("ip", anonymizeIp(ip));
} catch (UnknownHostException e) {
log.warn(e.getMessage(), e);
}
@@ -355,8 +355,8 @@ public class SolrLoggerServiceImpl implements SolrLoggerService, InitializingBea
}
try {
String dns = configurationService.getProperty("anonymise_statistics.dns_mask", "anonymised");
if (!configurationService.getBooleanProperty("anonymise_statistics.anonymise_on_log", false)) {
String dns = configurationService.getProperty("anonymize_statistics.dns_mask", "anonymized");
if (!configurationService.getBooleanProperty("anonymize_statistics.anonymize_on_log", false)) {
dns = DnsLookup.reverseDns(ip);
}
doc1.addField("dns", dns.toLowerCase());
@@ -426,9 +426,9 @@ public class SolrLoggerServiceImpl implements SolrLoggerService, InitializingBea
// Save our basic info that we already have
ip = clientInfoService.getClientIp(ip, xforwardedfor);
if (configurationService.getBooleanProperty("anonymise_statistics.anonymise_on_log", false)) {
if (configurationService.getBooleanProperty("anonymize_statistics.anonymize_on_log", false)) {
try {
doc1.addField("ip", anonymiseIp(ip));
doc1.addField("ip", anonymizeIp(ip));
} catch (UnknownHostException e) {
log.warn(e.getMessage(), e);
}
@@ -437,8 +437,8 @@ public class SolrLoggerServiceImpl implements SolrLoggerService, InitializingBea
}
try {
String dns = configurationService.getProperty("anonymise_statistics.dns_mask", "anonymised");
if (!configurationService.getBooleanProperty("anonymise_statistics.anonymise_on_log", false)) {
String dns = configurationService.getProperty("anonymize_statistics.dns_mask", "anonymized");
if (!configurationService.getBooleanProperty("anonymize_statistics.anonymize_on_log", false)) {
dns = DnsLookup.reverseDns(ip);
}
doc1.addField("dns", dns.toLowerCase());
@@ -1708,14 +1708,14 @@ public class SolrLoggerServiceImpl implements SolrLoggerService, InitializingBea
statisticYearCoresInit = true;
}
public Object anonymiseIp(String ip) throws UnknownHostException {
public Object anonymizeIp(String ip) throws UnknownHostException {
InetAddress address = InetAddress.getByName(ip);
if (address instanceof Inet4Address) {
return ip.replaceFirst(IP_V4_REGEX, "$1" + configurationService.getProperty(
"anonymise_statistics.ip_v4_mask", "255"));
"anonymize_statistics.ip_v4_mask", "255"));
} else if (address instanceof Inet6Address) {
return ip.replaceFirst(IP_V6_REGEX, "$1:" + configurationService.getProperty(
"anonymise_statistics.ip_v6_mask", "FFFF:FFFF"));
"anonymize_statistics.ip_v6_mask", "FFFF:FFFF"));
}
throw new UnknownHostException("unknown ip format");

View File

@@ -225,6 +225,6 @@ public interface SolrLoggerService {
public void commitShard(String shard) throws Exception;
public Object anonymiseIp(String ip) throws UnknownHostException;
public Object anonymizeIp(String ip) throws UnknownHostException;
}

View File

@@ -44,3 +44,20 @@ usage-statistics.authorization.admin.workflow=true
# Set to true if the statistics core is sharded into a core per year, defaults to false
# If you are sharding your statistics index each year by running "dspace stats-util -s", you should set this to "true"
usage-statistics.shardedByYear = false
# Configuration parameters for anonymizing statistics
# Anonymize statistics the moment they are created
# anonymize_statistics.anonymize_on_log = false
# Mask to replace the last group of an IPv4 address
# anonymize_statistics.ip_v4_mask = 255
# Mask to replace the last two groups of an IPv6 address
# anonymize_statistics.ip_v6_mask = FFFF:FFFF
# Mask stored in the "dns" field in place of the reverse DNS lookup result
# anonymize_statistics.dns_mask = anonymized
# Only anonymize statistics records older than this threshold (expressed in days)
# anonymize_statistics.time_threshold = 90