DS-4440 GDPR - Anonymize statistics feature - feedback: default configuration & spelling

This commit is contained in:
Samuel
2020-04-09 14:52:51 +02:00
committed by Samuel
parent 10f38d3fe5
commit d71472fcc3
4 changed files with 38 additions and 21 deletions

View File

@@ -51,7 +51,7 @@ public class AnonymizeStatistics {
private static Logger log = getLogger(AnonymizeStatistics.class); private static Logger log = getLogger(AnonymizeStatistics.class);
private static Context context = new Context(); private static Context context = new Context();
private static String action = "anonymise_statistics"; private static String action = "anonymize_statistics";
private static final String HELP_OPTION = "h"; private static final String HELP_OPTION = "h";
private static final String SLEEP_OPTION = "s"; private static final String SLEEP_OPTION = "s";
@@ -68,14 +68,14 @@ public class AnonymizeStatistics {
private static int batchSize = 100; private static int batchSize = 100;
private static int threads = 2; private static int threads = 2;
private static final Object ANONYMISED = private static final Object ANONYMIZE =
configurationService.getProperty("anonymise_statistics.dns_mask", "anonymised"); configurationService.getProperty("anonymize_statistics.dns_mask", "anonymized");
private static final String TIME_LIMIT; private static final String TIME_LIMIT;
static { static {
Calendar calendar = Calendar.getInstance(); Calendar calendar = Calendar.getInstance();
calendar.add(DAY_OF_YEAR, -configurationService.getIntProperty("anonymise_statistics.time_limit", 90)); calendar.add(DAY_OF_YEAR, -configurationService.getIntProperty("anonymize_statistics.time_threshold", 90));
TIME_LIMIT = format(calendar, DATE_FORMAT_8601); TIME_LIMIT = format(calendar, DATE_FORMAT_8601);
} }
@@ -87,7 +87,7 @@ public class AnonymizeStatistics {
public static void main(String... args) throws ParseException { public static void main(String... args) throws ParseException {
parseCommandLineOptions(createCommandLineOptions(), args); parseCommandLineOptions(createCommandLineOptions(), args);
anonymiseStatistics(); anonymizeStatistics();
} }
private static Options createCommandLineOptions() { private static Options createCommandLineOptions() {
@@ -170,7 +170,7 @@ public class AnonymizeStatistics {
} }
private static void anonymiseStatistics() { private static void anonymizeStatistics() {
try { try {
long updated = 0; long updated = 0;
long total = getDocuments().getResults().getNumFound(); long total = getDocuments().getResults().getNumFound();
@@ -233,7 +233,7 @@ public class AnonymizeStatistics {
return solrLoggerService.query( return solrLoggerService.query(
"ip:*", "ip:*",
"time:[* TO " + TIME_LIMIT + "] AND -dns:" + ANONYMISED, "time:[* TO " + TIME_LIMIT + "] AND -dns:" + ANONYMIZE,
null, batchSize, -1, null, null, null, null, null, false, false, true null, batchSize, -1, null, null, null, null, null, false, false, true
); );
} }
@@ -258,8 +258,8 @@ public class AnonymizeStatistics {
"dns" "dns"
), ),
asList( asList(
singletonList(solrLoggerService.anonymiseIp(document.getFieldValue("ip").toString())), singletonList(solrLoggerService.anonymizeIp(document.getFieldValue("ip").toString())),
singletonList(ANONYMISED) singletonList(ANONYMIZE)
), ),
false false
); );

View File

@@ -339,9 +339,9 @@ public class SolrLoggerServiceImpl implements SolrLoggerService, InitializingBea
if (request != null) { if (request != null) {
String ip = clientInfoService.getClientIp(request); String ip = clientInfoService.getClientIp(request);
if (configurationService.getBooleanProperty("anonymise_statistics.anonymise_on_log", false)) { if (configurationService.getBooleanProperty("anonymize_statistics.anonymize_on_log", false)) {
try { try {
doc1.addField("ip", anonymiseIp(ip)); doc1.addField("ip", anonymizeIp(ip));
} catch (UnknownHostException e) { } catch (UnknownHostException e) {
log.warn(e.getMessage(), e); log.warn(e.getMessage(), e);
} }
@@ -355,8 +355,8 @@ public class SolrLoggerServiceImpl implements SolrLoggerService, InitializingBea
} }
try { try {
String dns = configurationService.getProperty("anonymise_statistics.dns_mask", "anonymised"); String dns = configurationService.getProperty("anonymize_statistics.dns_mask", "anonymized");
if (!configurationService.getBooleanProperty("anonymise_statistics.anonymise_on_log", false)) { if (!configurationService.getBooleanProperty("anonymize_statistics.anonymize_on_log", false)) {
dns = DnsLookup.reverseDns(ip); dns = DnsLookup.reverseDns(ip);
} }
doc1.addField("dns", dns.toLowerCase()); doc1.addField("dns", dns.toLowerCase());
@@ -426,9 +426,9 @@ public class SolrLoggerServiceImpl implements SolrLoggerService, InitializingBea
// Save our basic info that we already have // Save our basic info that we already have
ip = clientInfoService.getClientIp(ip, xforwardedfor); ip = clientInfoService.getClientIp(ip, xforwardedfor);
if (configurationService.getBooleanProperty("anonymise_statistics.anonymise_on_log", false)) { if (configurationService.getBooleanProperty("anonymize_statistics.anonymize_on_log", false)) {
try { try {
doc1.addField("ip", anonymiseIp(ip)); doc1.addField("ip", anonymizeIp(ip));
} catch (UnknownHostException e) { } catch (UnknownHostException e) {
log.warn(e.getMessage(), e); log.warn(e.getMessage(), e);
} }
@@ -437,8 +437,8 @@ public class SolrLoggerServiceImpl implements SolrLoggerService, InitializingBea
} }
try { try {
String dns = configurationService.getProperty("anonymise_statistics.dns_mask", "anonymised"); String dns = configurationService.getProperty("anonymize_statistics.dns_mask", "anonymized");
if (!configurationService.getBooleanProperty("anonymise_statistics.anonymise_on_log", false)) { if (!configurationService.getBooleanProperty("anonymize_statistics.anonymize_on_log", false)) {
dns = DnsLookup.reverseDns(ip); dns = DnsLookup.reverseDns(ip);
} }
doc1.addField("dns", dns.toLowerCase()); doc1.addField("dns", dns.toLowerCase());
@@ -1708,14 +1708,14 @@ public class SolrLoggerServiceImpl implements SolrLoggerService, InitializingBea
statisticYearCoresInit = true; statisticYearCoresInit = true;
} }
public Object anonymiseIp(String ip) throws UnknownHostException { public Object anonymizeIp(String ip) throws UnknownHostException {
InetAddress address = InetAddress.getByName(ip); InetAddress address = InetAddress.getByName(ip);
if (address instanceof Inet4Address) { if (address instanceof Inet4Address) {
return ip.replaceFirst(IP_V4_REGEX, "$1" + configurationService.getProperty( return ip.replaceFirst(IP_V4_REGEX, "$1" + configurationService.getProperty(
"anonymise_statistics.ip_v4_mask", "255")); "anonymize_statistics.ip_v4_mask", "255"));
} else if (address instanceof Inet6Address) { } else if (address instanceof Inet6Address) {
return ip.replaceFirst(IP_V6_REGEX, "$1:" + configurationService.getProperty( return ip.replaceFirst(IP_V6_REGEX, "$1:" + configurationService.getProperty(
"anonymise_statistics.ip_v6_mask", "FFFF:FFFF")); "anonymize_statistics.ip_v6_mask", "FFFF:FFFF"));
} }
throw new UnknownHostException("unknown ip format"); throw new UnknownHostException("unknown ip format");

View File

@@ -225,6 +225,6 @@ public interface SolrLoggerService {
public void commitShard(String shard) throws Exception; public void commitShard(String shard) throws Exception;
public Object anonymiseIp(String ip) throws UnknownHostException; public Object anonymizeIp(String ip) throws UnknownHostException;
} }

View File

@@ -44,3 +44,20 @@ usage-statistics.authorization.admin.workflow=true
# Set to true if the statistics core is sharded into a core per year, defaults to false # Set to true if the statistics core is sharded into a core per year, defaults to false
# If you are sharding your statistics index each year by running "dspace stats-util -s", you should set this to "true" # If you are sharding your statistics index each year by running "dspace stats-util -s", you should set this to "true"
usage-statistics.shardedByYear = false usage-statistics.shardedByYear = false
# Configuration parameters for anonymizing statistics
# Anonymize statistics the moment they are created
# anonymize_statistics.anonymize_on_log = false
# Mask to replace the last group of an IPv4 address
# anonymize_statistics.ip_v4_mask = 255
# Mask to replace the last two groups of an IPv6 address
# anonymize_statistics.ip_v6_mask = FFFF:FFFF
# Mask to replace the DNS
# anonymize_statistics.dns_mask = anonymized
# Only anonymize statistics records older than this threshold (expressed in days)
# anonymize_statistics.time_threshold = 90