mirror of
https://github.com/DSpace/dspace-angular.git
synced 2025-10-07 10:04:11 +00:00
Refactor to two caches. One for bots and one for anonymous users
This commit is contained in:
@@ -43,17 +43,37 @@ cache:
|
|||||||
maxBufferSize: 100
|
maxBufferSize: 100
|
||||||
timePerMethod:
|
timePerMethod:
|
||||||
PATCH: 3 # time in seconds
|
PATCH: 3 # time in seconds
|
||||||
# In-memory cache of server-side rendered pages. This cache stores the most recently accessed public pages.
|
# In-memory cache(s) of server-side rendered pages. These caches will store the most recently accessed public pages.
|
||||||
# Pages are automatically added/dropped from this cache based on how recently they have been used.
|
# Pages are automatically added/dropped from these caches based on how recently they have been used.
|
||||||
|
# Restarting the app clears all page caches.
|
||||||
|
# NOTE: To control the cache size, use the "max" setting. Keep in mind, individual cached pages are usually small (<100KB).
|
||||||
|
# Enabling *both* caches will mean that a page may be cached twice, once in each cache (but may expire at different times via timeToLive).
|
||||||
serverSide:
|
serverSide:
|
||||||
# Maximum number of pages to cache. Set to zero (0) to disable server side caching. Default is 100, which means
|
# When enabled (i.e. max > 0), known bots will be sent pages from a server side cache specific for bots.
|
||||||
# the 100 most recently accessed public pages will be cached. As all pages are cached in server memory,
|
# (Keep in mind, bot detection cannot be guaranteed. It is possible some bots will bypass this cache.)
|
||||||
# increasing this value will increase memory needs. Individual cached pages are usually small (<100KB),
|
botCache:
|
||||||
# so max=100 should only require a maximum of 9-10MB of memory. Restarting the app clears this page cache.
|
# Maximum number of pages to cache for known bots. Set to zero (0) to disable server side caching for bots.
|
||||||
max: 100
|
# Default is 1000, which means the 1000 most recently accessed public pages will be cached.
|
||||||
|
# As all pages are cached in server memory, increasing this value will increase memory needs.
|
||||||
|
# Individual cached pages are usually small (<100KB), so max=1000 should only require ~100MB of memory.
|
||||||
|
max: 1000
|
||||||
# Amount of time after which cached pages are considered stale (in ms). After becoming stale, the cached
|
# Amount of time after which cached pages are considered stale (in ms). After becoming stale, the cached
|
||||||
# copy is automatically refreshed on the next request.
|
# copy is automatically refreshed on the next request.
|
||||||
timeToLive: 900000 # 15 minutes
|
# NOTE: For the bot cache, this setting may impact how quickly search engine bots will index new content on your site.
|
||||||
|
# For example, setting this to one week may mean that search engine bots may not find all new content for one week.
|
||||||
|
timeToLive: 86400000 # 1 day
|
||||||
|
# When enabled (i.e. max > 0), all anonymous users will be sent pages from a server side cache.
|
||||||
|
# This allows anonymous users to interact more quickly with the site, but also means they may see slightly
|
||||||
|
# outdated content (based on timeToLive)
|
||||||
|
anonymousCache:
|
||||||
|
# Maximum number of pages to cache. Default is zero (0) which means anonymous user cache is disabled.
|
||||||
|
# As all pages are cached in server memory, increasing this value will increase memory needs.
|
||||||
|
# Individual cached pages are usually small (<100KB), so a value of max=1000 would only require ~100MB of memory.
|
||||||
|
max: 0
|
||||||
|
# Amount of time after which cached pages are considered stale (in ms). After becoming stale, the cached
|
||||||
|
# copy is automatically refreshed on the next request.
|
||||||
|
# NOTE: For the anonymous cache, it is recommended to keep this value low to avoid anonymous users seeing outdated content.
|
||||||
|
timeToLive: 10000 # 10 seconds
|
||||||
|
|
||||||
# Authentication settings
|
# Authentication settings
|
||||||
auth:
|
auth:
|
||||||
|
@@ -99,6 +99,7 @@
|
|||||||
"fast-json-patch": "^3.0.0-1",
|
"fast-json-patch": "^3.0.0-1",
|
||||||
"filesize": "^6.1.0",
|
"filesize": "^6.1.0",
|
||||||
"http-proxy-middleware": "^1.0.5",
|
"http-proxy-middleware": "^1.0.5",
|
||||||
|
"isbot": "^3.6.5",
|
||||||
"js-cookie": "2.2.1",
|
"js-cookie": "2.2.1",
|
||||||
"js-yaml": "^4.1.0",
|
"js-yaml": "^4.1.0",
|
||||||
"json5": "^2.2.2",
|
"json5": "^2.2.2",
|
||||||
|
152
server.ts
152
server.ts
@@ -29,6 +29,7 @@ import * as expressStaticGzip from 'express-static-gzip';
|
|||||||
|
|
||||||
import axios from 'axios';
|
import axios from 'axios';
|
||||||
import LRU from 'lru-cache';
|
import LRU from 'lru-cache';
|
||||||
|
import isbot from 'isbot';
|
||||||
import { createCertificate } from 'pem';
|
import { createCertificate } from 'pem';
|
||||||
import { createServer } from 'https';
|
import { createServer } from 'https';
|
||||||
import { json } from 'body-parser';
|
import { json } from 'body-parser';
|
||||||
@@ -70,8 +71,11 @@ const cookieParser = require('cookie-parser');
|
|||||||
|
|
||||||
const appConfig: AppConfig = buildAppConfig(join(DIST_FOLDER, 'assets/config.json'));
|
const appConfig: AppConfig = buildAppConfig(join(DIST_FOLDER, 'assets/config.json'));
|
||||||
|
|
||||||
// cache of SSR pages, only enabled in production mode
|
// cache of SSR pages for known bots, only enabled in production mode
|
||||||
let cache: LRU<string, any>;
|
let botCache: LRU<string, any>;
|
||||||
|
|
||||||
|
// cache of SSR pages for anonymous users. Disabled by default, and only available in production mode
|
||||||
|
let anonymousCache: LRU<string, any>;
|
||||||
|
|
||||||
// extend environment with app config for server
|
// extend environment with app config for server
|
||||||
extendEnvironmentWithAppConfig(environment, appConfig);
|
extendEnvironmentWithAppConfig(environment, appConfig);
|
||||||
@@ -257,10 +261,10 @@ function serverSideRender(req, res, sendToUser: boolean = true) {
|
|||||||
providers: [{ provide: APP_BASE_HREF, useValue: req.baseUrl }]
|
providers: [{ provide: APP_BASE_HREF, useValue: req.baseUrl }]
|
||||||
}, (err, data) => {
|
}, (err, data) => {
|
||||||
if (hasNoValue(err) && hasValue(data)) {
|
if (hasNoValue(err) && hasValue(data)) {
|
||||||
res.locals.ssr = true; // mark response as SSR (enables text compression)
|
// save server side rendered page to cache (if any are enabled)
|
||||||
// save server side rendered data to cache
|
|
||||||
saveToCache(req, data);
|
saveToCache(req, data);
|
||||||
if (sendToUser) {
|
if (sendToUser) {
|
||||||
|
res.locals.ssr = true; // mark response as SSR (enables text compression)
|
||||||
// send rendered page to user
|
// send rendered page to user
|
||||||
res.send(data);
|
res.send(data);
|
||||||
}
|
}
|
||||||
@@ -313,24 +317,45 @@ function addCacheControl(req, res, next) {
|
|||||||
* Initialize server-side caching of pages rendered via SSR.
|
* Initialize server-side caching of pages rendered via SSR.
|
||||||
*/
|
*/
|
||||||
function initCache() {
|
function initCache() {
|
||||||
if (cacheEnabled()) {
|
if (botCacheEnabled()) {
|
||||||
// Initialize a new "least-recently-used" item cache (where least recently used items are removed first)
|
// Initialize a new "least-recently-used" item cache (where least recently used pages are removed first)
|
||||||
// See https://www.npmjs.com/package/lru-cache
|
// See https://www.npmjs.com/package/lru-cache
|
||||||
cache = new LRU( {
|
// When enabled, each page defaults to expiring after 1 day
|
||||||
max: environment.cache.serverSide.max || 100, // 100 items in cache maximum
|
botCache = new LRU( {
|
||||||
ttl: environment.cache.serverSide.timeToLive || 15 * 60 * 1000, // 15 minute cache
|
max: environment.cache.serverSide.botCache.max,
|
||||||
|
ttl: environment.cache.serverSide.botCache.timeToLive || 24 * 60 * 60 * 1000, // 1 day
|
||||||
|
allowStale: true // If object is found to be stale, return stale value before deleting
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
if (anonymousCacheEnabled()) {
|
||||||
|
// NOTE: While caches may share SSR pages, this cache must be kept separately because the timeToLive
|
||||||
|
// may expire pages more frequently.
|
||||||
|
// When enabled, each page defaults to expiring after 10 seconds (to minimize anonymous users seeing out-of-date content)
|
||||||
|
anonymousCache = new LRU( {
|
||||||
|
max: environment.cache.serverSide.anonymousCache.max,
|
||||||
|
ttl: environment.cache.serverSide.anonymousCache.timeToLive || 10 * 1000, // 10 seconds
|
||||||
allowStale: true // If object is found to be stale, return stale value before deleting
|
allowStale: true // If object is found to be stale, return stale value before deleting
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Return whether server side caching is enabled in configuration.
|
* Return whether bot-specific server side caching is enabled in configuration.
|
||||||
*/
|
*/
|
||||||
function cacheEnabled(): boolean {
|
function botCacheEnabled(): boolean {
|
||||||
// Caching is only enabled is SSR is enabled AND
|
// Caching is only enabled if SSR is enabled AND
|
||||||
// "serverSide.max" setting is greater than zero
|
// "max" pages to cache is greater than zero
|
||||||
return environment.universal.preboot && environment.cache.serverSide.max && (environment.cache.serverSide.max > 0);
|
return environment.universal.preboot && environment.cache.serverSide.botCache.max && (environment.cache.serverSide.botCache.max > 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Return whether anonymous user server side caching is enabled in configuration.
|
||||||
|
*/
|
||||||
|
function anonymousCacheEnabled(): boolean {
|
||||||
|
// Caching is only enabled if SSR is enabled AND
|
||||||
|
// "max" pages to cache is greater than zero
|
||||||
|
return environment.universal.preboot && environment.cache.serverSide.anonymousCache.max && (environment.cache.serverSide.anonymousCache.max > 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -338,43 +363,64 @@ function cacheEnabled(): boolean {
|
|||||||
* Caching is ONLY done for SSR requests. Pages are cached based on their path (e.g. /home or /search?query=test)
|
* Caching is ONLY done for SSR requests. Pages are cached based on their path (e.g. /home or /search?query=test)
|
||||||
*/
|
*/
|
||||||
function cacheCheck(req, res, next) {
|
function cacheCheck(req, res, next) {
|
||||||
let cacheHit = false;
|
// Cached copy of page (if found)
|
||||||
let debug = false; // Enable to see cache hits & re-rendering logs
|
let cachedCopy;
|
||||||
|
|
||||||
// Only check cache if cache enabled & NOT authenticated.
|
// If the bot cache is enabled and this request looks like a bot, check the bot cache for a cached page.
|
||||||
// NOTE: Authenticated users cannot use the SSR cache. Cached pages only show data available to anonymous users.
|
if (botCacheEnabled() && isbot(req.get('user-agent'))) {
|
||||||
// Only public pages can currently be cached, as the cached data is not user-specific.
|
cachedCopy = checkCacheForRequest('bot', botCache, req, res);
|
||||||
if (cacheEnabled() && !isUserAuthenticated(req)) {
|
} else if (anonymousCacheEnabled() && !isUserAuthenticated(req)) {
|
||||||
|
cachedCopy = checkCacheForRequest('anonymous', anonymousCache, req, res);
|
||||||
|
}
|
||||||
|
|
||||||
|
// If cached copy exists, return it to the user.
|
||||||
|
if (cachedCopy) {
|
||||||
|
res.locals.ssr = true; // mark response as SSR-generated (enables text compression)
|
||||||
|
res.send(cachedCopy);
|
||||||
|
|
||||||
|
// Tell Express to skip all other handlers for this path
|
||||||
|
// This ensures we don't try to re-render the page since we've already returned the cached copy
|
||||||
|
next('router');
|
||||||
|
} else {
|
||||||
|
// If nothing found in cache, just continue with next handler
|
||||||
|
// (This should send the request on to the handler that rerenders the page via SSR)
|
||||||
|
next();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Checks if the current request (i.e. page) is found in the given cache. If it is found,
|
||||||
|
* the cached copy is returned. When found, this method also triggers a re-render via
|
||||||
|
* SSR if the cached copy is now expired (i.e. timeToLive has passed for this cached copy).
|
||||||
|
* @param cacheName name of cache (just useful for debug logging)
|
||||||
|
* @param cache LRU cache to check
|
||||||
|
* @param req current request to look for in the cache
|
||||||
|
* @param res current response
|
||||||
|
* @returns cached copy (if found) or undefined (if not found)
|
||||||
|
*/
|
||||||
|
function checkCacheForRequest(cacheName: string, cache: LRU<string, any>, req, res): any {
|
||||||
|
let debug = false; // Enable to see cache hits & re-rendering in logs
|
||||||
|
|
||||||
|
// Get the cache key for this request
|
||||||
const key = getCacheKey(req);
|
const key = getCacheKey(req);
|
||||||
|
|
||||||
// Check if this page is in our cache
|
// Check if this page is in our cache
|
||||||
let cachedCopy = cache.get(key);
|
let cachedCopy = cache.get(key);
|
||||||
if (cachedCopy) {
|
if (cachedCopy) {
|
||||||
cacheHit = true;
|
if (debug) { console.log(`CACHE HIT FOR ${key} in ${cacheName} cache`); }
|
||||||
res.locals.ssr = true; // mark response as SSR (enables text compression)
|
|
||||||
if (debug) { console.log(`CACHE HIT FOR ${key}`); }
|
|
||||||
// return page from cache to user
|
|
||||||
res.send(cachedCopy);
|
|
||||||
|
|
||||||
// Check if cached copy is expired (in this sitution key will now be gone from cache)
|
// Check if cached copy is expired (If expired, the key will now be gone from cache)
|
||||||
if (!cache.has(key)) {
|
if (!cache.has(key)) {
|
||||||
if (debug) { console.log(`CACHE EXPIRED FOR ${key} Re-rendering...`); }
|
if (debug) { console.log(`CACHE EXPIRED FOR ${key} in ${cacheName} cache. Re-rendering...`); }
|
||||||
// Update cached copy by rerendering server-side
|
// Update cached copy by rerendering server-side
|
||||||
// NOTE: Cached copy was already returned to user above. So, this re-render is just to prepare for next user.
|
// NOTE: In this scenario the currently cached copy will be returned to the current user.
|
||||||
|
// This re-render is performed behind the scenes to update cached copy for next user.
|
||||||
serverSideRender(req, res, false);
|
serverSideRender(req, res, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Tell Express to skip all other handlers for this path
|
|
||||||
// This ensures we don't try to re-render the page since we've already returned the cached copy
|
|
||||||
next('router');
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// If nothing found in cache, just continue with next handler
|
// return page from cache
|
||||||
// (This should send the request on to the handler that rerenders the page via SSR)
|
return cachedCopy;
|
||||||
if (!cacheHit) {
|
|
||||||
next();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -390,20 +436,30 @@ function getCacheKey(req): string {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Save data to server side cache, if enabled. If caching is not enabled or user is authenticated, this is a noop
|
* Save page to server side cache(s), if enabled. If caching is not enabled or a user is authenticated, this is a noop
|
||||||
|
* If multiple caches are enabled, the page will be saved to any caches where it does not yet exist (or is expired).
|
||||||
|
* (This minimizes the number of times we need to run SSR on the same page.)
|
||||||
* @param req current page request
|
* @param req current page request
|
||||||
* @param data page data to save to cache
|
* @param page page data to save to cache
|
||||||
*/
|
*/
|
||||||
function saveToCache(req, data: any) {
|
function saveToCache(req, page: any) {
|
||||||
// Only cache if caching is enabled and no one is currently authenticated. This means ONLY public pages can be cached.
|
// Only cache if no one is currently authenticated. This means ONLY public pages can be cached.
|
||||||
// NOTE: It's not safe to save page data to the cache when a user is authenticated. In that situation,
|
// NOTE: It's not safe to save page data to the cache when a user is authenticated. In that situation,
|
||||||
// the page may include sensitive or user-specific materials. As the cache is shared across all users, it can only contain public info.
|
// the page may include sensitive or user-specific materials. As the cache is shared across all users, it can only contain public info.
|
||||||
if (cacheEnabled() && !isUserAuthenticated(req)) {
|
if (!isUserAuthenticated(req)) {
|
||||||
const key = getCacheKey(req);
|
const key = getCacheKey(req);
|
||||||
// Make sure this key is not already in our cache. If "has()" returns true,
|
// Avoid caching "/reload/[random]" paths (these are hard refreshes after logout)
|
||||||
// then it's in the cache already and *not* expired.
|
if (key.startsWith('/reload')) { return; }
|
||||||
if (!cache.has(key)) {
|
|
||||||
cache.set(key, data);
|
// If bot cache is enabled, save it to that cache if it doesn't exist or is expired
|
||||||
|
// (NOTE: has() will return false if page is expired in cache)
|
||||||
|
if (botCacheEnabled() && !botCache.has(key)) {
|
||||||
|
botCache.set(key, page);
|
||||||
|
}
|
||||||
|
|
||||||
|
// If anonymous cache is enabled, save it to that cache if it doesn't exist or is expired
|
||||||
|
if (anonymousCacheEnabled() && !anonymousCache.has(key)) {
|
||||||
|
anonymousCache.set(key, page);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -412,7 +468,7 @@ function saveToCache(req, data: any) {
|
|||||||
* Whether a user is authenticated or not
|
* Whether a user is authenticated or not
|
||||||
*/
|
*/
|
||||||
function isUserAuthenticated(req): boolean {
|
function isUserAuthenticated(req): boolean {
|
||||||
// Check whether our authentication Cookie exists or not
|
// Check whether our DSpace authentication Cookie exists or not
|
||||||
return req.cookies[TOKENITEM];
|
return req.cookies[TOKENITEM];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -8,11 +8,22 @@ export interface CacheConfig extends Config {
|
|||||||
// Cache-Control HTTP Header
|
// Cache-Control HTTP Header
|
||||||
control: string;
|
control: string;
|
||||||
autoSync: AutoSyncConfig;
|
autoSync: AutoSyncConfig;
|
||||||
// In-memory cache of server-side rendered content
|
// In-memory caches of server-side rendered (SSR) content. These caches can be used to limit the frequency
|
||||||
|
// of re-generating SSR pages to improve performance.
|
||||||
serverSide: {
|
serverSide: {
|
||||||
// Maximum number of pages (rendered via SSR) to cache.
|
// Cache specific to known bots. Allows you to serve cached contents to bots only.
|
||||||
|
botCache: {
|
||||||
|
// Maximum number of pages (rendered via SSR) to cache. Setting max=0 disables the cache.
|
||||||
|
max: number;
|
||||||
|
// Amount of time after which cached pages are considered stale (in ms)
|
||||||
|
timeToLive: number;
|
||||||
|
},
|
||||||
|
// Cache specific to anonymous users. Allows you to serve cached content to non-authenticated users.
|
||||||
|
anonymousCache: {
|
||||||
|
// Maximum number of pages (rendered via SSR) to cache. Setting max=0 disables the cache.
|
||||||
max: number;
|
max: number;
|
||||||
// Amount of time after which cached pages are considered stale (in ms)
|
// Amount of time after which cached pages are considered stale (in ms)
|
||||||
timeToLive: number;
|
timeToLive: number;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@@ -76,10 +76,22 @@ export class DefaultAppConfig implements AppConfig {
|
|||||||
},
|
},
|
||||||
// In-memory cache of server-side rendered content
|
// In-memory cache of server-side rendered content
|
||||||
serverSide: {
|
serverSide: {
|
||||||
// Maximum number of pages (rendered via SSR) to cache. Set to zero to disable server side caching.
|
// Cache specific to known bots. Allows you to serve cached contents to bots only.
|
||||||
max: 100,
|
// Defaults to caching 1,000 pages. Each page expires after 1 day
|
||||||
|
botCache: {
|
||||||
|
// Maximum number of pages (rendered via SSR) to cache. Setting max=0 disables the cache.
|
||||||
|
max: 1000,
|
||||||
// Amount of time after which cached pages are considered stale (in ms)
|
// Amount of time after which cached pages are considered stale (in ms)
|
||||||
timeToLive: 15 * 60 * 1000 // 15 minutes
|
timeToLive: 24 * 60 * 60 * 1000, // 1 day
|
||||||
|
},
|
||||||
|
// Cache specific to anonymous users. Allows you to serve cached content to non-authenticated users.
|
||||||
|
// Defaults to caching 0 pages. But, when enabled, each page expires after 10 seconds (to minimize anonymous users seeing out-of-date content)
|
||||||
|
anonymousCache: {
|
||||||
|
// Maximum number of pages (rendered via SSR) to cache. Setting max=0 disables the cache.
|
||||||
|
max: 0, // disabled by default
|
||||||
|
// Amount of time after which cached pages are considered stale (in ms)
|
||||||
|
timeToLive: 10 * 1000, // 10 seconds
|
||||||
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@@ -6749,6 +6749,11 @@ isbinaryfile@^4.0.8:
|
|||||||
resolved "https://registry.yarnpkg.com/isbinaryfile/-/isbinaryfile-4.0.10.tgz#0c5b5e30c2557a2f06febd37b7322946aaee42b3"
|
resolved "https://registry.yarnpkg.com/isbinaryfile/-/isbinaryfile-4.0.10.tgz#0c5b5e30c2557a2f06febd37b7322946aaee42b3"
|
||||||
integrity sha512-iHrqe5shvBUcFbmZq9zOQHBoeOhZJu6RQGrDpBgenUm/Am+F3JM2MgQj+rK3Z601fzrL5gLZWtAPH2OBaSVcyw==
|
integrity sha512-iHrqe5shvBUcFbmZq9zOQHBoeOhZJu6RQGrDpBgenUm/Am+F3JM2MgQj+rK3Z601fzrL5gLZWtAPH2OBaSVcyw==
|
||||||
|
|
||||||
|
isbot@^3.6.5:
|
||||||
|
version "3.6.5"
|
||||||
|
resolved "https://registry.yarnpkg.com/isbot/-/isbot-3.6.5.tgz#a749980d9dfba9ebcc03ee7b548d1f24dd8c9f1e"
|
||||||
|
integrity sha512-BchONELXt6yMad++BwGpa0oQxo/uD0keL7N15cYVf0A1oMIoNQ79OqeYdPMFWDrNhCqCbRuw9Y9F3QBjvAxZ5g==
|
||||||
|
|
||||||
isexe@^2.0.0:
|
isexe@^2.0.0:
|
||||||
version "2.0.0"
|
version "2.0.0"
|
||||||
resolved "https://registry.yarnpkg.com/isexe/-/isexe-2.0.0.tgz#e8fbf374dc556ff8947a10dcb0572d633f2cfa10"
|
resolved "https://registry.yarnpkg.com/isexe/-/isexe-2.0.0.tgz#e8fbf374dc556ff8947a10dcb0572d633f2cfa10"
|
||||||
|
Reference in New Issue
Block a user