问题描述:
系统使用Spring Cache缓存注解(如 @Cacheable、@CacheEvict 等)实现缓存时,线上系统线程数暴涨,执行任务的线程像进入黑洞一样,请求后没有任何反应。
问题分析:
Redis客户端登录,输入monitor命令后,发现一连串的Exists命令在不断刷新,而redis中也有很多~lock结尾的分布式锁key,且过期时间是永不过期。
手动删除后,系统恢复正常。
经过对 ~lock 关键字的搜索,发现这些 key 是由 spring-data-redis-2.2.12.RELEASE.jar 包中 DefaultRedisCacheWriter 类的 createCacheLockKey 方法创建的。
// Builds the Redis key used as the lock for a cache: the cache name followed by "~lock", encoded as UTF-8.
private static byte[] createCacheLockKey(String name) {
return (name + "~lock").getBytes(StandardCharsets.UTF_8);
}
然后,经过对代码上下文的分析,发现~lock前面的是我们在@Cacheable注解中配置的value名称。 在对新增、修改、删除的时候,即put、putIfAbsent、remove、clean 方法中会判断是否获取到锁,获取不到则进入自旋(自旋间隔时间为sleepTime)
// Acquires the cache lock with SETNX on the "<name>~lock" key. Note that no expiration is set on the key here.
private Boolean doLock(String name, RedisConnection connection) {
return connection.setNX(createCacheLockKey(name), new byte[0]);
}
// Blocks the calling thread for as long as the lock check for the given cache reports a lock;
// returns immediately when this writer is not a locking writer.
private void checkAndPotentiallyWaitUntilUnlocked(String name, RedisConnection connection) {
if (!isLockingCacheWriter()) {
return;
}
try {
// Spin here: re-check the lock and sleep for sleepTime between checks; the loop has no upper bound.
while (doCheckLock(name, connection)) {
Thread.sleep(sleepTime.toMillis());
}
} catch (InterruptedException ex) {
// Re-interrupt current thread, to allow other participants to react.
Thread.currentThread().interrupt();
throw new PessimisticLockingFailureException(String.format("Interrupted while waiting to unlock cache %s", name),
ex);
}
}
这就意味着,虽然方法中使用 finally 来解锁,但是在还未解锁的时候,服务异常停止,比如: kill -9 PID, 或者网络波动, 这个锁就永远存在。后续的所有线程都会获取不到锁进入自旋。
问题解决:
1、手动删除~lock后缀的key
2、重写DefaultRedisCacheWriter类,给锁设置过期时间。
重写DefaultRedisCacheWriter类,修改doLock方法:
/**
 * Tries to acquire the lock key of the given cache, with an expiration attached.
 * <p>
 * The stock implementation used a bare {@code SETNX} without any timeout, which can leave the lock
 * behind forever. Here the Lua script held in {@code script} is evaluated instead, receiving the
 * lock key, an empty value and the expiration from {@code expireTime}.
 *
 * @param name the cache name the lock key is derived from.
 * @param connection the connection used to evaluate the script.
 * @return {@code true} only when the script reported {@code 1}.
 */
private Boolean doLock(String name, RedisConnection connection) {
    final byte[] luaSource = script.getBytes(StandardCharsets.UTF_8);
    final byte[] lockKey = createCacheLockKey(name);
    // The expiration has to travel as the bytes of its String form, not as a long or int.
    final byte[] ttlArgument = expireTime.getBytes(StandardCharsets.UTF_8);
    final byte[] emptyValue = new byte[0];

    // Arguments: script, return type, number of keys, then the key followed by the script arguments.
    Long acquired = connection.eval(luaSource, ReturnType.INTEGER, 1, lockKey, emptyValue, ttlArgument);
    if (acquired == null) {
        return false;
    }
    return acquired == 1L;
}
另外重写clean方法,用scan命令替代keys命令
/**
 * Removes all entries of the cache {@code name} whose keys match {@code pattern}.
 * <p>
 * Keys are discovered with {@code SCAN} instead of {@code KEYS} and are deleted in batches, so no
 * single unbounded {@code DEL} is sent to Redis and the matched keys are never all held in memory.
 *
 * @param name the cache name, also used to derive the lock key; must not be {@literal null}.
 * @param pattern the key pattern to match; must not be {@literal null}.
 * @see org.springframework.data.redis.cache.RedisCacheWriter#clean(java.lang.String, byte[])
 */
@Override
public void clean(String name, byte[] pattern) {
    Assert.notNull(name, "Name must not be null!");
    Assert.notNull(pattern, "Pattern must not be null!");
    execute(name, connection -> {
        boolean wasLocked = false;
        try {
            if (isLockingCacheWriter()) {
                // Remember the lock only when it was really acquired; otherwise the finally block
                // would delete a lock that belongs to another writer.
                wasLocked = Boolean.TRUE.equals(doLock(name, connection));
            }
            final int batchSize = 1000;
            // Decode the pattern explicitly as UTF-8 instead of relying on the platform default charset.
            ScanOptions options = ScanOptions.scanOptions()
                    .match(new String(pattern, StandardCharsets.UTF_8))
                    .count(batchSize)
                    .build();
            Cursor<byte[]> cursor = connection.scan(options);
            Set<byte[]> batch = new HashSet<>();
            while (cursor.hasNext()) {
                batch.add(cursor.next());
                if (batch.size() >= batchSize) {
                    connection.del(batch.toArray(new byte[0][]));
                    batch.clear();
                }
            }
            // Delete whatever is left over from the last, partially filled batch.
            if (!batch.isEmpty()) {
                connection.del(batch.toArray(new byte[0][]));
            }
        } finally {
            if (wasLocked) {
                doUnlock(name, connection);
            }
        }
        return "OK";
    });
}
完整代码:
/**
*
* @ClassName: TopinfoRedisCacheWriter
* @Description: redis 缓存服务
* @Author: 杨攀
* @Date: 2022/11/25 17:47
* @Copyright: 2022 www.tuxun.net Inc. All rights reserved.
*
* 重写 RedisCacheWriter
* 优化:
* 1、scan指令替代keys指令
* 2、为 ~lock 锁添加超时时间(源码加锁时没有设置超时时间)
*
*
*/
public class TopinfoRedisCacheWriter implements RedisCacheWriter {
// Factory from which a Redis connection is obtained for each operation.
private final RedisConnectionFactory connectionFactory;
// Pause between lock checks; a zero or negative value means this writer does not lock (see isLockingCacheWriter).
private final Duration sleepTime;
/** Expiration applied to the lock key. It must be a String that is converted to byte[] (not a long or int), because it is passed to the Lua script as an argument. */
private final String expireTime = "30";
/** Lua script: SETNX the lock key and, when that succeeds, EXPIRE it with the given time; returns 1 if the key was set, otherwise 0. */
private String script = "local key = KEYS[1]\n" +
"local value = ARGV[1]\n" +
"local expireTime = tonumber(ARGV[2])\n" +
"\n" +
"if redis.call('SETNX', key, value) == 1 then\n" +
" redis.call('EXPIRE', key, expireTime)\n" +
" return 1\n" +
"else\n" +
" return 0\n" +
"end";
/**
 * Creates a writer with {@link Duration#ZERO} as sleep time, which means the writer does not lock.
 *
 * @param connectionFactory must not be {@literal null}.
 */
public TopinfoRedisCacheWriter(RedisConnectionFactory connectionFactory) {
this(connectionFactory, Duration.ZERO);
}
/**
 * Creates a writer bound to the given connection factory.
 *
 * @param connectionFactory source of Redis connections; must not be {@literal null}.
 * @param sleepTime pause between lock request attempts; must not be {@literal null}. Pass
 *          {@link Duration#ZERO} to disable locking.
 */
public TopinfoRedisCacheWriter(RedisConnectionFactory connectionFactory, Duration sleepTime) {
    // Validate both arguments first, in the same order as before, then store them.
    Assert.notNull(connectionFactory, "ConnectionFactory must not be null!");
    Assert.notNull(sleepTime, "SleepTime must not be null!");

    this.sleepTime = sleepTime;
    this.connectionFactory = connectionFactory;
}
/**
 * Stores {@code value} under {@code key}, overwriting any existing entry.
 *
 * @param name the cache name; must not be {@literal null}.
 * @param key the entry key; must not be {@literal null}.
 * @param value the entry value; must not be {@literal null}.
 * @param ttl optional time to live; the entry is written without expiration when it is
 *          {@literal null}, zero or negative.
 * @see org.springframework.data.redis.cache.RedisCacheWriter#put(java.lang.String, byte[], byte[], java.time.Duration)
 */
@Override
public void put(String name, byte[] key, byte[] value, @Nullable Duration ttl) {
    Assert.notNull(name, "Name must not be null!");
    Assert.notNull(key, "Key must not be null!");
    Assert.notNull(value, "Value must not be null!");

    execute(name, connection -> {
        if (!shouldExpireWithin(ttl)) {
            // No usable TTL: plain SET without expiration.
            connection.set(key, value);
            return "OK";
        }
        Expiration expiration = Expiration.from(ttl.toMillis(), TimeUnit.MILLISECONDS);
        connection.set(key, value, expiration, SetOption.upsert());
        return "OK";
    });
}
/**
 * Reads the value stored under {@code key}.
 *
 * @param name the cache name; must not be {@literal null}.
 * @param key the entry key; must not be {@literal null}.
 * @return whatever the connection returns for {@code GET key}.
 * @see org.springframework.data.redis.cache.RedisCacheWriter#get(java.lang.String, byte[])
 */
@Override
public byte[] get(String name, byte[] key) {
    Assert.notNull(name, "Name must not be null!");
    Assert.notNull(key, "Key must not be null!");

    Function<RedisConnection, byte[]> readValue = connection -> connection.get(key);
    return execute(name, readValue);
}
/**
 * Stores {@code value} under {@code key} only if the key is not present yet.
 *
 * @param name the cache name, also used to derive the lock key; must not be {@literal null}.
 * @param key the entry key; must not be {@literal null}.
 * @param value the entry value; must not be {@literal null}.
 * @param ttl optional time to live; the entry is written without expiration when it is
 *          {@literal null}, zero or negative.
 * @return {@literal null} when the value was written, otherwise the value currently stored under
 *         {@code key}.
 * @see org.springframework.data.redis.cache.RedisCacheWriter#putIfAbsent(java.lang.String, byte[], byte[], java.time.Duration)
 */
@Override
public byte[] putIfAbsent(String name, byte[] key, byte[] value, @Nullable Duration ttl) {
    Assert.notNull(name, "Name must not be null!");
    Assert.notNull(key, "Key must not be null!");
    Assert.notNull(value, "Value must not be null!");
    return execute(name, connection -> {
        boolean wasLocked = false;
        try {
            if (isLockingCacheWriter()) {
                // Remember the lock only when it was really acquired; otherwise the finally block
                // would delete a lock that belongs to another writer.
                wasLocked = Boolean.TRUE.equals(doLock(name, connection));
            }
            boolean put;
            if (shouldExpireWithin(ttl)) {
                put = connection.set(key, value, Expiration.from(ttl), SetOption.ifAbsent());
            } else {
                put = connection.setNX(key, value);
            }
            if (put) {
                return null;
            }
            return connection.get(key);
        } finally {
            if (wasLocked) {
                doUnlock(name, connection);
            }
        }
    });
}
/**
 * Deletes the entry stored under {@code key}.
 *
 * @param name the cache name; must not be {@literal null}.
 * @param key the entry key; must not be {@literal null}.
 * @see org.springframework.data.redis.cache.RedisCacheWriter#remove(java.lang.String, byte[])
 */
@Override
public void remove(String name, byte[] key) {
    Assert.notNull(name, "Name must not be null!");
    Assert.notNull(key, "Key must not be null!");

    execute(name, connection -> {
        Long removed = connection.del(key);
        return removed;
    });
}
/**
 * Removes all entries of the cache {@code name} whose keys match {@code pattern}.
 * <p>
 * Keys are discovered with {@code SCAN} instead of {@code KEYS} and are deleted in batches, so no
 * single unbounded {@code DEL} is sent to Redis and the matched keys are never all held in memory.
 *
 * @param name the cache name, also used to derive the lock key; must not be {@literal null}.
 * @param pattern the key pattern to match; must not be {@literal null}.
 * @see org.springframework.data.redis.cache.RedisCacheWriter#clean(java.lang.String, byte[])
 */
@Override
public void clean(String name, byte[] pattern) {
    Assert.notNull(name, "Name must not be null!");
    Assert.notNull(pattern, "Pattern must not be null!");
    execute(name, connection -> {
        boolean wasLocked = false;
        try {
            if (isLockingCacheWriter()) {
                // Remember the lock only when it was really acquired; otherwise the finally block
                // would delete a lock that belongs to another writer.
                wasLocked = Boolean.TRUE.equals(doLock(name, connection));
            }
            final int batchSize = 1000;
            // Decode the pattern explicitly as UTF-8 instead of relying on the platform default charset.
            ScanOptions options = ScanOptions.scanOptions()
                    .match(new String(pattern, StandardCharsets.UTF_8))
                    .count(batchSize)
                    .build();
            Cursor<byte[]> cursor = connection.scan(options);
            Set<byte[]> batch = new HashSet<>();
            while (cursor.hasNext()) {
                batch.add(cursor.next());
                if (batch.size() >= batchSize) {
                    connection.del(batch.toArray(new byte[0][]));
                    batch.clear();
                }
            }
            // Delete whatever is left over from the last, partially filled batch.
            if (!batch.isEmpty()) {
                connection.del(batch.toArray(new byte[0][]));
            }
        } finally {
            if (wasLocked) {
                doUnlock(name, connection);
            }
        }
        return "OK";
    });
}
/**
 * Explicitly places the write lock on a cache. The outcome of the lock attempt is not reported
 * to the caller.
 *
 * @param name the name of the cache to lock.
 */
void lock(String name) {
    execute(name, connection -> {
        Boolean acquired = doLock(name, connection);
        return acquired;
    });
}
/**
 * Explicitly removes the write lock from a cache, without waiting for the lock to be released first.
 *
 * @param name the name of the cache to unlock.
 */
void unlock(String name) {
    executeLockFree(connection -> {
        doUnlock(name, connection);
    });
}
/**
 * Tries to acquire the lock key of the given cache, with an expiration attached.
 * <p>
 * The stock implementation used a bare {@code SETNX} without any timeout, which can leave the lock
 * behind forever. Here the Lua script held in {@code script} is evaluated instead, receiving the
 * lock key, an empty value and the expiration from {@code expireTime}.
 *
 * @param name the cache name the lock key is derived from.
 * @param connection the connection used to evaluate the script.
 * @return {@code true} only when the script reported {@code 1}.
 */
private Boolean doLock(String name, RedisConnection connection) {
    final byte[] luaSource = script.getBytes(StandardCharsets.UTF_8);
    final byte[] lockKey = createCacheLockKey(name);
    // The expiration has to travel as the bytes of its String form, not as a long or int.
    final byte[] ttlArgument = expireTime.getBytes(StandardCharsets.UTF_8);
    final byte[] emptyValue = new byte[0];

    // Arguments: script, return type, number of keys, then the key followed by the script arguments.
    Long acquired = connection.eval(luaSource, ReturnType.INTEGER, 1, lockKey, emptyValue, ttlArgument);
    if (acquired == null) {
        return false;
    }
    return acquired == 1L;
}
/**
 * Deletes the lock key of the given cache.
 *
 * @param name the cache name the lock key is derived from.
 * @param connection the connection used to issue the delete.
 * @return the result of the {@code DEL} call.
 */
private Long doUnlock(String name, RedisConnection connection) {
    byte[] lockKey = createCacheLockKey(name);
    return connection.del(lockKey);
}
/**
 * Checks whether the lock key of the given cache currently exists.
 *
 * @param name the cache name the lock key is derived from.
 * @param connection the connection used to issue the check.
 * @return {@code true} if the lock key exists.
 */
boolean doCheckLock(String name, RedisConnection connection) {
    byte[] lockKey = createCacheLockKey(name);
    return connection.exists(lockKey);
}
/**
 * Tells whether this writer works with locks, which is the case for a strictly positive sleep time.
 *
 * @return {@literal true} if {@link RedisCacheWriter} uses locks.
 */
private boolean isLockingCacheWriter() {
    return !(sleepTime.isZero() || sleepTime.isNegative());
}
/**
 * Runs {@code callback} on a fresh connection after waiting for the cache lock to be released.
 * The connection is always closed afterwards.
 *
 * @param name the cache name whose lock is awaited.
 * @param callback the work to run with the connection.
 * @return the callback's result.
 */
private <T> T execute(String name, Function<RedisConnection, T> callback) {
    final RedisConnection redis = connectionFactory.getConnection();
    try {
        checkAndPotentiallyWaitUntilUnlocked(name, redis);
        T outcome = callback.apply(redis);
        return outcome;
    } finally {
        redis.close();
    }
}
/**
 * Runs {@code callback} on a fresh connection without checking the cache lock.
 * The connection is always closed afterwards.
 *
 * @param callback the work to run with the connection.
 */
private void executeLockFree(Consumer<RedisConnection> callback) {
    final RedisConnection redis = connectionFactory.getConnection();
    try {
        callback.accept(redis);
    } finally {
        redis.close();
    }
}
/**
 * Waits until the lock key of the given cache is gone, sleeping for {@code sleepTime} between
 * checks. Does nothing when this writer does not use locks.
 *
 * @param name the cache name whose lock is awaited.
 * @param connection the connection used for the lock checks.
 * @throws PessimisticLockingFailureException if the thread is interrupted while waiting.
 */
private void checkAndPotentiallyWaitUntilUnlocked(String name, RedisConnection connection) {
    if (isLockingCacheWriter()) {
        try {
            boolean locked = doCheckLock(name, connection);
            while (locked) {
                Thread.sleep(sleepTime.toMillis());
                locked = doCheckLock(name, connection);
            }
        } catch (InterruptedException interrupted) {
            // Restore the interrupt flag so that other participants can react to it.
            Thread.currentThread().interrupt();
            String message = String.format("Interrupted while waiting to unlock cache %s", name);
            throw new PessimisticLockingFailureException(message, interrupted);
        }
    }
}
/**
 * Decides whether an entry should be written with an expiration.
 *
 * @param ttl the requested time to live, may be {@literal null}.
 * @return {@code true} only for a non-null, strictly positive duration.
 */
private static boolean shouldExpireWithin(@Nullable Duration ttl) {
    if (ttl == null) {
        return false;
    }
    return !(ttl.isZero() || ttl.isNegative());
}
/**
 * Builds the Redis key used as the lock for a cache.
 *
 * @param name the cache name.
 * @return the UTF-8 bytes of the cache name followed by {@code ~lock}.
 */
private static byte[] createCacheLockKey(String name) {
    String lockKeyName = name + "~lock";
    return lockKeyName.getBytes(StandardCharsets.UTF_8);
}
把 TopinfoRedisCacheWriter 类交给 CacheManager,替换原来的 RedisCacheWriter 实现:
/**
 * Builds the custom {@link CacheManager} on top of {@link TopinfoRedisCacheWriter}.
 * <p>
 * The custom writer is used in place of {@code RedisCacheWriter.lockingRedisCacheWriter(connectionFactory)}
 * for two reasons: it uses SCAN instead of KEYS, and it puts an expiration on the {@code ~lock} key.
 *
 * @author 杨攀
 * @date 2021/6/15 17:35
 * @param connectionFactory the connection factory handed to the cache writer
 * @param configuration the cache configuration handed to the cache manager
 * @return org.springframework.cache.CacheManager
 */
@Bean
public CacheManager cacheManager(RedisConnectionFactory connectionFactory, RedisCacheConfiguration configuration) {
    // Sleep time between lock checks; a positive value makes the writer a locking writer.
    final Duration lockPollInterval = Duration.ofMillis(50);
    return new RedisTtlCacheManager(new TopinfoRedisCacheWriter(connectionFactory, lockPollInterval), configuration);
}
通过测试,问题解决!
**粗体** _斜体_ [链接](http://example.com) `代码` - 列表 > 引用
。你还可以使用@
来通知其他用户。