数据库:oracle

数据源:druid

现象:生产环境存在两套环境,k8s和rancher,k8s环境每隔几分钟会出现close connection error,或者close statement error或者对数据库访问Broken pipe,然而rancher不会,这种现象下我们排查的重点关注在k8s网络环境,一直没有解决这问题

问题本质看这类错误表现为tcp连接被断开,排查的对象未k8s网络情况和防火墙情况,防火墙默认会关闭一定时间不活跃的连接,具体时间半小时还是多久看运维配置,因为只发生在k8s节点,这块一直在等运维同事处理,这块一直没有太大头绪,最终折中处理方式改为客户端处理,druid配置保活、配置对连接池中连接检测、配置验证sql超时时间、配置移除不活跃连接等,测试跟踪一天,确实没有类似错误了,具体网络层面原因再看了

具体配置:

spring:

datasource:

driver-class-name: oracle.jdbc.OracleDriver

username: xx

password: xx

initialSize: 10

minIdle: 10

maxActive: 50

# 配置获取连接等待超时的时间

maxWait: 60000

# 配置间隔多久才进行一次检测,检测需要关闭的空闲连接,单位是毫秒

timeBetweenEvictionRunsMillis: 60000

# 配置一个连接在池中最小生存的时间,单位是毫秒

minEvictableIdleTimeMillis: 300000

validationQuery: SELECT 1 FROM DUAL

testWhileIdle: true

testOnBorrow: true

testOnReturn: false

# 打开PSCache,并且指定每个连接上PSCache的大小

poolPreparedStatements: true

maxPoolPreparedStatementPerConnectionSize: 30

# 配置监控统计拦截的filters,去掉后监控界面sql无法统计,wall用于防火墙

filters: stat,log4j

# 通过connectProperties属性来打开mergeSql功能;慢SQL记录

connectionProperties: druid.stat.mergeSql=true;druid.stat.slowSqlMillis=3000

#使用非公平锁

useUnfairLock: true

removeAbandoned: true

removeAbandonedTimeout: 1800

logAbandoned: false

validationQueryTimeout: 1

keepAlive: true

关键配置:

//每次获取连接后执行validationQuery,一定程度会降低性能,但为了规避上述问题,需要配置

testOnBorrow: true

//非公平锁、重要,减少竞争等待时间

useUnfairLock: true removeAbandoned: true removeAbandonedTimeout: 1800 logAbandoned: false

//validationQueryTimeout 最好配置下,不然可能出现长达15分钟的校验时间,导致整个查询超时

validationQueryTimeout: 1

//在小于minIdle连接数的时候执行保活操作,防止防火墙断开连接

keepAlive: true

druid获取连接源码

public DruidPooledConnection getConnectionDirect(long maxWaitMillis) throws SQLException {

int notFullTimeoutRetryCnt = 0;

for (;;) {

// handle notFullTimeoutRetry

DruidPooledConnection poolableConnection;

try {

poolableConnection = getConnectionInternal(maxWaitMillis);

} catch (GetConnectionTimeoutException ex) {

if (notFullTimeoutRetryCnt <= this.notFullTimeoutRetryCount && !isFull()) {

notFullTimeoutRetryCnt++;

if (LOG.isWarnEnabled()) {

LOG.warn("get connection timeout retry : " + notFullTimeoutRetryCnt);

}

continue;

}

throw ex;

}

if (testOnBorrow) {

boolean validate = testConnectionInternal(poolableConnection.holder, poolableConnection.conn);

if (!validate) {

if (LOG.isDebugEnabled()) {

LOG.debug("skip not validate connection.");

}

Connection realConnection = poolableConnection.conn;

discardConnection(realConnection);

continue;

}

} else {

Connection realConnection = poolableConnection.conn;

if (poolableConnection.conn.isClosed()) {

discardConnection(null); // 传入null,避免重复关闭

continue;

}

if (testWhileIdle) {

long currentTimeMillis = System.currentTimeMillis();

long lastActiveTimeMillis = poolableConnection.holder.lastActiveTimeMillis;

long idleMillis = currentTimeMillis - lastActiveTimeMillis;

long timeBetweenEvictionRunsMillis = this.timeBetweenEvictionRunsMillis;

if (timeBetweenEvictionRunsMillis <= 0) {

timeBetweenEvictionRunsMillis = DEFAULT_TIME_BETWEEN_EVICTION_RUNS_MILLIS;

}

if (idleMillis >= timeBetweenEvictionRunsMillis

|| idleMillis < 0 // unexcepted branch

) {

boolean validate = testConnectionInternal(poolableConnection.holder, poolableConnection.conn);

if (!validate) {

if (LOG.isDebugEnabled()) {

LOG.debug("skip not validate connection.");

}

discardConnection(realConnection);

continue;

}

}

}

}

if (removeAbandoned) {

StackTraceElement[] stackTrace = Thread.currentThread().getStackTrace();

poolableConnection.connectStackTrace = stackTrace;

poolableConnection.setConnectedTimeNano();

poolableConnection.traceEnable = true;

activeConnectionLock.lock();

try {

activeConnections.put(poolableConnection, PRESENT);

} finally {

activeConnectionLock.unlock();

}

}

if (!this.defaultAutoCommit) {

poolableConnection.setAutoCommit(false);

}

return poolableConnection;

}

}

Logo

K8S/Kubernetes社区为您提供最前沿的新闻资讯和知识内容

更多推荐