Commit a8536671 authored by Ard Schrijvers

REPO-2007 [Backport 11.2] Reset invalid RUNNING locks to free when a cluster node starts

If a cluster node (say 'node1') starts and finds locks in the lock
table that are owned by 'node1', it means that the cluster node did not
release all its locks during shutdown (graceful or ungraceful) and that it
came up again before other cluster nodes had reset the locks to FREE,
because the locks had not yet reached their expiration time (or because
there were no other cluster nodes).
In normal situations the DbResetExpiredLocksJanitor takes care of freeing
expired locks, for example locks that belonged to a cluster node that was
shut down. In the scenario above, however, the DbResetExpiredLocksJanitor
has not yet freed the locks. As a result,
org.onehippo.repository.lock.db.DbLockManager#createLock does not allow the
lock to be created, while the DbLockRefresher keeps refreshing the database
lock nonetheless; hence no thread in any cluster node can reclaim the
lock any more.
A solution could be that the DbLockRefresher only refreshes locks that
are present in the
org.onehippo.repository.lock.AbstractLockManager#localLocks object.
However, that would make the refresh statement more complex, so instead
invalid live locks are now reset during startup.

(cherry picked from commit 2fdd5458ae3439092b58b7febb380f69057fca7f)
parent be8849f6
......@@ -67,6 +67,15 @@ public class DbLockManager extends AbstractLockManager {
"lastModified=? " +
"WHERE lockKey=? AND lockOwner=? AND lockThread=?";
/**
 * SQL template that sets every lock row owned by a given lock owner back to FREE when the row is still
 * in status RUNNING or ABORT.
 * <p>
 * {@code %s} is replaced with the lock table name through {@link String#format(String, Object...)}.
 * Statement parameters, in order:
 * <ol>
 *     <li>{@code lastModified} - the timestamp to store on the reset rows</li>
 *     <li>{@code lockOwner} - the owner whose RUNNING / ABORT rows are reset</li>
 * </ol>
 */
private static final String RESET_INVALID_LIVE_LOCKS_STATEMENT = "UPDATE %s SET " +
"lockOwner=NULL, " +
"lockThread=NULL, " +
"status='FREE', " +
"lockTime=0, " +
"expirationTime=0, " +
"lastModified=? " +
"WHERE lockOwner=? AND (status='RUNNING' OR status='ABORT')";
private static final String RESET_EXPIRED_STATEMENT = "UPDATE %s SET " +
"lockOwner=NULL, " +
......@@ -96,6 +105,7 @@ public class DbLockManager extends AbstractLockManager {
private final String allLockedStatement;
private final String resetLockStatement;
private final String resetExpiredStatement;
private final String resetInvalidLiveLocksStatement;
private final String removeOutdatedStatement;
private final String abortStatement;
private final String refreshLockStatement;
......@@ -112,6 +122,7 @@ public class DbLockManager extends AbstractLockManager {
this.allLockedStatement = String.format(ALL_LOCKED_STATEMENT, tableName);
this.resetLockStatement = String.format(RESET_LOCK_STATEMENT, tableName);
this.resetExpiredStatement = String.format(RESET_EXPIRED_STATEMENT, tableName);
this.resetInvalidLiveLocksStatement = String.format(RESET_INVALID_LIVE_LOCKS_STATEMENT, tableName);
this.removeOutdatedStatement = String.format(REMOVE_OUTDATED_LOCKS, tableName);
this.abortStatement = String.format(ABORT_STATEMENT, tableName);
this.refreshLockStatement = String.format(REFRESH_LOCK_STATEMENT, tableName);
......@@ -119,6 +130,8 @@ public class DbLockManager extends AbstractLockManager {
createTableIfNeeded(dataSource, connectionHelper, getCreateLockTableStatement(), tableName, schemaCheckEnabled, "lockKey");
resetInvalidLiveLocks();
addJob(new UnlockStoppedThreadJanitor());
addJob(new DbResetExpiredLocksJanitor(this));
final int oneDaySeconds = 24 * 60 * 60;
......@@ -175,6 +188,30 @@ public class DbLockManager extends AbstractLockManager {
}
}
/**
 * Resets to FREE every lock row that is owned by this cluster node's ID and is still in status
 * RUNNING or ABORT.
 * <p>
 * Such rows are leftovers from an earlier run of a node with the same cluster node ID: the node shut
 * down (ungracefully, or gracefully while some jobs had not yet released their locks) and restarted
 * before the DbResetExpiredLocksJanitor had cleaned up the abandoned locks.
 * <p>
 * A {@link SQLException} is logged and not propagated.
 */
private void resetInvalidLiveLocks() {
    Connection conn = null;
    boolean previousAutoCommit = false;
    try {
        conn = getConnection();
        previousAutoCommit = conn.getAutoCommit();
        // execute the single UPDATE in auto-commit mode; the previous mode is handed to close(..) below
        conn.setAutoCommit(true);
        try (final PreparedStatement stmt = conn.prepareStatement(resetInvalidLiveLocksStatement)) {
            // parameter 1: new lastModified value, parameter 2: lockOwner to match
            stmt.setLong(1, System.currentTimeMillis());
            stmt.setString(2, getClusterNodeId());
            final int resetCount = stmt.executeUpdate();
            log.info("Reset {} locks", resetCount);
        }
    } catch (SQLException e) {
        log.error("Error while trying to reset locks", e);
    } finally {
        // conn is still null here when getConnection() itself failed
        close(conn, previousAutoCommit);
    }
}
/**
 * Obtains a connection from this lock manager's data source.
 *
 * @return the connection returned by {@code dataSource.getConnection()}
 * @throws SQLException if the data source fails to provide a connection
 */
public Connection getConnection() throws SQLException {
    final Connection connection = dataSource.getConnection();
    return connection;
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment