commit 452553111b9929074bcbb77a49c041582daae0e8 Author: Panu Matilainen Date: Tue Feb 5 10:11:19 2013 +0200 Check for stale db locks when opening write-cursors - During long-running transactions it's entirely possible for some other player to come and go, leaving stale locks behind and cause the transaction to get stuck until the cavalry comes along in the form of somebody else opening the rpmdb, clearing the blockage. - Presumably dbenv->failchk() is not entirely free of cost so we only do this for writes which are way more critical and also more prone to getting stuck. - dbenv->failchk() could return DB_RUNRECOVER in which case we should abort everything but we lack a mechanism to do it... just add a reminder comment for now. diff --git a/lib/backend/db3.c b/lib/backend/db3.c index 656486b..de8071b 100644 --- a/lib/backend/db3.c +++ b/lib/backend/db3.c @@ -248,7 +248,7 @@ dbiCursor dbiCursorInit(dbiIndex dbi, unsigned int flags) DB * db = dbi->dbi_db; DBC * cursor; int cflags; - int rc; + int rc = 0; uint32_t eflags = db_envflags(db); /* DB_WRITECURSOR requires CDB and writable db */ @@ -259,8 +259,23 @@ dbiCursor dbiCursorInit(dbiIndex dbi, unsigned int flags) } else cflags = 0; - rc = db->cursor(db, NULL, &cursor, cflags); - rc = cvtdberr(dbi, "db->cursor", rc, _debug); + /* + * Check for stale locks which could block writes "forever". + * XXX: Should we also do this on reads? Reads are less likely + * to get blocked so it seems excessive... + * XXX: On DB_RUNRECOVER, we should abort everything. Now + * we'll just fail to open a cursor again and again and again. + */ + if (cflags & DB_WRITECURSOR) { + DB_ENV *dbenv = db->get_env(db); + rc = dbenv->failchk(dbenv, 0); + rc = cvtdberr(dbi, "dbenv->failchk", rc, _debug); + } + + if (rc == 0) { + rc = db->cursor(db, NULL, &cursor, cflags); + rc = cvtdberr(dbi, "db->cursor", rc, _debug); + } if (rc == 0) { dbc = xcalloc(1, sizeof(*dbc));