#include "db_config.h"
#include "db_int.h"
#include "dbinc/db_page.h"
#include "dbinc/hash.h"
#include "dbinc/fop.h"
#include "dbinc/lock.h"
#include "dbinc/log.h"
#include "dbinc/mp.h"
#include "dbinc/txn.h"
/*
 * Prototypes for the functions that are private to this file: the limbo
 * page helpers, the lock hand-off helper, and the internal transaction-list
 * insert/lookup routines.
 */
static int __db_limbo_fix __P((DB *, DB_TXN *,
DB_TXNLIST *, db_pgno_t *, DBMETA *, db_limbo_state));
static int __db_limbo_bucket __P((DB_ENV *,
DB_TXN *, DB_TXNLIST *, db_limbo_state));
static int __db_limbo_move __P((DB_ENV *, DB_TXN *, DB_TXN *, DB_TXNLIST *));
static int __db_limbo_prepare __P((DB *, DB_TXN *, DB_TXNLIST *));
static int __db_lock_move __P((DB_ENV *,
u_int8_t *, db_pgno_t, db_lockmode_t, DB_TXN *, DB_TXN *));
static int __db_txnlist_pgnoadd __P((DB_ENV *, DB_TXNHEAD *,
int32_t, u_int8_t *, char *, db_pgno_t));
static int __db_txnlist_find_internal __P((DB_ENV *, DB_TXNHEAD *,
db_txnlist_type, u_int32_t, u_int8_t *, DB_TXNLIST **,
int, u_int32_t *));
/*
 * __db_dispatch --
 *	Decide whether the log record in "db" must be handed to its recovery
 *	function for the pass described by "redo", and call it if so.
 *
 *	The record type and transaction id are copied out of the first two
 *	u_int32_t fields of db->data; the previous LSN follows them.  The
 *	transaction list "info" supplies the state already recorded for the
 *	record's transaction.
 *
 * Parameters:
 *	dbenv		environment handle
 *	dtab, dtabsize	dispatch table and its number of slots
 *	db		the log record
 *	lsnp		LSN of the record; overwritten with the record's
 *			previous LSN when a debug-flagged record is skipped
 *	redo		recovery pass being run
 *	info		transaction list head
 *
 * Returns 0 when the record is skipped, EINVAL when the record type has no
 * handler in the table, an __db_unknown_flag() error for an unexpected pass,
 * and otherwise whatever the list helper or recovery function returned.
 */
int
__db_dispatch(dbenv, dtab, dtabsize, db, lsnp, redo, info)
	DB_ENV *dbenv;
	int (**dtab)__P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *));
	size_t dtabsize;
	DBT *db;
	DB_LSN *lsnp;
	db_recops redo;
	DB_TXNHEAD *info;
{
	DB_LSN prev_lsn;
	u_int32_t rectype, status, txnid;
	int make_call, ret;

	/* Pull the fixed header fields out of the raw record. */
	memcpy(&rectype, db->data, sizeof(rectype));
	memcpy(&txnid, (u_int8_t *)db->data + sizeof(rectype), sizeof(txnid));
	make_call = ret = 0;

	DB_ASSERT(dbenv, dtab != NULL);

	switch (redo) {
	case DB_TXN_ABORT:
	case DB_TXN_APPLY:
	case DB_TXN_PRINT:
		/* These passes always run the record's function. */
		make_call = 1;
		break;
	case DB_TXN_OPENFILES:
		/*
		 * A record with a transaction id and a previous LSN whose
		 * file is 0 gets its transaction added to the list as TXN_OK.
		 */
		memcpy(&prev_lsn, (u_int8_t *)db->data +
		    sizeof(rectype) + sizeof(txnid), sizeof(prev_lsn));
		if (txnid != 0 && prev_lsn.file == 0 && (ret =
		    __db_txnlist_add(dbenv, info, txnid, TXN_OK, NULL)) != 0)
			return (ret);
		/* FALLTHROUGH */
	case DB_TXN_POPENFILES:
		/* Only these four record types are processed in this pass. */
		if (rectype == DB___dbreg_register ||
		    rectype == DB___txn_child ||
		    rectype == DB___txn_ckp || rectype == DB___txn_recycle)
			return (dtab[rectype](dbenv, db, lsnp, redo, info));
		break;
	case DB_TXN_BACKWARD_ROLL:
		switch (rectype) {
		case DB___txn_regop:
		case DB___txn_recycle:
		case DB___txn_ckp:
			/* Always dispatched, regardless of list state. */
			make_call = 1;
			break;
		case DB___txn_child:
		case DB___db_noop:
		case DB___fop_file_remove:
		case DB___dbreg_register:
			/*
			 * Dispatched by default, but still subject to the
			 * transaction-state checks below.
			 */
			make_call = 1;
			/* FALLTHROUGH */
		default:
			if (txnid == 0)
				break;

			ret = __db_txnlist_find(dbenv, info, txnid, &status);

			/* First sighting of this transaction: ignore it. */
			if (ret == DB_NOTFOUND)
				return (__db_txnlist_add(dbenv,
				    info, txnid, TXN_IGNORE, lsnp));
			if (ret != 0)
				return (ret);

			/* Ignored transactions only run txn_child records. */
			if (status == TXN_IGNORE &&
			    rectype != DB___txn_child) {
				make_call = 0;
				break;
			}
			/* Committed: keep whatever make_call says so far. */
			if (status == TXN_COMMIT)
				break;

			/*
			 * Any other state runs the function; a TXN_OK entry
			 * is moved to TXN_PREPARE for an xa_regop record and
			 * to TXN_ABORT for everything else.
			 */
			make_call = 1;
			if (status == TXN_OK &&
			    (ret = __db_txnlist_update(dbenv,
			    info, txnid, rectype == DB___txn_xa_regop ?
			    TXN_PREPARE : TXN_ABORT, NULL, &status, 0)) != 0)
				return (ret);
		}
		break;
	case DB_TXN_FORWARD_ROLL:
		switch (rectype) {
		case DB___txn_recycle:
		case DB___txn_ckp:
		case DB___db_noop:
		case DB___dbreg_register:
			make_call = 1;
			break;
		default:
			/*
			 * Start from 0, the value used for records without a
			 * transaction id.  The lookup stores a status only
			 * when it finds the transaction, so without this the
			 * DB_NOTFOUND path would read an indeterminate value
			 * in the allocation check below.
			 */
			status = 0;
			if (txnid != 0) {
				ret = __db_txnlist_find(dbenv,
				    info, txnid, &status);

				if (ret == DB_NOTFOUND)
					/* Not an error; fall out of the if. */
					;
				else if (ret != 0)
					return (ret);
				else if (status == TXN_COMMIT) {
					make_call = 1;
					break;
				}
			}
#ifndef HAVE_FTRUNCATE
			/*
			 * Allocation records of transactions that are not
			 * committed and not ignored are dispatched with the
			 * DB_TXN_BACKWARD_ALLOC operation instead.
			 */
			if (status != TXN_IGNORE &&
			    (rectype == DB___ham_metagroup ||
			    rectype == DB___ham_groupalloc ||
			    rectype == DB___db_pg_alloc)) {
				make_call = 1;
				redo = DB_TXN_BACKWARD_ALLOC;
			}
#endif
		}
		break;
	case DB_TXN_BACKWARD_ALLOC:
	default:
		return (__db_unknown_flag(
		    dbenv, "__db_dispatch", (u_int32_t)redo));
	}

	if (make_call) {
		/*
		 * Debug-flagged records are only executed when printing;
		 * otherwise hand back the previous LSN and skip the record.
		 */
		if (rectype & DB_debug_FLAG) {
			if (redo == DB_TXN_PRINT)
				rectype &= ~DB_debug_FLAG;
			else {
				memcpy(lsnp,
				    (u_int8_t *)db->data +
				    sizeof(rectype) +
				    sizeof(txnid), sizeof(*lsnp));
				return (0);
			}
		}

		/* Application records go to the application's dispatcher. */
		if (rectype >= DB_user_BEGIN && dbenv->app_dispatch != NULL)
			return (dbenv->app_dispatch(dbenv, db, lsnp, redo));

		/*
		 * The table has dtabsize slots, so valid indices are
		 * 0 .. dtabsize - 1; an index equal to dtabsize is already
		 * out of bounds.
		 */
		if (rectype >= dtabsize || dtab[rectype] == NULL) {
			__db_errx(dbenv,
			    "Illegal record type %lu in log",
			    (u_long)rectype);
			return (EINVAL);
		}
		return (dtab[rectype](dbenv, db, lsnp, redo, info));
	}

	return (0);
}
/*
 * __db_add_recovery --
 *	Install "func" as the handler for record type "ndx" in the dispatch
 *	table *dtab, growing the table when ndx does not fit.
 *
 *	When the table grows it is resized to ndx + 40 slots, the new slots
 *	are set to NULL, and *dtabsize is updated.
 *
 * Returns 0 on success or the __os_realloc error.
 */
int
__db_add_recovery(dbenv, dtab, dtabsize, func, ndx)
	DB_ENV *dbenv;
	int (***dtab) __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *));
	size_t *dtabsize;
	int (*func) __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *));
	u_int32_t ndx;
{
	size_t grown, slot;
	int ret;

	/* Fast path: the slot already exists. */
	if (ndx < *dtabsize) {
		(*dtab)[ndx] = func;
		return (0);
	}

	grown = ndx + 40;
	ret = __os_realloc(dbenv, grown * sizeof((*dtab)[0]), dtab);
	if (ret != 0)
		return (ret);

	/* Clear every slot that was just added. */
	slot = *dtabsize;
	while (slot < grown)
		(*dtab)[slot++] = NULL;
	*dtabsize = grown;

	(*dtab)[ndx] = func;
	return (0);
}
/*
 * __db_txnlist_init --
 *	Allocate and initialise a transaction list head and return it in
 *	*retp.
 *
 *	The number of hash slots is 1 when low_txn is 0; otherwise it is one
 *	fifth of the id span between low_txn and hi_txn (taking the
 *	wrapped-around span when the direct one exceeds half the id space),
 *	with a minimum of 100.  The generation array starts with 8 entries;
 *	entry 0 is generation 0 covering TXN_MINIMUM..TXN_MAXIMUM.
 *
 * Returns 0 on success or the allocation error.
 */
int
__db_txnlist_init(dbenv, low_txn, hi_txn, trunc_lsn, retp)
	DB_ENV *dbenv;
	u_int32_t low_txn, hi_txn;
	DB_LSN *trunc_lsn;
	DB_TXNHEAD **retp;
{
	DB_TXNHEAD *headp;
	size_t nbytes;
	u_int32_t size, tmp;
	int ret;

	size = 1;
	if (low_txn != 0) {
		/* Order the two ids so that low_txn <= hi_txn. */
		if (hi_txn < low_txn) {
			tmp = hi_txn;
			hi_txn = low_txn;
			low_txn = tmp;
		}

		tmp = hi_txn - low_txn;
		/* More than half the id space apart: use the wrapped span. */
		if (tmp > (TXN_MAXIMUM - TXN_MINIMUM) / 2)
			tmp = (low_txn - TXN_MINIMUM) + (TXN_MAXIMUM - hi_txn);

		size = tmp / 5;
		if (size < 100)
			size = 100;
	}

	nbytes = sizeof(DB_TXNHEAD) + size * sizeof(headp->head);
	ret = __os_malloc(dbenv, nbytes, &headp);
	if (ret != 0)
		return (ret);
	memset(headp, 0, nbytes);

	headp->maxid = hi_txn;
	headp->generation = 0;
	headp->nslots = size;
	headp->gen_alloc = 8;

	ret = __os_malloc(dbenv,
	    headp->gen_alloc * sizeof(headp->gen_array[0]), &headp->gen_array);
	if (ret != 0) {
		__os_free(dbenv, headp);
		return (ret);
	}
	headp->gen_array[0].generation = 0;
	headp->gen_array[0].txn_min = TXN_MINIMUM;
	headp->gen_array[0].txn_max = TXN_MAXIMUM;

	/* Seed both LSNs from the truncation point when one is given. */
	if (trunc_lsn == NULL) {
		ZERO_LSN(headp->trunc_lsn);
		ZERO_LSN(headp->maxlsn);
	} else {
		headp->trunc_lsn = *trunc_lsn;
		headp->maxlsn = *trunc_lsn;
	}
	ZERO_LSN(headp->ckplsn);

	*retp = headp;
	return (0);
}
/*
 * FIND_GENERATION --
 *	Store in "gen" the generation number of the first gen_array entry,
 *	scanning indices 0 .. hp->generation, whose id range contains txnid.
 *
 *	An entry with txn_min < txn_max matches ids in [txn_min, txn_max];
 *	otherwise the range is treated as wrapped and matches ids that are
 *	>= txn_min or <= txn_max.
 *
 *	The macro uses a variable named "dbenv" from the expansion site (in
 *	the DB_ASSERT) and evaluates its arguments more than once.
 *	NOTE(review): if no entry matches and DB_ASSERT compiles to nothing,
 *	the final read uses index hp->generation + 1 -- confirm callers can
 *	never reach that state.
 */
#define FIND_GENERATION(hp, txnid, gen) do { \
u_int32_t __i; \
for (__i = 0; __i <= (hp)->generation; __i++) \
\
if ((hp)->gen_array[__i].txn_min < \
(hp)->gen_array[__i].txn_max ? \
((txnid) >= (hp)->gen_array[__i].txn_min && \
(txnid) <= (hp)->gen_array[__i].txn_max) : \
((txnid) >= (hp)->gen_array[__i].txn_min || \
(txnid) <= (hp)->gen_array[__i].txn_max)) \
break; \
DB_ASSERT(dbenv, __i <= (hp)->generation); \
gen = (hp)->gen_array[__i].generation; \
} while (0)
/*
 * __db_txnlist_add --
 *	Create a TXNLIST_TXNID entry for txnid with the given status and put
 *	it at the head of its hash bucket.
 *
 *	hp->maxid is raised to txnid when txnid is larger.  hp->maxlsn is set
 *	from lsn only when it is still zero and the status is TXN_COMMIT.
 *
 * Returns 0 on success or the allocation error.
 */
int
__db_txnlist_add(dbenv, hp, txnid, status, lsn)
	DB_ENV *dbenv;
	DB_TXNHEAD *hp;
	u_int32_t txnid, status;
	DB_LSN *lsn;
{
	DB_TXNLIST *entry;
	int ret;

	ret = __os_malloc(dbenv, sizeof(DB_TXNLIST), &entry);
	if (ret != 0)
		return (ret);

	LIST_INSERT_HEAD(&hp->head[DB_TXNLIST_MASK(hp, txnid)], entry, links);

	/* Tag the entry with the generation its id belongs to. */
	FIND_GENERATION(hp, txnid, entry->u.t.generation);
	entry->type = TXNLIST_TXNID;
	entry->u.t.txnid = txnid;
	entry->u.t.status = status;

	if (hp->maxid < txnid)
		hp->maxid = txnid;

	/* Only the first commit seen establishes maxlsn. */
	if (lsn != NULL && status == TXN_COMMIT && IS_ZERO_LSN(hp->maxlsn))
		hp->maxlsn = *lsn;

	DB_ASSERT(dbenv, lsn == NULL ||
	    status != TXN_COMMIT || LOG_COMPARE(&hp->maxlsn, lsn) >= 0);

	return (0);
}
/*
 * __db_txnlist_remove --
 *	Delete the TXNLIST_TXNID entry for txnid from the list.
 *
 * Returns the result of the internal lookup (DB_NOTFOUND when no such
 * entry exists).
 */
int
__db_txnlist_remove(dbenv, hp, txnid)
	DB_ENV *dbenv;
	DB_TXNHEAD *hp;
	u_int32_t txnid;
{
	DB_TXNLIST *ignored_entry;
	u_int32_t ignored_status;
	int ret;

	/* A delete flag of 1 makes the lookup unlink and free the entry. */
	ret = __db_txnlist_find_internal(dbenv, hp,
	    TXNLIST_TXNID, txnid, NULL, &ignored_entry, 1, &ignored_status);
	return (ret);
}
/*
 * __db_txnlist_ckp --
 *	Record ckp_lsn as the list's checkpoint LSN, but only if none has
 *	been recorded yet, maxlsn is set, and maxlsn is not before ckp_lsn.
 */
void
__db_txnlist_ckp(dbenv, hp, ckp_lsn)
	DB_ENV *dbenv;
	DB_TXNHEAD *hp;
	DB_LSN *ckp_lsn;
{
	COMPQUIET(dbenv, NULL);

	/* A checkpoint LSN has already been chosen. */
	if (!IS_ZERO_LSN(hp->ckplsn))
		return;
	/* Nothing to compare against yet. */
	if (IS_ZERO_LSN(hp->maxlsn))
		return;
	if (LOG_COMPARE(&hp->maxlsn, ckp_lsn) < 0)
		return;

	hp->ckplsn = *ckp_lsn;
}
/*
 * __db_txnlist_end --
 *	Free every entry on the transaction list, the generation array and
 *	the list head itself.  A NULL hp is accepted and ignored.
 */
void
__db_txnlist_end(dbenv, hp)
	DB_ENV *dbenv;
	DB_TXNHEAD *hp;
{
	DB_TXNLIST *node;
	u_int32_t slot;

	if (hp == NULL)
		return;

	for (slot = 0; slot < hp->nslots; slot++) {
		/* Pop entries off the bucket until it is empty. */
		for (;;) {
			node = LIST_FIRST(&hp->head[slot]);
			if (node == NULL)
				break;

			/* Only LSN entries have memory released here. */
			if (node->type == TXNLIST_LSN)
				__os_free(dbenv, node->u.l.lsn_stack);

			LIST_REMOVE(node, links);
			__os_free(dbenv, node);
		}
	}

	if (hp->gen_array != NULL)
		__os_free(dbenv, hp->gen_array);
	__os_free(dbenv, hp);
}
/*
 * __db_txnlist_find --
 *	Look up txnid in the list and store its status in *statusp.
 *
 * Returns 0 when found, DB_NOTFOUND when the id is 0 or not on the list,
 * or another error from the internal lookup.
 */
int
__db_txnlist_find(dbenv, hp, txnid, statusp)
	DB_ENV *dbenv;
	DB_TXNHEAD *hp;
	u_int32_t txnid, *statusp;
{
	DB_TXNLIST *entry;
	int ret;

	if (txnid != 0)
		ret = __db_txnlist_find_internal(dbenv, hp,
		    TXNLIST_TXNID, txnid, NULL, &entry, 0, statusp);
	else
		ret = DB_NOTFOUND;	/* Id 0 is never on the list. */

	return (ret);
}
/*
 * __db_txnlist_update --
 *	Change the status recorded for txnid.
 *
 *	*ret_status receives the status found on the list (or the new status
 *	when the entry is created here).  A missing entry is added only when
 *	add_ok is non-zero.  An entry whose status is TXN_IGNORE is left
 *	unchanged.  hp->maxlsn is set from lsn when it is still zero and the
 *	new status is TXN_COMMIT.
 *
 * Returns 0 on success, DB_NOTFOUND for id 0 or a missing entry that may
 * not be added, or another error from the lookup/add.
 */
int
__db_txnlist_update(dbenv, hp, txnid, status, lsn, ret_status, add_ok)
	DB_ENV *dbenv;
	DB_TXNHEAD *hp;
	u_int32_t txnid, status;
	DB_LSN *lsn;
	u_int32_t *ret_status;
	int add_ok;
{
	DB_TXNLIST *entry;
	int ret;

	if (txnid == 0)
		return (DB_NOTFOUND);

	ret = __db_txnlist_find_internal(dbenv,
	    hp, TXNLIST_TXNID, txnid, NULL, &entry, 0, ret_status);

	if (ret == DB_NOTFOUND && add_ok) {
		*ret_status = status;
		return (__db_txnlist_add(dbenv, hp, txnid, status, lsn));
	}
	if (ret != 0)
		return (ret);

	/* Ignored transactions keep their status. */
	if (*ret_status != TXN_IGNORE) {
		entry->u.t.status = status;
		if (lsn != NULL &&
		    status == TXN_COMMIT && IS_ZERO_LSN(hp->maxlsn))
			hp->maxlsn = *lsn;
	}

	return (0);
}
/*
 * __db_txnlist_find_internal --
 *	Locate a list entry of the given type: by txnid (and the generation
 *	the id maps to) for TXNLIST_TXNID, or by file uid for TXNLIST_PGNO.
 *	Any other type panics the environment with EINVAL.
 *
 *	On a match *statusp is set (the entry's status for TXNLIST_TXNID,
 *	TXN_OK for TXNLIST_PGNO).  When "delete" is 1 the entry is unlinked
 *	and freed and *txnlistp is set to NULL; otherwise the entry is moved
 *	to the front of its bucket and returned in *txnlistp.
 *
 * Returns 0 on a match and DB_NOTFOUND when hp is NULL or nothing matches.
 */
static int
__db_txnlist_find_internal(dbenv,
    hp, type, txnid, uid, txnlistp, delete, statusp)
	DB_ENV *dbenv;
	DB_TXNHEAD *hp;
	db_txnlist_type type;
	u_int32_t txnid;
	u_int8_t uid[DB_FILE_ID_LEN];
	DB_TXNLIST **txnlistp;
	int delete;
	u_int32_t *statusp;
{
	struct __db_headlink *head;
	DB_TXNLIST *p;
	u_int32_t generation, hash;
	int ret;

	ret = 0;

	if (hp == NULL)
		return (DB_NOTFOUND);

	/* Work out the bucket hash (and generation) for the search key. */
	if (type == TXNLIST_TXNID) {
		hash = txnid;
		FIND_GENERATION(hp, txnid, generation);
	} else if (type == TXNLIST_PGNO) {
		memcpy(&hash, uid, sizeof(hash));
		generation = 0;
	} else
		return (__db_panic(dbenv, EINVAL));

	head = &hp->head[DB_TXNLIST_MASK(hp, hash)];

	LIST_FOREACH(p, head, links) {
		if (p->type != type)
			continue;

		if (type == TXNLIST_TXNID) {
			/* Both the id and its generation must agree. */
			if (p->u.t.txnid != txnid ||
			    generation != p->u.t.generation)
				continue;
			*statusp = p->u.t.status;
		} else if (type == TXNLIST_PGNO) {
			if (memcmp(uid, p->u.p.uid, DB_FILE_ID_LEN) != 0)
				continue;
			*statusp = TXN_OK;
		} else
			return (__db_panic(dbenv, EINVAL));

		if (delete == 1) {
			LIST_REMOVE(p, links);
			__os_free(dbenv, p);
			*txnlistp = NULL;
			return (ret);
		}

		/* Move the hit to the front of its bucket. */
		if (p != LIST_FIRST(head)) {
			LIST_REMOVE(p, links);
			LIST_INSERT_HEAD(head, p, links);
		}
		*txnlistp = p;
		return (ret);
	}

	return (DB_NOTFOUND);
}
/*
 * __db_txnlist_gen --
 *	Add or remove a transaction-id generation.
 *
 *	incr < 0 drops the newest generation: the generation counter is
 *	decremented and the array is shifted down by one entry.  Otherwise a
 *	new generation covering ids min..max is pushed at index 0, doubling
 *	the array when it is full.
 *
 *	The counter and the allocation size are only updated once the array
 *	has been grown successfully, so a failed __os_realloc leaves the
 *	list head exactly as it was instead of claiming more entries than
 *	are allocated.
 *
 * Returns 0 on success or the __os_realloc error.
 */
int
__db_txnlist_gen(dbenv, hp, incr, min, max)
	DB_ENV *dbenv;
	DB_TXNHEAD *hp;
	int incr;
	u_int32_t min, max;
{
	int ret;

	if (incr < 0) {
		--hp->generation;
		memmove(hp->gen_array, &hp->gen_array[1],
		    (hp->generation + 1) * sizeof(hp->gen_array[0]));
		return (0);
	}

	/* Grow first; commit the new size only if that worked. */
	if (hp->generation + 1 >= hp->gen_alloc) {
		if ((ret = __os_realloc(dbenv, hp->gen_alloc * 2 *
		    sizeof(hp->gen_array[0]), &hp->gen_array)) != 0)
			return (ret);
		hp->gen_alloc *= 2;
	}
	++hp->generation;

	/* Shift the existing entries up and install the new one at 0. */
	memmove(&hp->gen_array[1], &hp->gen_array[0],
	    hp->generation * sizeof(hp->gen_array[0]));
	hp->gen_array[0].generation = hp->generation;
	hp->gen_array[0].txn_min = min;
	hp->gen_array[0].txn_max = max;

	return (0);
}
int
__db_txnlist_lsnadd(dbenv, hp, lsnp)
DB_ENV *dbenv;
DB_TXNHEAD *hp;
DB_LSN *lsnp;
{
DB_TXNLIST *elp;
int ret;
if (IS_ZERO_LSN(*lsnp))
return (0);
LIST_FOREACH(elp, &hp->head[0], links)
if (elp->type == TXNLIST_LSN)
break;
if (elp == NULL) {
if ((ret = __db_txnlist_lsninit(dbenv, hp, lsnp)) != 0)
return (ret);
return (DB_SURPRISE_KID);
}
if (elp->u.l.stack_indx == elp->u.l.stack_size) {
elp->u.l.stack_size <<= 1;
if ((ret = __os_realloc(dbenv, sizeof(DB_LSN) *
elp->u.l.stack_size, &elp->u.l.lsn_stack)) != 0) {
__db_txnlist_end(dbenv, hp);
return (ret);
}
}
elp->u.l.lsn_stack[elp->u.l.stack_indx++] = *lsnp;
return (0);
}
/*
 * __db_txnlist_lsnget --
 *	Pop the most recently pushed LSN from the stack in bucket 0 into
 *	*lsnp.  When there is no stack entry or the stack is empty, *lsnp is
 *	set to the zero LSN.  The flags argument is unused.
 *
 * Always returns 0.
 */
int
__db_txnlist_lsnget(dbenv, hp, lsnp, flags)
	DB_ENV *dbenv;
	DB_TXNHEAD *hp;
	DB_LSN *lsnp;
	u_int32_t flags;
{
	DB_TXNLIST *stk;

	COMPQUIET(dbenv, NULL);
	COMPQUIET(flags, 0);

	LIST_FOREACH(stk, &hp->head[0], links)
		if (stk->type == TXNLIST_LSN)
			break;

	if (stk != NULL && stk->u.l.stack_indx != 0) {
		stk->u.l.stack_indx--;
		*lsnp = stk->u.l.lsn_stack[stk->u.l.stack_indx];
	} else {
		ZERO_LSN(*lsnp);
	}

	return (0);
}
/*
 * __db_txnlist_lsninit --
 *	Create the TXNLIST_LSN entry in bucket 0 with a stack of
 *	DB_LSN_STACK_SIZE slots whose first element is *lsnp.
 *
 *	The entry is linked into the list only after it has been fully
 *	built.  Linking it first would put a node typed TXNLIST_LSN with a
 *	never-assigned lsn_stack on the list, and the error path's
 *	__db_txnlist_end would then pass that pointer to __os_free.
 *
 *	As before, any failure destroys the whole list with
 *	__db_txnlist_end before the error is returned.
 *
 * Returns 0 on success or the allocation error.
 */
int
__db_txnlist_lsninit(dbenv, hp, lsnp)
	DB_ENV *dbenv;
	DB_TXNHEAD *hp;
	DB_LSN *lsnp;
{
	DB_TXNLIST *elp;
	int ret;

	elp = NULL;

	if ((ret = __os_malloc(dbenv, sizeof(DB_TXNLIST), &elp)) != 0)
		goto err;

	if ((ret = __os_malloc(dbenv,
	    sizeof(DB_LSN) * DB_LSN_STACK_SIZE, &elp->u.l.lsn_stack)) != 0) {
		/* Not on the list yet, so release the node here. */
		__os_free(dbenv, elp);
		goto err;
	}

	elp->type = TXNLIST_LSN;
	elp->u.l.stack_indx = 1;
	elp->u.l.stack_size = DB_LSN_STACK_SIZE;
	elp->u.l.lsn_stack[0] = *lsnp;

	LIST_INSERT_HEAD(&hp->head[0], elp, links);

	return (0);

err:	__db_txnlist_end(dbenv, hp);
	return (ret);
}
/*
 * __db_add_limbo --
 *	Record "count" consecutive pages, starting at pgno, of the file
 *	registered as fileid on the limbo list.
 *
 *	A count of 0 records nothing.  The previous do/while form decremented
 *	the unsigned count after the first page, so a zero count wrapped
 *	around and kept adding pages.
 *
 * Returns 0 on success, or the error from the file-name lookup or from
 * adding a page.
 */
int
__db_add_limbo(dbenv, hp, fileid, pgno, count)
	DB_ENV *dbenv;
	DB_TXNHEAD *hp;
	int32_t fileid;
	db_pgno_t pgno;
	u_int32_t count;
{
	DB_LOG *dblp;
	FNAME *fnp;
	int ret;

	dblp = dbenv->lg_handle;
	if ((ret = __dbreg_id_to_fname(dblp, fileid, 0, &fnp)) != 0)
		return (ret);

	for (; count != 0; --count, ++pgno)
		if ((ret =
		    __db_txnlist_pgnoadd(dbenv, hp, fileid, fnp->ufid,
		    R_ADDR(&dblp->reginfo, fnp->name_off), pgno)) != 0)
			return (ret);

	return (0);
}
/*
 * __db_do_the_limbo --
 *	Process every non-empty bucket of the list.  With a parent
 *	transaction (ptxn != NULL) each bucket goes to __db_limbo_move;
 *	otherwise it goes to __db_limbo_bucket with the given state.
 *
 *	Processing stops at the first failure, which is reported and turned
 *	into an environment panic.
 *
 * Returns 0 on success or the value returned by __db_panic.
 */
int
__db_do_the_limbo(dbenv, ptxn, txn, hp, state)
	DB_ENV *dbenv;
	DB_TXN *ptxn, *txn;
	DB_TXNHEAD *hp;
	db_limbo_state state;
{
	DB_TXNLIST *first;
	u_int32_t slot;
	int ret;

	ret = 0;
	for (slot = 0; ret == 0 && slot < hp->nslots; slot++) {
		first = LIST_FIRST(&hp->head[slot]);
		if (first == NULL)
			continue;

		if (ptxn != NULL)
			ret = __db_limbo_move(dbenv, ptxn, txn, first);
		else
			ret = __db_limbo_bucket(dbenv, txn, first, state);
	}

	if (ret != 0) {
		__db_errx(dbenv, "Fatal error in abort of an allocation");
		ret = __db_panic(dbenv, ret);
	}

	return (ret);
}
/*
 * __db_lock_move --
 *	Acquire a page lock on (fileid, pgno) in the given mode for txn's
 *	locker, then issue a DB_LOCK_TRADE request for that lock through
 *	ptxn's locker.
 *
 * Returns 0 on success, or the error from __lock_get or __lock_vec.
 */
static int
__db_lock_move(dbenv, fileid, pgno, mode, ptxn, txn)
	DB_ENV *dbenv;
	u_int8_t *fileid;
	db_pgno_t pgno;
	db_lockmode_t mode;
	DB_TXN *ptxn, *txn;
{
	DBT lock_dbt;
	DB_LOCK lock;
	DB_LOCK_ILOCK lock_obj;
	DB_LOCKREQ req;
	int ret;

	/* Describe the page being locked. */
	lock_obj.type = DB_PAGE_LOCK;
	lock_obj.pgno = pgno;
	memcpy(lock_obj.fileid, fileid, DB_FILE_ID_LEN);

	memset(&lock_dbt, 0, sizeof(lock_dbt));
	lock_dbt.data = &lock_obj;
	lock_dbt.size = sizeof(lock_obj);

	ret = __lock_get(dbenv, txn->locker, 0, &lock_dbt, mode, &lock);
	if (ret != 0)
		return (ret);

	/* Trade the freshly acquired lock. */
	memset(&req, 0, sizeof(req));
	req.lock = lock;
	req.op = DB_LOCK_TRADE;

	return (__lock_vec(dbenv, ptxn->locker, 0, &req, 1, NULL));
}
/*
 * __db_limbo_move --
 *	Walk the bucket chain starting at elp and, for each TXNLIST_PGNO
 *	entry that is not yet marked locked, move a write lock on the file's
 *	base metadata page via __db_lock_move and mark the entry locked.
 *
 * Returns 0 on success or the first __db_lock_move error.
 */
static int
__db_limbo_move(dbenv, ptxn, txn, elp)
	DB_ENV *dbenv;
	DB_TXN *ptxn, *txn;
	DB_TXNLIST *elp;
{
	int ret;

	while (elp != NULL) {
		if (elp->type == TXNLIST_PGNO && elp->u.p.locked != 1) {
			ret = __db_lock_move(dbenv, elp->u.p.uid,
			    PGNO_BASE_MD, DB_LOCK_WRITE, ptxn, txn);
			if (ret != 0)
				return (ret);
			elp->u.p.locked = 1;
		}
		elp = LIST_NEXT(elp, links);
	}

	return (0);
}
/* T_RESTORED: true when txn is non-NULL and has the TXN_RESTORED flag set. */
#define T_RESTORED(txn) ((txn) != NULL && F_ISSET(txn, TXN_RESTORED))
/*
 * __db_limbo_bucket --
 *	Process every TXNLIST_PGNO entry on the bucket chain starting at elp.
 *
 *	For each entry the file is located (by registered file id, then by
 *	uid, then by opening it by name), and its limbo pages are handed to
 *	__db_limbo_prepare (state LIMBO_PREPARE) or __db_limbo_fix (all other
 *	states).  Depending on the state the work is done inside "txn", inside
 *	a compensating transaction started here, or with no transaction at
 *	all; in the last case the metadata page's free-list head is updated
 *	directly.
 *
 * Returns 0 on success or the first error that was not retried or mapped
 * to success.
 */
static int
__db_limbo_bucket(dbenv, txn, elp, state)
DB_ENV *dbenv;
DB_TXN *txn;
DB_TXNLIST *elp;
db_limbo_state state;
{
DB *dbp;
DB_MPOOLFILE *mpf;
DBMETA *meta;
DB_TXN *ctxn, *t;
FNAME *fname;
db_pgno_t last_pgno, pgno;
int dbp_created, in_retry, ret, t_ret;
ctxn = NULL;
in_retry = 0;
meta = NULL;
mpf = NULL;
ret = 0;
for (; elp != NULL; elp = LIST_NEXT(elp, links)) {
/* Only page-number (limbo) entries are of interest here. */
if (elp->type != TXNLIST_PGNO)
continue;
retry: dbp_created = 0;
/*
 * Pick the transaction to work in: the caller's for LIMBO_PREPARE;
 * otherwise a new compensating transaction, unless this is a retry,
 * the state is LIMBO_RECOVER or LIMBO_TIMESTAMP, or txn is restored.
 */
if (state == LIMBO_PREPARE)
ctxn = txn;
else if (!in_retry && state != LIMBO_RECOVER &&
state != LIMBO_TIMESTAMP && !T_RESTORED(txn) &&
(ret = __txn_compensate_begin(dbenv, &ctxn)) != 0)
return (ret);
t = ctxn == NULL ? txn : ctxn;
/* Find the handle by registered file id first. */
ret = __dbreg_id_to_db(dbenv, t, &dbp, elp->u.p.fileid, 0);
/*
 * If that id is gone, or now refers to a file with a different uid,
 * look the file up by uid and use the id found that way.
 */
if (ret == DB_DELETED || ret == ENOENT ||
((ret == 0 &&
memcmp(elp->u.p.uid, dbp->fileid, DB_FILE_ID_LEN) != 0))) {
if ((ret = __dbreg_fid_to_fname(
dbenv->lg_handle, elp->u.p.uid, 0, &fname)) == 0)
ret = __dbreg_id_to_db(
dbenv, t, &dbp, fname->id, 0);
}
/* Deleted or discarded files need no work. */
if (ret == DB_DELETED ||
(ret == 0 && F_ISSET(dbp, DB_AM_DISCARD)))
goto next;
/* Any other lookup failure: create a handle and open by name. */
if (ret != 0) {
if ((ret = __db_create_internal(&dbp, dbenv, 0)) != 0)
goto err;
F_SET(dbp, DB_AM_COMPENSATE);
dbp_created = 1;
ret = __db_open(dbp, t, elp->u.p.fname, NULL,
DB_UNKNOWN, DB_ODDFILESIZE, __db_omode(OWNER_RW),
PGNO_BASE_MD);
if (ret == ENOENT)
goto next;
/*
 * NOTE(review): an open failure other than ENOENT is not checked
 * here; execution continues to the uid comparison below -- confirm
 * this is intended.
 */
}
/* The file found must be the one the entry was recorded for. */
if (memcmp(elp->u.p.uid, dbp->fileid, DB_FILE_ID_LEN) != 0)
goto next;
mpf = dbp->mpf;
last_pgno = PGNO_INVALID;
/*
 * Pin the metadata page when it is not already held and either no
 * transaction is in use or the state is LIMBO_COMPENSATE; remember
 * the current free-list head.
 */
if (meta == NULL &&
(ctxn == NULL || state == LIMBO_COMPENSATE)) {
pgno = PGNO_BASE_MD;
if ((ret = __memp_fget(mpf, &pgno, txn,
(state != LIMBO_PREPARE) ? DB_MPOOL_DIRTY : 0,
&meta)) != 0)
goto err;
last_pgno = meta->free;
}
if (state == LIMBO_PREPARE) {
if ((ret = __db_limbo_prepare(dbp, ctxn, elp)) != 0)
goto err;
} else
ret = __db_limbo_fix(dbp,
ctxn, elp, &last_pgno, meta, state);
/*
 * A failed fix is fatal for DB_RUNRECOVERY or when no transaction is
 * in use; otherwise abort the transaction and redo this entry with
 * in_retry set, which suppresses starting a new compensating
 * transaction.
 */
if (ret != 0) {
if (ret == DB_RUNRECOVERY || ctxn == NULL)
goto err;
in_retry = 1;
if ((ret = __txn_abort(ctxn)) != 0)
goto err;
ctxn = NULL;
goto retry;
}
/* The caller owns the transaction in LIMBO_PREPARE; just forget it. */
if (state == LIMBO_PREPARE)
ctxn = NULL;
else if (ctxn != NULL) {
/*
 * Commit the compensating transaction without syncing.  For
 * LIMBO_COMPENSATE the log handle's DBLOG_RECOVER flag is cleared
 * around the commit and set again afterwards.  A failed commit
 * retries the entry.
 */
if (state == LIMBO_COMPENSATE)
F_CLR(dbenv->lg_handle, DBLOG_RECOVER);
ret = __txn_commit(ctxn, DB_TXN_NOSYNC);
ctxn = NULL;
if (state == LIMBO_COMPENSATE)
F_SET(dbenv->lg_handle, DBLOG_RECOVER);
if (ret != 0)
goto retry;
}
/* No transaction and the free list head is unchanged: release meta. */
else if (last_pgno == meta->free) {
if ((ret =
__memp_fput(mpf, meta, DB_PRIORITY_UNCHANGED)) != 0)
goto err;
meta = NULL;
} else {
/*
 * No transaction and pages were added to the free list: release the
 * metadata page, sync the file, then re-pin the metadata page and
 * store the new free-list head.
 */
if (!IS_RECOVERING(dbenv) && !T_RESTORED(txn))
__db_errx(dbenv, "Flushing free list to disk");
if ((ret =
__memp_fput(mpf, meta, DB_PRIORITY_UNCHANGED)) != 0)
goto err;
meta = NULL;
if ((ret = __db_sync(dbp)) == 0) {
pgno = PGNO_BASE_MD;
if ((ret = __memp_fget(mpf, &pgno, txn,
DB_MPOOL_DIRTY, &meta)) != 0)
goto err;
meta->free = last_pgno;
if ((ret = __memp_fput(mpf,
meta, DB_PRIORITY_UNCHANGED)) != 0)
goto err;
meta = NULL;
} else {
/* A failed sync is reported but not treated as an error. */
__db_err(dbenv, ret, "%s", dbp->fname);
__db_errx(dbenv, "%s: %s %s", dbp->fname,
"allocation flush failed, some free pages",
"may not appear in the free list");
ret = 0;
}
}
next:
/* Per-entry cleanup: abort a leftover transaction, close our handle. */
if (ctxn != NULL &&
(t_ret = __txn_abort(ctxn)) != 0 && ret == 0)
ret = t_ret;
if (dbp_created &&
(t_ret = __db_close(dbp, txn, DB_NOSYNC)) != 0 && ret == 0)
ret = t_ret;
dbp = NULL;
/* The entry's name and page array are kept for PREPARE and TIMESTAMP. */
if (state != LIMBO_PREPARE && state != LIMBO_TIMESTAMP) {
__os_free(dbenv, elp->u.p.fname);
__os_free(dbenv, elp->u.p.pgno_array);
}
/* A missing file is not an error. */
if (ret == ENOENT)
ret = 0;
else if (ret != 0)
goto err;
}
/* Release the metadata page if it is still pinned. */
err: if (meta != NULL)
(void)__memp_fput(mpf, meta, DB_PRIORITY_UNCHANGED);
return (ret);
}
/*
 * __db_limbo_fix --
 *	Walk the page numbers recorded in elp and put each page that still
 *	needs it onto the free list of dbp.
 *
 *	dbp	file the pages belong to
 *	ctxn	transaction to work in, or NULL
 *	elp	TXNLIST_PGNO entry holding the page-number array
 *	lastp	in/out: head of the free list being built when ctxn is NULL
 *	meta	pinned metadata page (its LSN is used when logging)
 *	state	limbo state selecting how a page is freed
 *
 * Returns 0 on success or the first error encountered.
 */
static int
__db_limbo_fix(dbp, ctxn, elp, lastp, meta, state)
DB *dbp;
DB_TXN *ctxn;
DB_TXNLIST *elp;
db_pgno_t *lastp;
DBMETA *meta;
db_limbo_state state;
{
DBC *dbc;
DBT ldbt;
DB_MPOOLFILE *mpf;
PAGE *freep, *pagep;
db_pgno_t next, pgno;
u_int32_t i;
int ret, t_ret;
dbc = NULL;
mpf = dbp->mpf;
pagep = NULL;
ret = 0;
for (i = 0; i < elp->u.p.nentries; i++) {
pgno = elp->u.p.pgno_array[i];
/* Slots already marked invalid have been dealt with. */
if (pgno == PGNO_INVALID)
continue;
/* Pin the page, creating it if necessary; ENOSPC skips the page. */
if ((ret = __memp_fget(mpf, &pgno, ctxn,
DB_MPOOL_CREATE | DB_MPOOL_EDIT, &pagep)) != 0) {
if (ret != ENOSPC)
goto err;
continue;
}
/* The page is handled when compensating or when its LSN is zero. */
if (state == LIMBO_COMPENSATE || IS_ZERO_LSN(LSN(pagep))) {
if (ctxn == NULL) {
/*
 * No transaction: follow the free list from *lastp to see whether
 * this page is already on it.
 */
for (next = *lastp; next != 0; ) {
if (next == pgno)
break;
if ((ret = __memp_fget(mpf,
&next, ctxn, 0, &freep)) != 0)
goto err;
next = NEXT_PGNO(freep);
if ((ret = __memp_fput(mpf,
freep, DB_PRIORITY_UNCHANGED)) != 0)
goto err;
}
/*
 * Not on the list: initialise it as an invalid page linked in front
 * of the current head and make it the new head.
 */
if (next != pgno) {
P_INIT(pagep, dbp->pgsize, pgno,
PGNO_INVALID, *lastp, 0, P_INVALID);
INIT_LSN(LSN(pagep));
*lastp = pgno;
}
} else if (state == LIMBO_COMPENSATE) {
/* Compensating: zero the page LSN and write a pg_new log record. */
ZERO_LSN(pagep->lsn);
memset(&ldbt, 0, sizeof(ldbt));
ldbt.data = pagep;
ldbt.size = P_OVERHEAD(dbp);
if ((ret = __db_pg_new_log(dbp, ctxn,
&LSN(meta), 0, pagep->pgno,
&LSN(meta), PGNO_BASE_MD,
&ldbt, pagep->next_pgno)) != 0)
goto err;
} else {
/*
 * Otherwise free the page through a cursor (opened once and reused)
 * with DBC_DONTLOCK set and DBC_RECOVER cleared.  pagep is cleared
 * after __db_free whether or not it succeeded, so it is not put
 * again below.
 */
if (dbc == NULL && (ret =
__db_cursor(dbp, ctxn, &dbc, 0)) != 0)
goto err;
F_SET(dbc, DBC_DONTLOCK);
F_CLR(dbc, DBC_RECOVER);
ret = __db_free(dbc, pagep);
pagep = NULL;
if (ret != 0) {
(void)__dbc_close(dbc);
dbc = NULL;
goto err;
}
}
}
else
/* Non-zero LSN outside compensation: mark the slot as done. */
elp->u.p.pgno_array[i] = PGNO_INVALID;
/* Unpin the page if it is still held. */
if (pagep != NULL) {
ret = __memp_fput(mpf, pagep, DB_PRIORITY_UNCHANGED);
pagep = NULL;
}
if (ret != 0)
goto err;
}
/* Common exit: release a still-pinned page and close the cursor. */
err: if (pagep != NULL &&
(t_ret = __memp_fput(mpf, pagep, DB_PRIORITY_UNCHANGED)) != 0 &&
ret == 0)
ret = t_ret;
if (dbc != NULL && (t_ret = __dbc_close(dbc)) != 0 && ret == 0)
ret = t_ret;
return (ret);
}
/*
 * __db_limbo_prepare --
 *	For every page number recorded in elp, pin the page and, when its
 *	LSN is zero, write a pg_prepare log record for it in txn.
 *
 *	A page that cannot be fetched because of ENOSPC is skipped.
 *
 * Returns 0 on success or the first fetch, log, or put error.
 */
static int
__db_limbo_prepare(dbp, txn, elp)
	DB *dbp;
	DB_TXN *txn;
	DB_TXNLIST *elp;
{
	DB_LSN lsn;
	DB_MPOOLFILE *mpf;
	PAGE *pagep;
	db_pgno_t pgno;
	u_int32_t idx;
	int ret, t_ret;

	mpf = dbp->mpf;
	pagep = NULL;

	for (idx = 0; idx < elp->u.p.nentries; idx++) {
		pgno = elp->u.p.pgno_array[idx];

		ret = __memp_fget(mpf, &pgno, txn, DB_MPOOL_CREATE, &pagep);
		if (ret == ENOSPC)
			continue;
		if (ret != 0)
			return (ret);

		if (IS_ZERO_LSN(LSN(pagep)))
			ret = __db_pg_prepare_log(dbp, txn, &lsn, 0, pgno);

		/* Always unpin; keep the first error seen. */
		t_ret = __memp_fput(mpf, pagep, DB_PRIORITY_UNCHANGED);
		if (ret == 0)
			ret = t_ret;
		if (ret != 0)
			return (ret);
	}

	return (0);
}
/* Initial number of page-number slots in a new TXNLIST_PGNO entry. */
#define	DB_TXNLIST_MAX_PGNO	8

/*
 * __db_txnlist_pgnoadd --
 *	Append pgno to the limbo entry for the file identified by uid,
 *	creating the entry (with a copy of fname) when none exists and
 *	doubling the page array when it is full.  The entry's fileid is
 *	refreshed on every call.
 *
 *	Changes from the earlier form, all confined to failure paths:
 *	  - a new entry is linked into the list only after its type, name
 *	    and page array are set, so an allocation failure no longer
 *	    leaves a partly initialised node for later list walks to see;
 *	    the partial allocations are released instead;
 *	  - maxentry is doubled only after the page array has actually
 *	    been grown;
 *	  - the initial array size uses DB_TXNLIST_MAX_PGNO for both the
 *	    allocation and maxentry rather than a separate literal.
 *
 * Returns 0 on success, or the lookup/allocation error.
 */
static int
__db_txnlist_pgnoadd(dbenv, hp, fileid, uid, fname, pgno)
	DB_ENV *dbenv;
	DB_TXNHEAD *hp;
	int32_t fileid;
	u_int8_t uid[DB_FILE_ID_LEN];
	char *fname;
	db_pgno_t pgno;
{
	DB_TXNLIST *elp;
	size_t len;
	u_int32_t hash, status;
	int ret;

	elp = NULL;

	ret = __db_txnlist_find_internal(dbenv, hp,
	    TXNLIST_PGNO, 0, uid, &elp, 0, &status);
	if (ret != 0 && ret != DB_NOTFOUND)
		return (ret);

	if (ret == DB_NOTFOUND || status != TXN_OK) {
		/* Build the whole entry before it becomes visible. */
		if ((ret =
		    __os_malloc(dbenv, sizeof(DB_TXNLIST), &elp)) != 0)
			return (ret);

		len = strlen(fname) + 1;
		if ((ret = __os_malloc(dbenv, len, &elp->u.p.fname)) != 0) {
			__os_free(dbenv, elp);
			return (ret);
		}
		if ((ret = __os_malloc(dbenv,
		    DB_TXNLIST_MAX_PGNO * sizeof(db_pgno_t),
		    &elp->u.p.pgno_array)) != 0) {
			__os_free(dbenv, elp->u.p.fname);
			__os_free(dbenv, elp);
			return (ret);
		}

		memcpy(elp->u.p.fname, fname, len);
		memcpy(elp->u.p.uid, uid, DB_FILE_ID_LEN);
		elp->type = TXNLIST_PGNO;
		elp->u.p.locked = 0;
		elp->u.p.maxentry = DB_TXNLIST_MAX_PGNO;
		elp->u.p.nentries = 0;

		/* The bucket is chosen from the leading bytes of the uid. */
		memcpy(&hash, uid, sizeof(hash));
		LIST_INSERT_HEAD(
		    &hp->head[DB_TXNLIST_MASK(hp, hash)], elp, links);
	} else if (elp->u.p.nentries == elp->u.p.maxentry) {
		/* Grow first; record the new capacity only on success. */
		if ((ret = __os_realloc(dbenv, (elp->u.p.maxentry << 1) *
		    sizeof(db_pgno_t), &elp->u.p.pgno_array)) != 0)
			return (ret);
		elp->u.p.maxentry <<= 1;
	}

	elp->u.p.pgno_array[elp->u.p.nentries++] = pgno;
	/* Always remember the most recent file id for this uid. */
	elp->u.p.fileid = fileid;

	return (0);
}
#ifdef DEBUG
static const char *__db_txnlist_statusname __P((u_int32_t));

/*
 * __db_txnlist_statusname --
 *	Map a transaction status value to the label printed for it.
 */
static const char *
__db_txnlist_statusname(status)
	u_int32_t status;
{
	switch (status) {
	case TXN_OK:
		return ("OK");
	case TXN_COMMIT:
		return ("commit");
	case TXN_PREPARE:
		return ("prepare");
	case TXN_ABORT:
		return ("abort");
	case TXN_IGNORE:
		return ("ignore");
	case TXN_EXPECTED:
		return ("expected");
	case TXN_UNEXPECTED:
		return ("unexpected");
	default:
		break;
	}
	return ("UNKNOWN");
}

/*
 * __db_txnlist_print --
 *	Debugging aid: print the list's maximum id and generation, then one
 *	line per transaction entry.  Entries that are not transaction-id
 *	entries are reported as unrecognized.
 */
void
__db_txnlist_print(hp)
	DB_TXNHEAD *hp;
{
	DB_TXNLIST *entry;
	u_int32_t slot;

	printf("Maxid: %lu Generation: %lu\n",
	    (u_long)hp->maxid, (u_long)hp->generation);

	for (slot = 0; slot < hp->nslots; slot++) {
		LIST_FOREACH(entry, &hp->head[slot], links) {
			if (entry->type == TXNLIST_TXNID)
				printf("TXNID: %lx(%lu): %s\n",
				    (u_long)entry->u.t.txnid,
				    (u_long)entry->u.t.generation,
				    __db_txnlist_statusname(
				    entry->u.t.status));
			else
				printf("Unrecognized type: %d\n", entry->type);
		}
	}
}
#endif