#include "db_config.h"
#ifndef lint
static const char revid[] = "$Id: log_get.c,v 1.2 2004/03/30 01:23:43 jtownsen Exp $";
#endif
#ifndef NO_SYSTEM_INCLUDES
#include <sys/types.h>
#include <string.h>
#endif
#include "db_int.h"
#include "dbinc/crypto.h"
#include "dbinc/db_page.h"
#include "dbinc/hmac.h"
#include "dbinc/log.h"
#include "dbinc/hash.h"
/*
 * RLOCK --
 *	Log region lock state as tracked by a single cursor get call.
 *
 *	L_ALREADY	the cursor has DB_LOG_LOCKED set, so this code neither
 *			locks nor unlocks the region.
 *	L_ACQUIRED	this call took the region lock with R_LOCK and must
 *			release it with R_UNLOCK.
 *	L_NONE		this call is not holding the region lock.
 */
typedef enum { L_ALREADY, L_ACQUIRED, L_NONE } RLOCK;
/* Forward declarations for the file-local log cursor routines. */
static int __log_c_close_pp __P((DB_LOGC *, u_int32_t));
static int __log_c_get_pp __P((DB_LOGC *, DB_LSN *, DBT *, u_int32_t));
static int __log_c_get_int __P((DB_LOGC *, DB_LSN *, DBT *, u_int32_t));
static int __log_c_hdrchk __P((DB_LOGC *, DB_LSN *, HDR *, int *));
static int __log_c_incursor __P((DB_LOGC *, DB_LSN *, HDR *, u_int8_t **));
static int __log_c_inregion __P((DB_LOGC *,
DB_LSN *, RLOCK *, DB_LSN *, HDR *, u_int8_t **));
static int __log_c_io __P((DB_LOGC *,
u_int32_t, u_int32_t, void *, size_t *, int *));
static int __log_c_ondisk __P((DB_LOGC *,
DB_LSN *, DB_LSN *, int, HDR *, u_int8_t **, int *));
static int __log_c_set_maxrec __P((DB_LOGC *, char *));
static int __log_c_shortread __P((DB_LOGC *, DB_LSN *, int));
/*
 * __log_cursor_pp --
 *	DB_ENV->log_cursor entry point: validate the call, then create a
 *	log cursor through __log_cursor.
 *
 * The environment must not be panicked and must have logging configured;
 * flags must be 0.  When the environment is replicated, the cursor creation
 * is wrapped in __env_rep_enter/__env_rep_exit.  Returns 0 on success or a
 * non-zero error value.
 */
int
__log_cursor_pp(dbenv, logcp, flags)
	DB_ENV *dbenv;
	DB_LOGC **logcp;
	u_int32_t flags;
{
	int replicated, ret;

	PANIC_CHECK(dbenv);
	ENV_REQUIRES_CONFIG(dbenv,
	    dbenv->lg_handle, "DB_ENV->log_cursor", DB_INIT_LOG);

	/* No flags are currently accepted. */
	ret = __db_fchk(dbenv, "DB_ENV->log_cursor", flags, 0);
	if (ret != 0)
		return (ret);

	/* Sample the replication state once so enter/exit stay paired. */
	replicated = 0;
	if (IS_ENV_REPLICATED(dbenv)) {
		replicated = 1;
		__env_rep_enter(dbenv);
	}

	ret = __log_cursor(dbenv, logcp);

	if (replicated)
		__env_rep_exit(dbenv);
	return (ret);
}
/*
 * __log_cursor --
 *	Allocate and initialize a log cursor.
 *
 * *logcp is set to NULL up front and only replaced with the new cursor on
 * success.  The cursor starts with a DB_LOGC_BUF_SIZE read buffer and a
 * maximum record size of MEGABYTE.  Returns 0 on success or the allocation
 * error.
 */
int
__log_cursor(dbenv, logcp)
	DB_ENV *dbenv;
	DB_LOGC **logcp;
{
	DB_LOGC *cursor;
	int ret;

	*logcp = NULL;

	/* Zero-filled cursor structure. */
	ret = __os_calloc(dbenv, 1, sizeof(DB_LOGC), &cursor);
	if (ret != 0)
		return (ret);

	/* The read buffer is allocated now rather than on first use. */
	cursor->bp_size = DB_LOGC_BUF_SIZE;
	cursor->bp_maxrec = MEGABYTE;
	ret = __os_malloc(dbenv, cursor->bp_size, &cursor->bp);
	if (ret != 0) {
		__os_free(dbenv, cursor);
		return (ret);
	}

	/* Wire up the environment and the public methods. */
	cursor->dbenv = dbenv;
	cursor->close = __log_c_close_pp;
	cursor->get = __log_c_get_pp;

	*logcp = cursor;
	return (0);
}
/*
 * __log_c_close_pp --
 *	DB_LOGC->close entry point: validate the call, then discard the
 *	cursor through __log_c_close.
 *
 * flags must be 0.  When the environment is replicated, the close is
 * wrapped in __env_rep_enter/__env_rep_exit.  Returns 0 on success or a
 * non-zero error value.
 */
static int
__log_c_close_pp(logc, flags)
	DB_LOGC *logc;
	u_int32_t flags;
{
	DB_ENV *dbenv;
	int replicated, ret;

	dbenv = logc->dbenv;

	PANIC_CHECK(dbenv);

	/* No flags are currently accepted. */
	ret = __db_fchk(dbenv, "DB_LOGC->close", flags, 0);
	if (ret != 0)
		return (ret);

	/* Sample the replication state once so enter/exit stay paired. */
	replicated = 0;
	if (IS_ENV_REPLICATED(dbenv)) {
		replicated = 1;
		__env_rep_enter(dbenv);
	}

	ret = __log_c_close(logc);

	if (replicated)
		__env_rep_exit(dbenv);
	return (ret);
}
/*
 * __log_c_close --
 *	Release everything owned by a log cursor, including the cursor
 *	structure itself.  The cursor must not be used after this returns.
 *
 * Always returns 0; a failure closing the file handle is ignored.
 */
int
__log_c_close(logc)
	DB_LOGC *logc;
{
	DB_ENV *dbenv;

	dbenv = logc->dbenv;

	/* Close the log file handle, if one is open. */
	if (logc->c_fhp != NULL) {
		(void)__os_closehandle(dbenv, logc->c_fhp);
		logc->c_fhp = NULL;
	}

	/* The cursor-owned return buffer is only present if it was used. */
	if (logc->c_dbt.data != NULL)
		__os_free(dbenv, logc->c_dbt.data);

	/* The read buffer, then the cursor. */
	__os_free(dbenv, logc->bp);
	__os_free(dbenv, logc);

	return (0);
}
/*
 * __log_c_get_pp --
 *	DB_LOGC->get entry point: validate the arguments, then fetch the
 *	record through __log_c_get.
 *
 * flags must be exactly one of DB_CURRENT, DB_FIRST, DB_LAST, DB_NEXT,
 * DB_PREV or DB_SET; DB_SET additionally requires a non-zero LSN in *alsn
 * (EINVAL otherwise).  When the environment is replicated, the get is
 * wrapped in __env_rep_enter/__env_rep_exit.
 */
static int
__log_c_get_pp(logc, alsn, dbt, flags)
	DB_LOGC *logc;
	DB_LSN *alsn;
	DBT *dbt;
	u_int32_t flags;
{
	DB_ENV *dbenv;
	int replicated, ret;

	dbenv = logc->dbenv;

	PANIC_CHECK(dbenv);

	if (flags == DB_SET) {
		/* An explicit position must name a real LSN. */
		if (IS_ZERO_LSN(*alsn)) {
			__db_err(dbenv, "DB_LOGC->get: invalid LSN: %lu/%lu",
			    (u_long)alsn->file, (u_long)alsn->offset);
			return (EINVAL);
		}
	} else if (flags != DB_CURRENT && flags != DB_FIRST &&
	    flags != DB_LAST && flags != DB_NEXT && flags != DB_PREV)
		return (__db_ferr(dbenv, "DB_LOGC->get", 1));

	/* Sample the replication state once so enter/exit stay paired. */
	replicated = 0;
	if (IS_ENV_REPLICATED(dbenv)) {
		replicated = 1;
		__env_rep_enter(dbenv);
	}

	ret = __log_c_get(logc, alsn, dbt, flags);

	if (replicated)
		__env_rep_exit(dbenv);
	return (ret);
}
/*
 * __log_c_get --
 *	Fetch a log record, skipping records that sit at offset 0 of a log
 *	file when the cursor is being moved with DB_FIRST, DB_NEXT, DB_LAST
 *	or DB_PREV.
 *
 * If the first fetch lands on offset 0 for one of those flags, the fetch
 * is repeated once in the same direction (DB_FIRST continues as DB_NEXT,
 * DB_LAST as DB_PREV).  On any failure the caller's *alsn is restored to
 * the value it had on entry and the error is returned.
 */
int
__log_c_get(logc, alsn, dbt, flags)
	DB_LOGC *logc;
	DB_LSN *alsn;
	DBT *dbt;
	u_int32_t flags;
{
	DB_ENV *dbenv;
	DB_LSN entry_lsn;
	int ret;

	dbenv = logc->dbenv;

	/* Remember the caller's LSN so it can be put back on error. */
	entry_lsn = *alsn;

	ret = __log_c_get_int(logc, alsn, dbt, flags);
	if (ret != 0)
		goto restore;

	/* Records that are not at the start of a file are returned as-is. */
	if (alsn->offset != 0)
		return (0);

	/* Only the movement flags retry; pick the direction to continue. */
	if (flags == DB_FIRST)
		flags = DB_NEXT;
	else if (flags == DB_LAST)
		flags = DB_PREV;
	else if (flags != DB_NEXT && flags != DB_PREV)
		return (0);

	/* Drop the memory handed out for the record being skipped. */
	if (F_ISSET(dbt, DB_DBT_MALLOC)) {
		__os_free(dbenv, dbt->data);
		dbt->data = NULL;
	}

	ret = __log_c_get_int(logc, alsn, dbt, flags);
	if (ret != 0)
		goto restore;
	return (0);

restore:
	*alsn = entry_lsn;
	return (ret);
}
/*
 * __log_c_get_int --
 *	Work out which LSN the flags select, locate that record (cursor
 *	buffer, then log region buffer, then the log file), verify its
 *	checksum and copy it into dbt.
 *
 * On success returns 0, stores the record's LSN in *alsn and updates the
 * cursor's c_lsn, c_len and c_prev.  On failure returns a non-zero value
 * (DB_NOTFOUND, EIO, EAGAIN, or whatever a helper returned) and does not
 * write *alsn.  If this call acquired the region lock it is released
 * before returning.
 */
static int
__log_c_get_int(logc, alsn, dbt, flags)
DB_LOGC *logc;
DB_LSN *alsn;
DBT *dbt;
u_int32_t flags;
{
DB_CIPHER *db_cipher;
DB_ENV *dbenv;
DB_LOG *dblp;
DB_LSN last_lsn, nlsn;
HDR hdr;
LOG *lp;
RLOCK rlock;
logfile_validity status;
u_int32_t cnt;
u_int8_t *rp;
int eof, is_hmac, ret;
dbenv = logc->dbenv;
dblp = dbenv->lg_handle;
lp = dblp->reginfo.primary;
is_hmac = 0;
/* The region lock is only taken when needed, unless the caller holds it. */
rlock = F_ISSET(logc, DB_LOG_LOCKED) ? L_ALREADY : L_NONE;
/* Start from the cursor's current position and adjust per flags. */
nlsn = logc->c_lsn;
switch (flags) {
case DB_NEXT:
/* Step over the current record by its length. */
if (!IS_ZERO_LSN(nlsn)) {
nlsn.offset += logc->c_len;
break;
}
/* Unpositioned cursor: DB_NEXT behaves as DB_FIRST. */
flags = DB_FIRST;
/* FALLTHROUGH */
case DB_FIRST:
/* Locate the first log file and decide which file number to start in. */
if ((ret = __log_find(dblp, 1, &cnt, &status)) != 0)
goto err;
switch (status) {
case DB_LV_INCOMPLETE:
DB_ASSERT(lp->lsn.file == cnt);
/* FALLTHROUGH */
case DB_LV_NORMAL:
case DB_LV_OLD_READABLE:
nlsn.file = cnt;
break;
case DB_LV_NONEXISTENT:
nlsn.file = 1;
DB_ASSERT(lp->lsn.file == nlsn.file);
break;
case DB_LV_OLD_UNREADABLE:
nlsn.file = cnt + 1;
DB_ASSERT(lp->lsn.file == nlsn.file);
break;
}
nlsn.offset = 0;
break;
case DB_CURRENT:
/* Re-read the record at the cursor's position. */
break;
case DB_PREV:
if (!IS_ZERO_LSN(nlsn)) {
/* At the start of a file: step back into the previous file. */
if (nlsn.offset == 0) {
/* There is nothing before file 1, or the previous file can't be checked. */
if (nlsn.file == 1 || __log_valid(dblp,
nlsn.file - 1, 0, NULL, 0, &status) != 0) {
ret = DB_NOTFOUND;
goto err;
}
/* Only these two statuses are accepted for the previous file. */
if (status != DB_LV_NORMAL &&
status != DB_LV_OLD_READABLE) {
ret = DB_NOTFOUND;
goto err;
}
--nlsn.file;
}
/* The current record's header recorded its predecessor's offset. */
nlsn.offset = logc->c_prev;
break;
}
/* Unpositioned cursor: DB_PREV behaves as DB_LAST. */
/* FALLTHROUGH */
case DB_LAST:
/* The region's lsn/len fields are read under the region lock. */
if (rlock == L_NONE) {
rlock = L_ACQUIRED;
R_LOCK(dbenv, &dblp->reginfo);
}
nlsn.file = lp->lsn.file;
nlsn.offset = lp->lsn.offset - lp->len;
break;
case DB_SET:
/* The caller names the record explicitly. */
nlsn = *alsn;
break;
}
/* This block is entered only through "goto next_file" below. */
if (0) {
next_file: ++nlsn.file;
nlsn.offset = 0;
}
/* The header size and checksum type depend on whether encryption is on. */
if (CRYPTO_ON(dbenv)) {
hdr.size = HDR_CRYPTO_SZ;
is_hmac = 1;
} else {
hdr.size = HDR_NORMAL_SZ;
is_hmac = 0;
}
/* First choice: the record is already in the cursor's own buffer. */
if ((ret = __log_c_incursor(logc, &nlsn, &hdr, &rp)) != 0)
goto err;
if (rp != NULL)
goto cksum;
/*
 * Second choice: the log region's buffer.  It is skipped when the last
 * record came from disk (DB_LOG_DISK) and log_compare does not report the
 * target as greater than the cursor's LSN.  last_lsn stays zero unless
 * __log_c_inregion fills it in.
 */
ZERO_LSN(last_lsn);
if (!F_ISSET(logc, DB_LOG_DISK) ||
log_compare(&nlsn, &logc->c_lsn) > 0) {
F_CLR(logc, DB_LOG_DISK);
if ((ret = __log_c_inregion(logc,
&nlsn, &rlock, &last_lsn, &hdr, &rp)) != 0)
goto err;
if (rp != NULL)
goto cksum;
}
/* Last choice: the log file.  The region lock is dropped before the I/O. */
if (rlock == L_ACQUIRED) {
rlock = L_NONE;
R_UNLOCK(dbenv, &dblp->reginfo);
}
if ((ret = __log_c_ondisk(
logc, &nlsn, &last_lsn, flags, &hdr, &rp, &eof)) != 0)
goto err;
if (eof == 1) {
/*
 * Only DB_NEXT moves on to the next file, and only if this attempt
 * was not already at offset 0 of a file.  The lock was released above,
 * so returning directly is safe.
 */
if (flags != DB_NEXT || nlsn.offset == 0)
return (DB_NOTFOUND);
goto next_file;
}
F_SET(logc, DB_LOG_DISK);
cksum:
/* The lock may still be held if the record was found in a buffer. */
if (rlock == L_ACQUIRED) {
rlock = L_NONE;
R_UNLOCK(dbenv, &dblp->reginfo);
}
/* Verify the checksum over the record body (everything after the header). */
db_cipher = dbenv->crypto_handle;
if ((ret = __db_check_chksum(dbenv, db_cipher,
hdr.chksum, rp + hdr.size, hdr.len - hdr.size, is_hmac)) != 0) {
/*
 * With DB_LOG_SILENT_ERR a -1 result is turned into EIO without a
 * message.  Otherwise -1 is reported as a checksum mismatch and the
 * environment is panicked.  Any other value is returned unchanged.
 */
if (F_ISSET(logc, DB_LOG_SILENT_ERR)) {
if (ret == 0 || ret == -1)
ret = EIO;
} else if (ret == -1) {
__db_err(dbenv,
"DB_LOGC->get: log record LSN %lu/%lu: checksum mismatch",
(u_long)nlsn.file, (u_long)nlsn.offset);
__db_err(dbenv,
"DB_LOGC->get: catastrophic recovery may be required");
ret = __db_panic(dbenv, DB_RUNRECOVERY);
}
goto err;
}
/* A zero-length record: going forward, move on to the next file. */
if (hdr.len == 0) {
switch (flags) {
case DB_FIRST:
case DB_NEXT:
goto next_file;
case DB_LAST:
case DB_PREV:
/* Not expected when moving backward; complain, then fall through. */
__db_err(dbenv,
"Encountered zero length records while traversing backwards");
DB_ASSERT(0);
/* FALLTHROUGH */
case DB_SET:
default:
break;
}
}
/* Copy the record body (without the header) into the caller's DBT. */
if ((ret = __db_retcopy(dbenv, dbt, rp + hdr.size,
(u_int32_t)(hdr.len - hdr.size),
&logc->c_dbt.data, &logc->c_dbt.ulen)) != 0)
goto err;
if (CRYPTO_ON(dbenv)) {
/* Decrypt in place in the caller's DBT; any failure becomes EAGAIN. */
if ((ret = db_cipher->decrypt(dbenv, db_cipher->data,
hdr.iv, dbt->data, hdr.len - hdr.size)) != 0) {
ret = EAGAIN;
goto err;
}
/* Report the size stored in the header's orig_size field. */
dbt->size = hdr.orig_size;
}
/* Success: publish the LSN and remember the record's length and predecessor. */
*alsn = nlsn;
logc->c_lsn = nlsn;
logc->c_len = hdr.len;
logc->c_prev = hdr.prev;
/* Common exit: release the region lock only if this call took it. */
err: if (rlock == L_ACQUIRED)
R_UNLOCK(dbenv, &dblp->reginfo);
return (ret);
}
/*
 * __log_c_incursor --
 *	Check whether the record at lsn is entirely held in the cursor's
 *	buffer.
 *
 * On a hit, the header is copied into *hdr and *pp points at the record
 * inside the buffer.  On a miss *pp is NULL and 0 is returned so the caller
 * can look elsewhere.  Returns DB_NOTFOUND if the header found in the buffer
 * fails validation.  hdr->size must be set by the caller.
 */
static int
__log_c_incursor(logc, lsn, hdr, pp)
	DB_LOGC *logc;
	DB_LSN *lsn;
	HDR *hdr;
	u_int8_t **pp;
{
	u_int8_t *rec;
	int at_eof;

	*pp = NULL;

	/*
	 * The buffer must be for the same file, must start at or before the
	 * record, and must extend past the record's header.
	 */
	if (logc->bp_lsn.file != lsn->file ||
	    logc->bp_lsn.offset > lsn->offset ||
	    logc->bp_lsn.offset + logc->bp_rlen <= lsn->offset + hdr->size)
		return (0);

	/* Pull out the header and make sure it is believable. */
	rec = logc->bp + (lsn->offset - logc->bp_lsn.offset);
	memcpy(hdr, rec, hdr->size);
	if (__log_c_hdrchk(logc, lsn, hdr, &at_eof))
		return (DB_NOTFOUND);

	/* Only a complete, non-EOF record counts as a hit. */
	if (!at_eof &&
	    logc->bp_lsn.offset + logc->bp_rlen >= lsn->offset + hdr->len)
		*pp = rec;

	return (0);
}
/*
 * __log_c_inregion --
 *	Try to satisfy a read from the shared log region's buffer.
 *
 * Acquires the region lock if *rlockp is L_NONE (recording L_ACQUIRED) and
 * stores the region's current LSN in *last_lsn, with its offset clamped to
 * lp->w_off.  When the record is found it is copied into the cursor's buffer
 * and *pp points at it; otherwise *pp is NULL.
 *
 * Returns 0 (the caller checks *pp), DB_NOTFOUND if the LSN is at or past
 * the region's LSN or the header is rejected, or an error from allocation
 * or file I/O.  The lock may still be held on return; its state is reported
 * through *rlockp.
 */
static int
__log_c_inregion(logc, lsn, rlockp, last_lsn, hdr, pp)
DB_LOGC *logc;
DB_LSN *lsn, *last_lsn;
RLOCK *rlockp;
HDR *hdr;
u_int8_t **pp;
{
DB_ENV *dbenv;
DB_LOG *dblp;
LOG *lp;
size_t len, nr;
u_int32_t b_disk, b_region;
int ret;
u_int8_t *p;
dbenv = logc->dbenv;
dblp = dbenv->lg_handle;
lp = ((DB_LOG *)logc->dbenv->lg_handle)->reginfo.primary;
ret = 0;
*pp = NULL;
/* Take the region lock unless it is already held. */
if (*rlockp == L_NONE) {
*rlockp = L_ACQUIRED;
R_LOCK(dbenv, &dblp->reginfo);
}
/*
 * Hand the region's LSN back to the caller, never reporting an offset
 * beyond lp->w_off.
 * NOTE(review): w_off looks like the file offset at which the region
 * buffer begins, i.e. how far the file has been written -- confirm.
 */
*last_lsn = lp->lsn;
if (last_lsn->offset > lp->w_off)
last_lsn->offset = lp->w_off;
/* Region LSN not set: nothing to find here, let the caller use the disk. */
if (IS_ZERO_LSN(lp->lsn))
return (0);
/* The request is at or beyond the region's LSN: no such record. */
if (lsn->file > lp->lsn.file ||
(lsn->file == lp->lsn.file && lsn->offset >= lp->lsn.offset))
return (DB_NOTFOUND);
/* b_off of 0: the region buffer holds nothing. */
if (lp->b_off == 0)
return (0);
/* The record is before lp->f_lsn, so it is not in the region buffer. */
if (lsn->file < lp->f_lsn.file || lsn->offset < lp->f_lsn.offset)
return (0);
/* From here on the cursor's buffer is overwritten; invalidate its LSN. */
ZERO_LSN(logc->bp_lsn);
/* Past f_lsn's offset: the record is read directly out of the region buffer. */
if (lsn->offset > lp->f_lsn.offset) {
p = dblp->bufp + (lsn->offset - lp->w_off);
memcpy(hdr, p, hdr->size);
if (__log_c_hdrchk(logc, lsn, hdr, NULL))
return (DB_NOTFOUND);
/* Reject a length that would run past w_off + buffer_size. */
if (lsn->offset + hdr->len > lp->w_off + lp->buffer_size)
return (DB_NOTFOUND);
/* Grow the cursor's buffer to twice the record size, rounded by ALIGN. */
if (logc->bp_size <= hdr->len) {
len = ALIGN(hdr->len * 2, 128);
if ((ret =
__os_realloc(logc->dbenv, len, &logc->bp)) != 0)
return (ret);
logc->bp_size = (u_int32_t)len;
}
memcpy(logc->bp, p, hdr->len);
*pp = logc->bp;
return (0);
}
/*
 * Otherwise lsn->offset equals lp->f_lsn.offset and the record is
 * assembled from two pieces: b_disk bytes read from the log file and
 * b_region bytes copied from the front of the region buffer.
 */
b_disk = lp->w_off - lsn->offset;
if (lp->b_off <= lp->len)
/* Use everything in the buffer. */
b_region = (u_int32_t)lp->b_off;
else
/*
 * Walk backward through the headers in the region buffer, following
 * each "prev" offset, until reaching the record whose prev is the
 * requested offset; the bytes before it belong to the requested record.
 */
for (p = dblp->bufp + (lp->b_off - lp->len);;) {
memcpy(hdr, p, hdr->size);
if (hdr->prev == lsn->offset) {
b_region = (u_int32_t)(p - dblp->bufp);
break;
}
p = dblp->bufp + (hdr->prev - lp->w_off);
}
/* Make sure the cursor's buffer can hold both pieces. */
if (logc->bp_size <= b_region + b_disk) {
len = ALIGN((b_region + b_disk) * 2, 128);
if ((ret = __os_realloc(logc->dbenv, len, &logc->bp)) != 0)
return (ret);
logc->bp_size = (u_int32_t)len;
}
/* The region's bytes go at the very end of the cursor's buffer. */
p = (logc->bp + logc->bp_size) - b_region;
memcpy(p, dblp->bufp, b_region);
/* The region buffer is no longer needed; drop the lock before file I/O. */
if (*rlockp == L_ACQUIRED) {
*rlockp = L_NONE;
R_UNLOCK(dbenv, &dblp->reginfo);
}
/* Read the on-disk piece into place just before the region's bytes. */
if (b_disk != 0) {
p -= b_disk;
nr = b_disk;
if ((ret = __log_c_io(
logc, lsn->file, lsn->offset, p, &nr, NULL)) != 0)
return (ret);
/* Anything less than the full piece is an error, reported unconditionally. */
if (nr < b_disk)
return (__log_c_shortread(logc, lsn, 0));
}
/* p now points at the start of the record; return its header and address. */
memcpy(hdr, p, hdr->size);
*pp = p;
return (0);
}
/*
 * __log_c_ondisk --
 *	Read the record at lsn from its log file into the cursor's buffer.
 *
 * The header is read and validated first, then a buffer's worth of the
 * file around the record is read, without going past last_lsn when it is
 * in the same file.  On success *pp points at the record inside the
 * cursor's buffer.  *eofp is set to 1 (with a 0 return) when the file can't
 * be opened, nothing could be read, or the header looks like end-of-file.
 * Returns 0 or a non-zero error value.
 */
static int
__log_c_ondisk(logc, lsn, last_lsn, flags, hdr, pp, eofp)
	DB_LOGC *logc;
	DB_LSN *lsn, *last_lsn;
	int flags, *eofp;
	HDR *hdr;
	u_int8_t **pp;
{
	DB_ENV *dbenv;
	size_t newsize, nbytes;
	u_int32_t start;
	int ret;

	dbenv = logc->dbenv;
	*eofp = 0;

	/* Read just the header. */
	nbytes = hdr->size;
	ret = __log_c_io(logc, lsn->file, lsn->offset, hdr, &nbytes, eofp);
	if (ret != 0)
		return (ret);
	if (*eofp)
		return (0);

	/* A read that returned nothing is treated as end-of-file. */
	if (nbytes == 0) {
		*eofp = 1;
		return (0);
	}

	/* Validate the header; it may also report end-of-file. */
	ret = __log_c_hdrchk(logc, lsn, hdr, eofp);
	if (ret != 0)
		return (ret);
	if (*eofp)
		return (0);

	/* A partial header is an error. */
	if (nbytes < hdr->size)
		return (__log_c_shortread(logc, lsn, 1));

	/* The cursor's buffer is about to be overwritten. */
	ZERO_LSN(logc->bp_lsn);

	/* Grow the buffer if the record doesn't fit. */
	if (logc->bp_size <= hdr->len) {
		newsize = ALIGN(hdr->len * 2, 128);
		ret = __os_realloc(dbenv, newsize, &logc->bp);
		if (ret != 0)
			return (ret);
		logc->bp_size = (u_int32_t)newsize;
	}

	/*
	 * Moving forward, the record goes at the start of the buffer.
	 * Otherwise it goes at the end, without starting before offset 0.
	 */
	switch (flags) {
	case DB_FIRST:
	case DB_NEXT:
		start = lsn->offset;
		break;
	default:
		if (lsn->offset + hdr->len < logc->bp_size)
			start = 0;
		else
			start = (lsn->offset + hdr->len) - logc->bp_size;
		break;
	}

	/* Read a full buffer, stopping at last_lsn if it is in this file. */
	nbytes = logc->bp_size;
	if (lsn->file == last_lsn->file && start + nbytes >= last_lsn->offset)
		nbytes = last_lsn->offset - start;

	ret = __log_c_io(logc, lsn->file, start, logc->bp, &nbytes, eofp);
	if (ret != 0)
		return (ret);

	/* The read must at least cover the requested record. */
	if (nbytes < (lsn->offset + hdr->len) - start)
		return (__log_c_shortread(logc, lsn, 1));

	/* Describe the buffer's new contents and return the record. */
	logc->bp_rlen = (u_int32_t)nbytes;
	logc->bp_lsn.file = lsn->file;
	logc->bp_lsn.offset = start;

	*pp = logc->bp + (lsn->offset - start);
	return (0);
}
/*
 * __log_c_hdrchk --
 *	Sanity check a log record header.
 *
 * If eofp is non-NULL, a header whose prev, first checksum byte and len
 * are all zero is reported as end-of-file: *eofp is set to 1 and 0 is
 * returned.  Otherwise *eofp is set to 0 and the checks continue.
 *
 * The record length must be greater than the header size and, when the
 * cursor has a maximum record size, must not exceed it (the maximum is
 * refreshed once before giving up).  Returns 0 if the header is acceptable,
 * EIO if it is not, or the error from refreshing the maximum.
 */
static int
__log_c_hdrchk(logc, lsn, hdr, eofp)
	DB_LOGC *logc;
	DB_LSN *lsn;
	HDR *hdr;
	int *eofp;
{
	DB_ENV *dbenv;
	int invalid, ret;

	dbenv = logc->dbenv;

	/* The end-of-file test comes before any other check. */
	if (eofp != NULL) {
		if (hdr->prev == 0 && hdr->chksum[0] == 0 && hdr->len == 0) {
			*eofp = 1;
			return (0);
		}
		*eofp = 0;
	}

	/* A record is always longer than its header. */
	invalid = hdr->len <= hdr->size;

	/* A maximum of 0 means there is no limit to check against. */
	if (!invalid && logc->bp_maxrec != 0 && hdr->len > logc->bp_maxrec) {
		/* The limit may be stale; refresh it and test again. */
		ret = __log_c_set_maxrec(logc, NULL);
		if (ret != 0) {
			__db_err(dbenv, "DB_LOGC->get: %s", db_strerror(ret));
			return (ret);
		}
		invalid =
		    logc->bp_maxrec != 0 && hdr->len > logc->bp_maxrec;
	}

	if (!invalid)
		return (0);

	if (!F_ISSET(logc, DB_LOG_SILENT_ERR))
		__db_err(dbenv,
		    "DB_LOGC->get: LSN %lu/%lu: invalid log record header",
		    (u_long)lsn->file, (u_long)lsn->offset);
	return (EIO);
}
/*
 * __log_c_io --
 *	Read bytes from a log file, opening (or switching to) the file as
 *	needed.
 *
 * Parameters:
 *	logc	the log cursor; its c_fhp file handle is reused across calls.
 *	fnum	log file number to read from.
 *	offset	byte offset within that file.
 *	p	destination buffer.
 *	nrp	in: number of bytes wanted; out: number of bytes read.
 *	eofp	if non-NULL, a failure to open the file is reported by
 *		setting *eofp to 1 and returning 0 instead of an error.
 *
 * Returns 0 on success (the caller must compare *nrp with what it asked
 * for), or a non-zero error value from the open, seek or read.
 */
static int
__log_c_io(logc, fnum, offset, p, nrp, eofp)
	DB_LOGC *logc;
	u_int32_t fnum, offset;
	void *p;
	size_t *nrp;
	int *eofp;
{
	DB_ENV *dbenv;
	DB_LOG *dblp;
	int ret;
	char *np;

	dbenv = logc->dbenv;
	dblp = dbenv->lg_handle;

	/*
	 * If the open handle is not for the file recorded in the cursor's
	 * buffer LSN, discard it so the right file is opened below.
	 */
	if (logc->c_fhp != NULL && logc->bp_lsn.file != fnum) {
		ret = __os_closehandle(dbenv, logc->c_fhp);
		logc->c_fhp = NULL;
		if (ret != 0)
			return (ret);
	}

	if (logc->c_fhp == NULL) {
		/*
		 * __log_name may fail before it has stored a file name, so
		 * start from NULL rather than reading an uninitialized
		 * pointer in the error path.
		 */
		np = NULL;
		if ((ret = __log_name(dblp, fnum,
		    &np, &logc->c_fhp, DB_OSO_RDONLY | DB_OSO_SEQ)) != 0) {
			/*
			 * When the caller accepts EOF, assume a missing file
			 * is the reason for the failure.
			 */
			if (eofp != NULL) {
				*eofp = 1;
				ret = 0;
			} else if (!F_ISSET(logc, DB_LOG_SILENT_ERR))
				__db_err(dbenv, "DB_LOGC->get: %s: %s",
				    np == NULL ? "__log_name failed" : np,
				    db_strerror(ret));
			if (np != NULL)
				__os_free(dbenv, np);
			return (ret);
		}

		/* Refresh the maximum record size from the new file. */
		if ((ret = __log_c_set_maxrec(logc, np)) != 0) {
			__db_err(dbenv,
			    "DB_LOGC->get: %s: %s", np, db_strerror(ret));
			__os_free(dbenv, np);
			return (ret);
		}
		__os_free(dbenv, np);
	}

	/* Position the file and read the requested bytes. */
	if ((ret = __os_seek(dbenv,
	    logc->c_fhp, 0, 0, offset, 0, DB_OS_SEEK_SET)) != 0) {
		if (!F_ISSET(logc, DB_LOG_SILENT_ERR))
			__db_err(dbenv,
			    "DB_LOGC->get: LSN: %lu/%lu: seek: %s",
			    (u_long)fnum, (u_long)offset, db_strerror(ret));
		return (ret);
	}
	if ((ret = __os_read(dbenv, logc->c_fhp, p, *nrp, nrp)) != 0) {
		if (!F_ISSET(logc, DB_LOG_SILENT_ERR))
			__db_err(dbenv,
			    "DB_LOGC->get: LSN: %lu/%lu: read: %s",
			    (u_long)fnum, (u_long)offset, db_strerror(ret));
		return (ret);
	}

	return (0);
}
/*
 * __log_c_shortread --
 *	Report a short read on the log record at lsn and return EIO.
 *
 * The message is suppressed only when check_silent is non-zero and the
 * cursor has DB_LOG_SILENT_ERR set.
 */
static int
__log_c_shortread(logc, lsn, check_silent)
	DB_LOGC *logc;
	DB_LSN *lsn;
	int check_silent;
{
	/* The caller asked for silence to be honored and the cursor wants it. */
	if (check_silent && F_ISSET(logc, DB_LOG_SILENT_ERR))
		return (EIO);

	__db_err(logc->dbenv, "DB_LOGC->get: LSN: %lu/%lu: short read",
	    (u_long)lsn->file, (u_long)lsn->offset);
	return (EIO);
}
/*
 * __log_c_set_maxrec --
 *	Raise the cursor's maximum acceptable record size (bp_maxrec).
 *
 * The limit is never lowered.  It is raised to the size of the currently
 * open log file (as reported by __os_ioinfo for the file named np), if a
 * file is open, and to the log region's buffer size.  Returns 0, or the
 * error from __os_ioinfo.
 */
static int
__log_c_set_maxrec(logc, np)
	DB_LOGC *logc;
	char *np;
{
	DB_ENV *dbenv;
	DB_LOG *dblp;
	LOG *region;
	u_int32_t mb, extra;
	int ret;

	dbenv = logc->dbenv;
	dblp = dbenv->lg_handle;

	/* A record can be as large as the open log file. */
	if (logc->c_fhp != NULL) {
		ret = __os_ioinfo(dbenv, np, logc->c_fhp, &mb, &extra, NULL);
		if (ret != 0)
			return (ret);
		if (logc->bp_maxrec < (mb * MEGABYTE + extra))
			logc->bp_maxrec = mb * MEGABYTE + extra;
	}

	/* ... or as large as the log region's buffer. */
	region = dblp->reginfo.primary;
	if (logc->bp_maxrec < region->buffer_size)
		logc->bp_maxrec = region->buffer_size;

	return (0);
}