#include "db_config.h"
#ifndef lint
static const char revid[] = "$Id: log.c,v 1.1.1.1 2003/02/15 04:56:08 zarzycki Exp $";
#endif
#ifndef NO_SYSTEM_INCLUDES
#include <sys/types.h>
#include <ctype.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#endif
#include "db_int.h"
#include "dbinc/crypto.h"
#include "dbinc/hmac.h"
#include "dbinc/log.h"
#include "dbinc/txn.h"
/*
 * Forward declarations for the helpers that are private to this file;
 * each one is defined further down as a static function.
 */
static int __log_init __P((DB_ENV *, DB_LOG *));
static int __log_recover __P((DB_LOG *));
static size_t __log_region_size __P((DB_ENV *));
static int __log_zero __P((DB_ENV *, DB_LSN *, DB_LSN *));
/*
 * __log_open --
 *	Allocate the per-process DB_LOG handle and join (or create) the
 *	log region of the environment.
 *
 * dbenv:	environment to attach the log subsystem to.
 *
 * Returns 0 on success, with the handle stored in dbenv->lg_handle and
 * the region unlocked.  On failure the error is returned, the handle is
 * freed and, if it had been published, dbenv->lg_handle is reset to NULL.
 */
int
__log_open(dbenv)
	DB_ENV *dbenv;
{
	DB_LOG *dblp;
	LOG *lp;
	int ret;

	/* Allocate the zero-filled handle. */
	if ((ret = __os_calloc(dbenv, 1, sizeof(DB_LOG), &dblp)) != 0)
		return (ret);
	dblp->dbenv = dbenv;

	/* Describe the region, then join it (or create it if allowed). */
	dblp->reginfo.type = REGION_TYPE_LOG;
	dblp->reginfo.id = INVALID_REGION_ID;
	dblp->reginfo.mode = dbenv->db_mode;
	dblp->reginfo.flags = REGION_JOIN_OK;
	if (F_ISSET(dbenv, DB_ENV_CREATE))
		F_SET(&dblp->reginfo, REGION_CREATE_OK);
	if ((ret = __db_r_attach(
	    dbenv, &dblp->reginfo, __log_region_size(dbenv))) != 0)
		goto err;

	/* A region we just created still has to be initialized. */
	if (F_ISSET(&dblp->reginfo, REGION_CREATE))
		if ((ret = __log_init(dbenv, dblp)) != 0)
			goto err;

	/* Translate the primary structure's offset into a local address. */
	lp = dblp->reginfo.primary =
	    R_ADDR(&dblp->reginfo, dblp->reginfo.rp->primary);

	/* Threaded environments also need a mutex for the handle itself. */
	if (F_ISSET(dbenv, DB_ENV_THREAD) &&
	    (ret = __db_mutex_setup(dbenv, &dblp->reginfo, &dblp->mutexp,
	    MUTEX_ALLOC | MUTEX_NO_RLOCK)) != 0)
		goto err;

	dblp->bufp = R_ADDR(&dblp->reginfo, lp->buffer_off);

	/*
	 * Publish the handle before running recovery: __log_recover opens
	 * a log cursor through dbenv.  If recovery fails, withdraw the
	 * handle again before it is freed below.
	 */
	dbenv->lg_handle = dblp;

	if (F_ISSET(&dblp->reginfo, REGION_CREATE)) {
		if ((ret = __log_recover(dblp)) != 0) {
			dbenv->lg_handle = NULL;
			goto err;
		}

		/* Nothing configured or recovered a file size: default. */
		if (lp->log_size == 0)
			lp->log_size = lp->log_nsize = LG_MAX_DEFAULT;
	} else {
		/* A joiner may only change the size of the next log file. */
		if (dbenv->lg_size != 0)
			lp->log_nsize = dbenv->lg_size;
	}

	R_UNLOCK(dbenv, &dblp->reginfo);
	return (0);

err:	if (dblp->reginfo.addr != NULL) {
		/* A region we created but could not set up is panicked. */
		if (F_ISSET(&dblp->reginfo, REGION_CREATE))
			ret = __db_panic(dbenv, ret);

		/*
		 * The handle mutex was set up against this region with
		 * MUTEX_NO_RLOCK, so release it while the region is still
		 * attached and locked -- the same free-then-detach order
		 * __log_dbenv_refresh uses.
		 */
		if (dblp->mutexp != NULL)
			__db_mutex_free(dbenv, &dblp->reginfo, dblp->mutexp);

		R_UNLOCK(dbenv, &dblp->reginfo);
		(void)__db_r_detach(dbenv, &dblp->reginfo, 0);
	} else if (dblp->mutexp != NULL)
		__db_mutex_free(dbenv, &dblp->reginfo, dblp->mutexp);

	__os_free(dbenv, dblp);

	return (ret);
}
static int
__log_init(dbenv, dblp)
DB_ENV *dbenv;
DB_LOG *dblp;
{
DB_MUTEX *flush_mutexp;
LOG *region;
int ret;
void *p;
#ifdef HAVE_MUTEX_SYSTEM_RESOURCES
u_int8_t *addr;
#endif
if ((ret = __db_shalloc(dblp->reginfo.addr,
sizeof(*region), 0, &dblp->reginfo.primary)) != 0)
goto mem_err;
dblp->reginfo.rp->primary =
R_OFFSET(&dblp->reginfo, dblp->reginfo.primary);
region = dblp->reginfo.primary;
memset(region, 0, sizeof(*region));
region->fid_max = 0;
SH_TAILQ_INIT(®ion->fq);
region->free_fid_stack = INVALID_ROFF;
region->free_fids = region->free_fids_alloced = 0;
INIT_LSN(region->lsn);
INIT_LSN(region->ready_lsn);
INIT_LSN(region->t_lsn);
ZERO_LSN(region->waiting_lsn);
ZERO_LSN(region->cached_ckp_lsn);
#ifdef HAVE_MUTEX_SYSTEM_RESOURCES
if ((ret = __db_shalloc(dblp->reginfo.addr,
sizeof(REGMAINT) + LG_MAINT_SIZE, 0, &addr)) != 0)
goto mem_err;
__db_maintinit(&dblp->reginfo, addr, LG_MAINT_SIZE);
region->maint_off = R_OFFSET(&dblp->reginfo, addr);
#endif
if ((ret = __db_mutex_setup(dbenv, &dblp->reginfo, ®ion->fq_mutex,
MUTEX_NO_RLOCK)) != 0)
return (ret);
if ((ret = __db_shalloc(dblp->reginfo.addr,
sizeof(DB_MUTEX), MUTEX_ALIGN, &flush_mutexp)) != 0)
goto mem_err;
if ((ret = __db_mutex_setup(dbenv, &dblp->reginfo, flush_mutexp,
MUTEX_NO_RLOCK)) != 0)
return (ret);
region->flush_mutex_off = R_OFFSET(&dblp->reginfo, flush_mutexp);
if ((ret =
__db_shalloc(dblp->reginfo.addr, dbenv->lg_bsize, 0, &p)) != 0) {
mem_err: __db_err(dbenv, "Unable to allocate memory for the log buffer");
return (ret);
}
region->buffer_size = dbenv->lg_bsize;
region->buffer_off = R_OFFSET(&dblp->reginfo, p);
region->log_size = region->log_nsize = dbenv->lg_size;
SH_TAILQ_INIT(®ion->free_commits);
SH_TAILQ_INIT(®ion->commits);
region->ncommit = 0;
region->persist.magic = DB_LOGMAGIC;
region->persist.version = DB_LOGVERSION;
region->persist.mode = (u_int32_t)dbenv->db_mode;
return (0);
}
static int
__log_recover(dblp)
DB_LOG *dblp;
{
DBT dbt;
DB_ENV *dbenv;
DB_LOGC *logc;
DB_LSN lsn;
LOG *lp;
u_int32_t cnt, rectype;
int ret;
logfile_validity status;
logc = NULL;
dbenv = dblp->dbenv;
lp = dblp->reginfo.primary;
if ((ret = __log_find(dblp, 0, &cnt, &status)) != 0)
return (ret);
if (cnt == 0)
return (0);
if (status == DB_LV_OLD_READABLE || status == DB_LV_OLD_UNREADABLE) {
lp->lsn.file = lp->s_lsn.file = cnt + 1;
lp->lsn.offset = lp->s_lsn.offset = 0;
goto skipsearch;
}
DB_ASSERT(status == DB_LV_NORMAL);
lp->lsn.file = cnt + 1;
lp->lsn.offset = 0;
lsn.file = cnt;
lsn.offset = 0;
if ((ret = dbenv->log_cursor(dbenv, &logc, 0)) != 0)
return (ret);
F_SET(logc, DB_LOG_LOCKED);
memset(&dbt, 0, sizeof(dbt));
if ((ret = logc->get(logc, &lsn, &dbt, DB_SET)) != 0)
goto err;
F_SET(logc, DB_LOG_SILENT_ERR);
while (logc->get(logc, &lsn, &dbt, DB_NEXT) == 0) {
if (dbt.size < sizeof(u_int32_t))
continue;
memcpy(&rectype, dbt.data, sizeof(u_int32_t));
if (rectype == DB___txn_ckp)
lp->cached_ckp_lsn = lsn;
}
F_CLR(logc, DB_LOG_SILENT_ERR);
lp->lsn = lsn;
lp->s_lsn = lsn;
lp->lsn.offset += logc->c_len;
lp->s_lsn.offset += logc->c_len;
lp->len = logc->c_len;
lp->b_off = 0;
lp->w_off = lp->lsn.offset;
skipsearch:
if (FLD_ISSET(dbenv->verbose, DB_VERB_RECOVERY))
__db_err(dbenv,
"Finding last valid log LSN: file: %lu offset %lu",
(u_long)lp->lsn.file, (u_long)lp->lsn.offset);
err: if (logc != NULL)
(void)logc->close(logc, 0);
return (ret);
}
/*
 * __log_find --
 *	Scan the log directory for log files and pick one of them.
 *
 * dblp:	log handle.
 * find_first:	non-zero to prefer lower file numbers, zero to prefer
 *		higher ones (see the candidate filter in the loop).
 * valp:	out; number of the chosen file, 0 if none was chosen or
 *		an error occurred.
 * statusp:	out; validity of the chosen file, DB_LV_NONEXISTENT if
 *		none was chosen.  Not written when the directory name or
 *		listing cannot be obtained.
 *
 * Returns 0 on success, otherwise the error from __log_name,
 * __os_dirlist or __log_valid.
 */
int
__log_find(dblp, find_first, valp, statusp)
	DB_LOG *dblp;
	int find_first;
	u_int32_t *valp;
	logfile_validity *statusp;
{
	DB_ENV *dbenv;
	logfile_validity logval_status, status;
	u_int32_t clv, logval;
	int cnt, fcnt, ret;
	const char *dir;
	char *c, **names, *p, *q, savech;

	dbenv = dblp->dbenv;
	logval_status = status = DB_LV_NONEXISTENT;

	/* Report file number 0 unless a file is found. */
	*valp = 0;

	/* Derive the directory from the path of log file number 1. */
	if ((ret = __log_name(dblp, 1, &p, NULL, 0)) != 0)
		return (ret);
	if ((q = __db_rpath(p)) == NULL) {
		COMPQUIET(savech, 0);
		dir = PATH_DOT;
	} else {
		/* Temporarily cut the path at the position __db_rpath found. */
		savech = *q;
		*q = '\0';
		dir = p;
	}

	ret = __os_dirlist(dbenv, dir, &names, &fcnt);

	/* Put back the byte that was overwritten with a nul above. */
	if (q != NULL)
		*q = savech;

	if (ret != 0) {
		__db_err(dbenv, "%s: %s", dir, db_strerror(ret));
		__os_free(dbenv, p);
		return (ret);
	}

	/* Examine the directory entries from last to first. */
	for (cnt = fcnt, clv = logval = 0; --cnt >= 0;) {
		if (strncmp(names[cnt], LFPREFIX, sizeof(LFPREFIX) - 1) != 0)
			continue;

		/*
		 * Only names made of the prefix followed by digits are
		 * candidates.  The character goes through unsigned char:
		 * handing isdigit a negative plain char is undefined.
		 */
		for (c = names[cnt] + sizeof(LFPREFIX) - 1; *c != '\0'; c++)
			if (!isdigit((unsigned char)*c))
				break;
		if (*c != '\0')
			continue;

		clv = atol(names[cnt] + (sizeof(LFPREFIX) - 1));

		/*
		 * Skip files that cannot improve on the current choice:
		 * higher-numbered ones when looking for the first file
		 * (unless the last file checked was unreadably old),
		 * lower-numbered ones when looking for the last file.
		 */
		if (find_first) {
			if (logval != 0 &&
			    status != DB_LV_OLD_UNREADABLE && clv > logval)
				continue;
		} else
			if (logval != 0 && clv < logval)
				continue;

		if ((ret = __log_valid(dblp, clv, 1, &status)) != 0) {
			__db_err(dbenv, "Invalid log file: %s: %s",
			    names[cnt], db_strerror(ret));
			goto err;
		}
		switch (status) {
		case DB_LV_NONEXISTENT:
			/* Not expected back from __log_valid. */
			DB_ASSERT(0);
			break;
		case DB_LV_INCOMPLETE:
			/* Incomplete files only count as a "first" file. */
			if (find_first)
				goto found;
			break;
		case DB_LV_OLD_UNREADABLE:
			/*
			 * Always acceptable as the last file; as the first
			 * file only when nothing is chosen yet or this one
			 * has a higher number than the current choice.
			 */
			if (!find_first || logval == 0 ||
			    (status == DB_LV_OLD_UNREADABLE && clv > logval))
				goto found;
			break;
		case DB_LV_NORMAL:
		case DB_LV_OLD_READABLE:
found:			logval = clv;
			logval_status = status;
			break;
		}
	}

	*valp = logval;

err:	__os_dirfree(dbenv, names, fcnt);
	__os_free(dbenv, p);
	*statusp = logval_status;

	return (ret);
}
/*
 * __log_valid --
 *	Read and validate the persistent header of one log file.
 *
 * dblp:	log handle.
 * number:	number of the log file to check.
 * set_persist:	non-zero to copy the file's log size and mode into the
 *		region once the header has been accepted.
 * statusp:	out; DB_LV_INCOMPLETE for a short read, DB_LV_OLD_UNREADABLE
 *		or DB_LV_OLD_READABLE for older versions, DB_LV_NORMAL
 *		otherwise.  Not written when the file cannot be opened or
 *		the read buffer cannot be allocated.
 *
 * Returns 0 when the file was examined without error.  Returns EINVAL for
 * a bad magic number, an unsupported (newer) version, or an encrypted
 * header of the wrong size; otherwise the error from the failing call.
 */
int
__log_valid(dblp, number, set_persist, statusp)
	DB_LOG *dblp;
	u_int32_t number;
	int set_persist;
	logfile_validity *statusp;
{
	DB_CIPHER *db_cipher;
	DB_ENV *dbenv;
	DB_FH fh;
	HDR *hdr;
	LOG *region;
	LOGP *persist;
	logfile_validity status;
	size_t hdrsize, nw, recsize;
	int is_hmac, ret;
	u_int8_t *tmp;
	char *fname;

	dbenv = dblp->dbenv;
	db_cipher = dbenv->crypto_handle;
	persist = NULL;
	status = DB_LV_NORMAL;

	/* Open the log file read-only. */
	if ((ret = __log_name(dblp,
	    number, &fname, &fh, DB_OSO_RDONLY | DB_OSO_SEQ)) != 0) {
		__os_free(dbenv, fname);
		return (ret);
	}

	/* Header and record sizes depend on whether encryption is on. */
	hdrsize = HDR_NORMAL_SZ;
	is_hmac = 0;
	recsize = sizeof(LOGP);
	if (CRYPTO_ON(dbenv)) {
		hdrsize = HDR_CRYPTO_SZ;
		recsize += db_cipher->adj_size(recsize);
		is_hmac = 1;
	}

	/*
	 * The file is already open and its name allocated, so both have
	 * to be released if the read buffer cannot be obtained.
	 */
	if ((ret = __os_calloc(dbenv, 1, recsize + hdrsize, &tmp)) != 0) {
		(void)__os_closehandle(dbenv, &fh);
		__os_free(dbenv, fname);
		return (ret);
	}
	hdr = (HDR *)tmp;
	persist = (LOGP *)(tmp + hdrsize);

	/* Read header plus persistent record; a short read is not an error. */
	if ((ret = __os_read(dbenv, &fh, tmp, recsize + hdrsize, &nw)) != 0 ||
	    nw != recsize + hdrsize) {
		if (ret == 0)
			status = DB_LV_INCOMPLETE;
		else
			__db_err(dbenv, "Ignoring log file: %s: %s",
			    fname, db_strerror(ret));

		(void)__os_closehandle(dbenv, &fh);
		goto err;
	}
	(void)__os_closehandle(dbenv, &fh);

	/* Encrypted logs: check size and checksum, then decrypt in place. */
	if (CRYPTO_ON(dbenv)) {
		if ((hdr->len - hdrsize) != sizeof(LOGP)) {
			__db_err(dbenv, "log record size mismatch");
			/* Fail the call; ret is still 0 from the read. */
			ret = EINVAL;
			goto err;
		}
		if ((ret = __db_check_chksum(dbenv, db_cipher, &hdr->chksum[0],
		    (u_int8_t *)persist, hdr->len - hdrsize, is_hmac)) != 0) {
			__db_err(dbenv, "log record checksum mismatch");
			goto err;
		}
		if ((ret = db_cipher->decrypt(dbenv, db_cipher->data,
		    &hdr->iv[0], (u_int8_t *)persist, hdr->len - hdrsize)) != 0)
			goto err;
	}

	/* The magic number must match. */
	if (persist->magic != DB_LOGMAGIC) {
		__db_err(dbenv,
		    "Ignoring log file: %s: magic number %lx, not %lx",
		    fname, (u_long)persist->magic, (u_long)DB_LOGMAGIC);
		ret = EINVAL;
		goto err;
	}

	/* Classify the file by its log version. */
	if (persist->version > DB_LOGVERSION) {
		__db_err(dbenv,
		    "Ignoring log file: %s: unsupported log version %lu",
		    fname, (u_long)persist->version);
		ret = EINVAL;
		goto err;
	} else if (persist->version < DB_LOGOLDVER) {
		status = DB_LV_OLD_UNREADABLE;
		goto err;
	} else if (persist->version < DB_LOGVERSION)
		status = DB_LV_OLD_READABLE;

	/* Unencrypted logs are checksummed after the version is known. */
	if (!CRYPTO_ON(dbenv) && ((ret = __db_check_chksum(dbenv,
	    db_cipher, &hdr->chksum[0], (u_int8_t *)persist,
	    hdr->len - hdrsize, is_hmac)) != 0)) {
		__db_err(dbenv, "log record checksum mismatch");
		goto err;
	}

	/* Optionally adopt the file's persistent settings in the region. */
	if (set_persist) {
		region = dblp->reginfo.primary;
		region->log_size = persist->log_size;
		if (region->log_nsize == 0)
			region->log_nsize = persist->log_size;
		region->persist.mode = persist->mode;
	}

	/* Reached only after the read buffer was allocated. */
err:	__os_free(dbenv, fname);
	__os_free(dbenv, tmp);
	*statusp = status;
	return (ret);
}
/*
 * __log_dbenv_refresh --
 *	Tear down the log subsystem of an environment handle.
 *
 * dbenv:	environment whose dbenv->lg_handle is to be discarded.
 *
 * All cleanup steps are attempted even if an earlier one fails; the
 * first error seen is the one returned (0 if there was none).
 */
int
__log_dbenv_refresh(dbenv)
	DB_ENV *dbenv;
{
	DB_LOG *handle;
	int first_err, rc;

	handle = dbenv->lg_handle;

	/* Flag the handle, then close the files registered with it. */
	F_SET(handle, DBLOG_RECOVER);
	first_err = __dbreg_close_files(dbenv);

	/* Release the handle mutex, if one was set up. */
	if (handle->mutexp != NULL)
		__db_mutex_free(dbenv, &handle->reginfo, handle->mutexp);

	/* Leave the region. */
	rc = __db_r_detach(dbenv, &handle->reginfo, 0);
	if (first_err == 0)
		first_err = rc;

	/* Close the log file handle if it is open. */
	if (F_ISSET(&handle->lfh, DB_FH_VALID)) {
		rc = __os_closehandle(dbenv, &handle->lfh);
		if (first_err == 0)
			first_err = rc;
	}

	/* Free what the handle owns, then the handle itself. */
	if (handle->dbentry != NULL)
		__os_free(dbenv, handle->dbentry);
	__os_free(dbenv, handle);
	dbenv->lg_handle = NULL;

	return (first_err);
}
/*
 * __log_stat --
 *	Return a snapshot of the log region's statistics.
 *
 * dbenv:	environment whose log handle (dbenv->lg_handle) is queried.
 * statp:	out; set to NULL first, then to a DB_LOG_STAT obtained
 *		from __os_umalloc once the snapshot is complete.
 * flags:	0 or DB_STAT_CLEAR (validated by __db_fchk).  With
 *		DB_STAT_CLEAR the region's counters and the region mutex
 *		wait counts are reset after being copied.
 *
 * Returns 0 on success, otherwise the error from the flag check or the
 * allocation.
 * NOTE(review): PANIC_CHECK and ENV_REQUIRES_CONFIG presumably return
 * early with an error as well -- confirm against their definitions.
 */
int
__log_stat(dbenv, statp, flags)
	DB_ENV *dbenv;
	DB_LOG_STAT **statp;
	u_int32_t flags;
{
	DB_LOG *dblp;
	DB_LOG_STAT *stats;
	LOG *region;
	int ret;

	PANIC_CHECK(dbenv);
	ENV_REQUIRES_CONFIG(dbenv,
	    dbenv->lg_handle, "DB_ENV->log_stat", DB_INIT_LOG);

	*statp = NULL;
	if ((ret = __db_fchk(dbenv,
	    "DB_ENV->log_stat", flags, DB_STAT_CLEAR)) != 0)
		return (ret);

	dblp = dbenv->lg_handle;
	region = dblp->reginfo.primary;

	/* Allocate the result before taking the region lock. */
	if ((ret = __os_umalloc(dbenv, sizeof(DB_LOG_STAT), &stats)) != 0)
		return (ret);

	/* Copy, and optionally reset, the counters under the region lock. */
	R_LOCK(dbenv, &dblp->reginfo);
	*stats = region->stat;
	if (LF_ISSET(DB_STAT_CLEAR))
		memset(&region->stat, 0, sizeof(region->stat));

	/* Values that live outside region->stat. */
	stats->st_magic = region->persist.magic;
	stats->st_version = region->persist.version;
	stats->st_mode = region->persist.mode;
	stats->st_lg_bsize = region->buffer_size;
	stats->st_lg_size = region->log_nsize;

	/* Region mutex wait counts. */
	stats->st_region_wait = dblp->reginfo.rp->mutex.mutex_set_wait;
	stats->st_region_nowait = dblp->reginfo.rp->mutex.mutex_set_nowait;
	if (LF_ISSET(DB_STAT_CLEAR)) {
		dblp->reginfo.rp->mutex.mutex_set_wait = 0;
		dblp->reginfo.rp->mutex.mutex_set_nowait = 0;
	}
	stats->st_regsize = dblp->reginfo.rp->size;

	/* Current end of log and last LSN known to be on disk. */
	stats->st_cur_file = region->lsn.file;
	stats->st_cur_offset = region->lsn.offset;
	stats->st_disk_file = region->s_lsn.file;
	stats->st_disk_offset = region->s_lsn.offset;

	R_UNLOCK(dbenv, &dblp->reginfo);

	*statp = stats;
	return (0);
}
/*
 * __log_get_cached_ckp_lsn --
 *	Copy the checkpoint LSN cached in the log region to the caller.
 *
 * dbenv:	environment with an open log handle.
 * ckp_lsnp:	out; receives the region's cached_ckp_lsn.
 */
void
__log_get_cached_ckp_lsn(dbenv, ckp_lsnp)
	DB_ENV *dbenv;
	DB_LSN *ckp_lsnp;
{
	DB_LOG *log_handle;
	LOG *region;

	log_handle = (DB_LOG *)dbenv->lg_handle;

	/* Read the value under the region lock. */
	R_LOCK(dbenv, &log_handle->reginfo);
	region = (LOG *)log_handle->reginfo.primary;
	*ckp_lsnp = region->cached_ckp_lsn;
	R_UNLOCK(dbenv, &log_handle->reginfo);
}
/*
 * __log_region_size --
 *	Compute the number of bytes to request for the log region.
 *
 * dbenv:	environment supplying lg_bsize and lg_regionmax.
 *
 * Returns the log buffer size plus the configured region maximum, plus
 * room for the mutex maintenance area when that is compiled in and the
 * environment is threaded.
 */
static size_t
__log_region_size(dbenv)
	DB_ENV *dbenv;
{
	size_t total;

	total = dbenv->lg_bsize + dbenv->lg_regionmax;

#ifdef HAVE_MUTEX_SYSTEM_RESOURCES
	if (F_ISSET(dbenv, DB_ENV_THREAD))
		total += LG_MAINT_SIZE + sizeof(REGMAINT);
#endif

	return (total);
}
/*
 * __log_region_destroy --
 *	Pass the log region's maintenance area to __db_shlocks_destroy.
 *
 * dbenv:	not otherwise used here.
 * infop:	region whose primary LOG structure holds maint_off.
 *
 * NOTE(review): maint_off is only assigned in __log_init under
 * HAVE_MUTEX_SYSTEM_RESOURCES; presumably __db_shlocks_destroy expands to
 * nothing without it, which is why the lookup stays inside the call --
 * confirm before splitting this expression up.
 */
void
__log_region_destroy(dbenv, infop)
DB_ENV *dbenv;
REGINFO *infop;
{
/* Resolve rp->primary to the LOG, then its maint_off to the REGMAINT. */
__db_shlocks_destroy(infop, (REGMAINT *)R_ADDR(infop,
((LOG *)R_ADDR(infop, infop->rp->primary))->maint_off));
/* NOTE(review): presumably only silences unused-argument warnings -- confirm. */
COMPQUIET(dbenv, NULL);
COMPQUIET(infop, NULL);
}
int
__log_vtruncate(dbenv, lsn, ckplsn)
DB_ENV *dbenv;
DB_LSN *lsn, *ckplsn;
{
DBT log_dbt;
DB_FH fh;
DB_LOG *dblp;
DB_LOGC *logc;
DB_LSN end_lsn;
LOG *lp;
u_int32_t bytes, c_len;
int fn, ret, t_ret;
char *fname;
if ((ret = dbenv->log_cursor(dbenv, &logc, 0)) != 0)
return (ret);
memset(&log_dbt, 0, sizeof(log_dbt));
ret = logc->get(logc, lsn, &log_dbt, DB_SET);
c_len = logc->c_len;
if ((t_ret = logc->close(logc, 0)) != 0 && ret == 0)
ret = t_ret;
if (ret != 0)
return (ret);
dblp = (DB_LOG *)dbenv->lg_handle;
lp = (LOG *)dblp->reginfo.primary;
R_LOCK(dbenv, &dblp->reginfo);
end_lsn = lp->lsn;
lp->lsn = *lsn;
lp->len = c_len;
lp->lsn.offset += lp->len;
DB_ASSERT(lp->lsn.file >= ckplsn->file);
bytes = 0;
if (ckplsn->file != lp->lsn.file) {
bytes = lp->log_size - ckplsn->offset;
if (lp->lsn.file > ckplsn->file + 1)
bytes += lp->log_size *
(lp->lsn.file - ckplsn->file - 1);
bytes += lp->lsn.offset;
} else
bytes = lp->lsn.offset - ckplsn->offset;
lp->stat.st_wc_mbytes += bytes / MEGABYTE;
lp->stat.st_wc_bytes += bytes % MEGABYTE;
if (log_compare(&lp->s_lsn, lsn) > 0)
lp->s_lsn = lp->lsn;
if (lp->w_off >= lp->lsn.offset) {
lp->f_lsn = lp->lsn;
lp->w_off = lp->lsn.offset;
lp->b_off = 0;
} else
lp->b_off = lp->lsn.offset - lp->w_off;
ZERO_LSN(lp->waiting_lsn);
lp->ready_lsn = lp->lsn;
lp->wait_recs = 0;
lp->rcvd_recs = 0;
for (fn = lp->lsn.file + 1;; fn++) {
if (__log_name(dblp, fn, &fname, &fh, DB_OSO_RDONLY) != 0) {
__os_free(dbenv, fname);
break;
}
(void)__os_closehandle(dbenv, &fh);
ret = __os_unlink(dbenv, fname);
__os_free(dbenv, fname);
if (ret != 0)
goto err;
}
if ((ret = __log_zero(dbenv, &lp->lsn, &end_lsn)) != 0)
goto err;
err: R_UNLOCK(dbenv, &dblp->reginfo);
return (ret);
}
/*
 * __log_is_outdated --
 *	Report whether a missing log file is older than the current one.
 *
 * dbenv:	environment with an open log handle.
 * fnum:	log file number to test.
 * outdatedp:	out; 1 if the file does not exist and its number is below
 *		the region's current log file number, otherwise 0.
 *
 * Returns 0, or the error from __log_name.
 */
int
__log_is_outdated(dbenv, fnum, outdatedp)
	DB_ENV *dbenv;
	u_int32_t fnum;
	int *outdatedp;
{
	DB_LOG *handle;
	LOG *region;
	char *path;
	int ret;
	u_int32_t current;

	handle = dbenv->lg_handle;
	*outdatedp = 0;

	if ((ret = __log_name(handle, fnum, &path, NULL, 0)) != 0)
		return (ret);

	/* Only a missing file can be outdated. */
	if (__os_exists(path, NULL) != 0) {
		/* Read the current file number under the region lock. */
		R_LOCK(dbenv, &handle->reginfo);
		region = (LOG *)handle->reginfo.primary;
		current = region->lsn.file;
		R_UNLOCK(dbenv, &handle->reginfo);

		if (current > fnum)
			*outdatedp = 1;
	}

	__os_free(dbenv, path);
	return (ret);
}
/*
 * __log_zero --
 *	Overwrite part of the current log file with zero bytes, starting
 *	at from_lsn->offset.
 *
 * dbenv:	environment with an open log handle.
 * from_lsn:	start of the range to zero.
 * to_lsn:	end of the range.  If it is in a different file than
 *		from_lsn, zeroing runs to the current end of the file;
 *		if it is at or before from_lsn, nothing is written.
 *
 * Returns 0 on success, otherwise the error from opening, sizing,
 * seeking in, or writing to the log file.
 */
static int
__log_zero(dbenv, from_lsn, to_lsn)
	DB_ENV *dbenv;
	DB_LSN *from_lsn, *to_lsn;
{
	char *lname;
	DB_LOG *dblp;
	LOG *lp;
	int ret;
	size_t nbytes, len, nw;
	u_int8_t buf[4096];
	u_int32_t mbytes, bytes;

	dblp = dbenv->lg_handle;
	lp = (LOG *)dblp->reginfo.primary;
	lname = NULL;
	ret = 0;

	/* Make the cached file handle refer to the current log file. */
	if (dblp->lfname != lp->lsn.file) {
		if (F_ISSET(&dblp->lfh, DB_FH_VALID))
			(void)__os_closehandle(dbenv, &dblp->lfh);
		dblp->lfname = lp->lsn.file;
	}

	if (from_lsn->file != to_lsn->file) {
		/* Different files: zero through the end of this file. */
		if (!F_ISSET(&dblp->lfh, DB_FH_VALID) && (ret =
		    __log_name(dblp, dblp->lfname, &lname, &dblp->lfh, 0)) != 0)
			goto err;
		if ((ret = __os_ioinfo(dbenv,
		    NULL, &dblp->lfh, &mbytes, &bytes, NULL)) != 0)
			goto err;
		len = mbytes * MEGABYTE + bytes - from_lsn->offset;
	} else if (to_lsn->offset <= from_lsn->offset)
		return (0);
	else
		/* Same file: zero exactly the bytes between the offsets. */
		len = to_lsn->offset - from_lsn->offset;

	memset(buf, 0, sizeof(buf));

	/* Open the file if necessary and seek to the start of the range. */
	if (!F_ISSET(&dblp->lfh, DB_FH_VALID) &&
	    (ret = __log_name(dblp, dblp->lfname, &lname, &dblp->lfh, 0)) != 0)
		goto err;

	if ((ret = __os_seek(dbenv,
	    &dblp->lfh, 0, 0, from_lsn->offset, 0, DB_OS_SEEK_SET)) != 0)
		goto err;

	/* Write zeros one buffer at a time. */
	while (len > 0) {
		nbytes = len > sizeof(buf) ? sizeof(buf) : len;
		if ((ret =
		    __os_write(dbenv, &dblp->lfh, buf, nbytes, &nw)) != 0)
			goto err;
		len -= nbytes;
	}

	/* Single exit: free the name if one was allocated, report ret. */
err:	if (lname != NULL)
		__os_free(dbenv, lname);

	return (ret);
}