/* SCCS identification string; only compiled when LIBC_SCCS is defined and lint is not. */
#if defined(LIBC_SCCS) && !defined(lint)
static char sccsid[] = "@(#)bt_split.c 8.9 (Berkeley) 7/26/94";
#endif
#include <sys/cdefs.h>
__FBSDID("$FreeBSD: src/lib/libc/db/btree/bt_split.c,v 1.7 2004/09/13 22:07:24 kuriyama Exp $");
#include <sys/types.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <db.h>
#include "btree.h"
/* Forward declarations of the file-local helpers defined below. */
static int bt_broot(BTREE *, PAGE *, PAGE *, PAGE *);
static PAGE *bt_page (BTREE *, PAGE *, PAGE **, PAGE **, indx_t *, size_t);
static int bt_preserve(BTREE *, pgno_t);
static PAGE *bt_psplit (BTREE *, PAGE *, PAGE *, PAGE *, indx_t *, size_t);
static PAGE *bt_root (BTREE *, PAGE *, PAGE **, PAGE **, indx_t *, size_t);
static int bt_rroot(BTREE *, PAGE *, PAGE *, PAGE *);
static recno_t rec_total(PAGE *);
#ifdef STATISTICS
/*
 * Split counters, present only when STATISTICS is defined:
 *	bt_rootsplit	incremented by bt_root()
 *	bt_split	incremented by both bt_page() and bt_root()
 *	bt_sortsplit	incremented by bt_page() when the new item is appended
 *			past the end of the last page in the chain
 *	bt_pfxsaved	bytes saved in __bt_split() when the prefix routine
 *			yields a shorter internal key
 */
u_long bt_rootsplit, bt_split, bt_sortsplit, bt_pfxsaved;
#endif
int
__bt_split(t, sp, key, data, flags, ilen, argskip)
BTREE *t;
PAGE *sp;
const DBT *key, *data;
int flags;
size_t ilen;
u_int32_t argskip;
{
BINTERNAL *bi;
BLEAF *bl, *tbl;
DBT a, b;
EPGNO *parent;
PAGE *h, *l, *r, *lchild, *rchild;
indx_t nxtindex;
u_int16_t skip;
u_int32_t n, nbytes, nksize;
int parentsplit;
char *dest;
skip = argskip;
h = sp->pgno == P_ROOT ?
bt_root(t, sp, &l, &r, &skip, ilen) :
bt_page(t, sp, &l, &r, &skip, ilen);
if (h == NULL)
return (RET_ERROR);
h->linp[skip] = h->upper -= ilen;
dest = (char *)h + h->upper;
if (F_ISSET(t, R_RECNO))
WR_RLEAF(dest, data, flags)
else
WR_BLEAF(dest, key, data, flags)
if (sp->pgno == P_ROOT &&
(F_ISSET(t, R_RECNO) ?
bt_rroot(t, sp, l, r) : bt_broot(t, sp, l, r)) == RET_ERROR)
goto err2;
while ((parent = BT_POP(t)) != NULL) {
lchild = l;
rchild = r;
if ((h = mpool_get(t->bt_mp, parent->pgno, 0)) == NULL)
goto err2;
skip = parent->index + 1;
switch (rchild->flags & P_TYPE) {
case P_BINTERNAL:
bi = GETBINTERNAL(rchild, 0);
nbytes = NBINTERNAL(bi->ksize);
break;
case P_BLEAF:
bl = GETBLEAF(rchild, 0);
nbytes = NBINTERNAL(bl->ksize);
if (t->bt_pfx && !(bl->flags & P_BIGKEY) &&
(h->prevpg != P_INVALID || skip > 1)) {
tbl = GETBLEAF(lchild, NEXTINDEX(lchild) - 1);
a.size = tbl->ksize;
a.data = tbl->bytes;
b.size = bl->ksize;
b.data = bl->bytes;
nksize = t->bt_pfx(&a, &b);
n = NBINTERNAL(nksize);
if (n < nbytes) {
#ifdef STATISTICS
bt_pfxsaved += nbytes - n;
#endif
nbytes = n;
} else
nksize = 0;
} else
nksize = 0;
break;
case P_RINTERNAL:
case P_RLEAF:
nbytes = NRINTERNAL;
break;
default:
abort();
}
if (h->upper - h->lower < nbytes + sizeof(indx_t)) {
sp = h;
h = h->pgno == P_ROOT ?
bt_root(t, h, &l, &r, &skip, nbytes) :
bt_page(t, h, &l, &r, &skip, nbytes);
if (h == NULL)
goto err1;
parentsplit = 1;
} else {
if (skip < (nxtindex = NEXTINDEX(h)))
memmove(h->linp + skip + 1, h->linp + skip,
(nxtindex - skip) * sizeof(indx_t));
h->lower += sizeof(indx_t);
parentsplit = 0;
}
switch (rchild->flags & P_TYPE) {
case P_BINTERNAL:
h->linp[skip] = h->upper -= nbytes;
dest = (char *)h + h->linp[skip];
memmove(dest, bi, nbytes);
((BINTERNAL *)dest)->pgno = rchild->pgno;
break;
case P_BLEAF:
h->linp[skip] = h->upper -= nbytes;
dest = (char *)h + h->linp[skip];
WR_BINTERNAL(dest, nksize ? nksize : bl->ksize,
rchild->pgno, bl->flags & P_BIGKEY);
memmove(dest, bl->bytes, nksize ? nksize : bl->ksize);
if (bl->flags & P_BIGKEY &&
bt_preserve(t, *(pgno_t *)bl->bytes) == RET_ERROR)
goto err1;
break;
case P_RINTERNAL:
if (skip > 0)
dest = (char *)h + h->linp[skip - 1];
else
dest = (char *)l + l->linp[NEXTINDEX(l) - 1];
((RINTERNAL *)dest)->nrecs = rec_total(lchild);
((RINTERNAL *)dest)->pgno = lchild->pgno;
h->linp[skip] = h->upper -= nbytes;
dest = (char *)h + h->linp[skip];
((RINTERNAL *)dest)->nrecs = rec_total(rchild);
((RINTERNAL *)dest)->pgno = rchild->pgno;
break;
case P_RLEAF:
if (skip > 0)
dest = (char *)h + h->linp[skip - 1];
else
dest = (char *)l + l->linp[NEXTINDEX(l) - 1];
((RINTERNAL *)dest)->nrecs = NEXTINDEX(lchild);
((RINTERNAL *)dest)->pgno = lchild->pgno;
h->linp[skip] = h->upper -= nbytes;
dest = (char *)h + h->linp[skip];
((RINTERNAL *)dest)->nrecs = NEXTINDEX(rchild);
((RINTERNAL *)dest)->pgno = rchild->pgno;
break;
default:
abort();
}
if (!parentsplit) {
mpool_put(t->bt_mp, h, MPOOL_DIRTY);
break;
}
if (sp->pgno == P_ROOT &&
(F_ISSET(t, R_RECNO) ?
bt_rroot(t, sp, l, r) : bt_broot(t, sp, l, r)) == RET_ERROR)
goto err1;
mpool_put(t->bt_mp, lchild, MPOOL_DIRTY);
mpool_put(t->bt_mp, rchild, MPOOL_DIRTY);
}
mpool_put(t->bt_mp, l, MPOOL_DIRTY);
mpool_put(t->bt_mp, r, MPOOL_DIRTY);
return (RET_SUCCESS);
err1: mpool_put(t->bt_mp, lchild, MPOOL_DIRTY);
mpool_put(t->bt_mp, rchild, MPOOL_DIRTY);
err2: mpool_put(t->bt_mp, l, 0);
mpool_put(t->bt_mp, r, 0);
__dbpanic(t->bt_dbp);
return (RET_ERROR);
}
/*
 * bt_page --
 *	Split a non-root page into two pages.  The left half keeps h's page
 *	number; the right half is a newly allocated page linked in after it.
 *
 * Parameters:
 *	t:	tree
 *	h:	page to split
 *	lp:	set to the left page on success
 *	rp:	set to the right page on success
 *	skip:	in: index at which the new item goes on h;
 *		out: index at which it goes on the returned page
 *	ilen:	size in bytes of the new item
 *
 * Returns:
 *	The page on which the new item is to be inserted, or NULL on
 *	failure.  On failure *lp and *rp are not modified and the newly
 *	allocated right page is released back to the pool.
 */
static PAGE *
bt_page(t, h, lp, rp, skip, ilen)
	BTREE *t;
	PAGE *h, **lp, **rp;
	indx_t *skip;
	size_t ilen;
{
	PAGE *l, *r, *tp;
	pgno_t npg;

#ifdef STATISTICS
	++bt_split;
#endif
	/* Allocate and initialise the new right page. */
	if ((r = __bt_new(t, &npg)) == NULL)
		return (NULL);
	r->pgno = npg;
	r->lower = BTDATAOFF;
	r->upper = t->bt_psize;
	r->nextpg = h->nextpg;
	r->prevpg = h->pgno;
	r->flags = h->flags & P_TYPE;

	/*
	 * Fast path: h is the last page in its chain and the new item goes
	 * after its last item.  Nothing is moved; the new item becomes the
	 * only item on the right page, whose first index slot is reserved
	 * here.
	 */
	if (h->nextpg == P_INVALID && *skip == NEXTINDEX(h)) {
#ifdef STATISTICS
		++bt_sortsplit;
#endif
		h->nextpg = r->pgno;
		r->lower = BTDATAOFF + sizeof(indx_t);
		*skip = 0;
		*lp = h;
		*rp = r;
		return (r);
	}

	/*
	 * Build the left half in a scratch page.  The scratch page is later
	 * copied over h in its entirety, so it is zero-filled to keep
	 * uninitialised heap contents out of the page image.
	 */
	if ((l = calloc(1, t->bt_psize)) == NULL) {
		mpool_put(t->bt_mp, r, 0);
		return (NULL);
	}
#ifdef PURIFY
	memset(l, 0xff, t->bt_psize);
#endif
	l->pgno = h->pgno;
	l->nextpg = r->pgno;
	l->prevpg = h->prevpg;
	l->lower = BTDATAOFF;
	l->upper = t->bt_psize;
	l->flags = h->flags & P_TYPE;

	/* Point the page that followed h back at the new right page. */
	if (h->nextpg != P_INVALID) {
		if ((tp = mpool_get(t->bt_mp, h->nextpg, 0)) == NULL) {
			free(l);
			/* Release r here too, as the allocation-failure path does. */
			mpool_put(t->bt_mp, r, 0);
			return (NULL);
		}
		tp->prevpg = r->pgno;
		mpool_put(t->bt_mp, tp, MPOOL_DIRTY);
	}

	/*
	 * Distribute h's items over the scratch page and r, then overwrite h
	 * with the scratch page.  If the insertion page was the scratch
	 * page, it is now h.
	 */
	tp = bt_psplit(t, h, l, r, skip, ilen);

	memmove(h, l, t->bt_psize);
	if (tp == l)
		tp = h;
	free(l);

	*lp = h;
	*rp = r;
	return (tp);
}
static PAGE *
bt_root(t, h, lp, rp, skip, ilen)
BTREE *t;
PAGE *h, **lp, **rp;
indx_t *skip;
size_t ilen;
{
PAGE *l, *r, *tp;
pgno_t lnpg, rnpg;
#ifdef STATISTICS
++bt_split;
++bt_rootsplit;
#endif
if ((l = __bt_new(t, &lnpg)) == NULL ||
(r = __bt_new(t, &rnpg)) == NULL)
return (NULL);
l->pgno = lnpg;
r->pgno = rnpg;
l->nextpg = r->pgno;
r->prevpg = l->pgno;
l->prevpg = r->nextpg = P_INVALID;
l->lower = r->lower = BTDATAOFF;
l->upper = r->upper = t->bt_psize;
l->flags = r->flags = h->flags & P_TYPE;
tp = bt_psplit(t, h, l, r, skip, ilen);
*lp = l;
*rp = r;
return (tp);
}
static int
bt_rroot(t, h, l, r)
BTREE *t;
PAGE *h, *l, *r;
{
char *dest;
h->linp[0] = h->upper = t->bt_psize - NRINTERNAL;
dest = (char *)h + h->upper;
WR_RINTERNAL(dest,
l->flags & P_RLEAF ? NEXTINDEX(l) : rec_total(l), l->pgno);
h->linp[1] = h->upper -= NRINTERNAL;
dest = (char *)h + h->upper;
WR_RINTERNAL(dest,
r->flags & P_RLEAF ? NEXTINDEX(r) : rec_total(r), r->pgno);
h->lower = BTDATAOFF + 2 * sizeof(indx_t);
h->flags &= ~P_TYPE;
h->flags |= P_RINTERNAL;
mpool_put(t->bt_mp, h, MPOOL_DIRTY);
return (RET_SUCCESS);
}
static int
bt_broot(t, h, l, r)
BTREE *t;
PAGE *h, *l, *r;
{
BINTERNAL *bi;
BLEAF *bl;
u_int32_t nbytes;
char *dest;
nbytes = NBINTERNAL(0);
h->linp[0] = h->upper = t->bt_psize - nbytes;
dest = (char *)h + h->upper;
WR_BINTERNAL(dest, 0, l->pgno, 0);
switch (h->flags & P_TYPE) {
case P_BLEAF:
bl = GETBLEAF(r, 0);
nbytes = NBINTERNAL(bl->ksize);
h->linp[1] = h->upper -= nbytes;
dest = (char *)h + h->upper;
WR_BINTERNAL(dest, bl->ksize, r->pgno, 0);
memmove(dest, bl->bytes, bl->ksize);
if (bl->flags & P_BIGKEY &&
bt_preserve(t, *(pgno_t *)bl->bytes) == RET_ERROR)
return (RET_ERROR);
break;
case P_BINTERNAL:
bi = GETBINTERNAL(r, 0);
nbytes = NBINTERNAL(bi->ksize);
h->linp[1] = h->upper -= nbytes;
dest = (char *)h + h->upper;
memmove(dest, bi, nbytes);
((BINTERNAL *)dest)->pgno = r->pgno;
break;
default:
abort();
}
h->lower = BTDATAOFF + 2 * sizeof(indx_t);
h->flags &= ~P_TYPE;
h->flags |= P_BINTERNAL;
mpool_put(t->bt_mp, h, MPOOL_DIRTY);
return (RET_SUCCESS);
}
/*
 * bt_psplit --
 *	Copy the items of page h onto pages l and r, reserving an index slot
 *	for a new item of ilen bytes at position *pskip.  The new item itself
 *	is not written here.
 *
 * Parameters:
 *	t:	tree
 *	h:	page being split
 *	l:	page that receives the first part of h's items
 *	r:	page that receives the remainder
 *	pskip:	in: index on h of the new item; out: reduced by the number
 *		of items copied to l when the slot ends up on r
 *	ilen:	size in bytes of the new item
 *
 * Returns:
 *	Whichever of l or r holds the reserved slot.
 */
static PAGE *
bt_psplit(t, h, l, r, pskip, ilen)
BTREE *t;
PAGE *h, *l, *r;
indx_t *pskip;
size_t ilen;
{
BINTERNAL *bi;
BLEAF *bl;
CURSOR *c;
RLEAF *rl;
PAGE *rval;
void *src;
indx_t full, half, nxt, off, skip, top, used;
u_int32_t nbytes;
int bigkeycnt, isbigkey;
bigkeycnt = 0;
skip = *pskip;
/* full is the space after the page header; half is the fill target for l. */
full = t->bt_psize - BTDATAOFF;
half = full / 2;
used = 0;
/*
 * First pass: fill l.  nxt indexes items on h, off indexes slots on l;
 * they differ by one once the new item's slot has been passed.
 */
for (nxt = off = 0, top = NEXTINDEX(h); nxt < top; ++off) {
if (skip == off) {
/* This slot is for the new item: account for its size, copy nothing. */
nbytes = ilen;
isbigkey = 0;
} else
/* Locate the next existing item on h and compute its size. */
switch (h->flags & P_TYPE) {
case P_BINTERNAL:
src = bi = GETBINTERNAL(h, nxt);
nbytes = NBINTERNAL(bi->ksize);
isbigkey = bi->flags & P_BIGKEY;
break;
case P_BLEAF:
src = bl = GETBLEAF(h, nxt);
nbytes = NBLEAF(bl);
isbigkey = bl->flags & P_BIGKEY;
break;
case P_RINTERNAL:
src = GETRINTERNAL(h, nxt);
nbytes = NRINTERNAL;
isbigkey = 0;
break;
case P_RLEAF:
src = rl = GETRLEAF(h, nxt);
nbytes = NRLEAF(rl);
isbigkey = 0;
break;
default:
abort();
}
/*
 * Stop without taking this item if the slot has been reached or passed
 * and the item would fill l completely, or if it is the last item on h.
 * off is stepped back so that it names the last slot actually used.
 */
if ((skip <= off && used + nbytes + sizeof(indx_t) >= full)
|| nxt == top - 1) {
--off;
break;
}
/* Copy an existing item onto l. */
if (skip != off) {
++nxt;
l->linp[off] = l->upper -= nbytes;
memmove((char *)l + l->upper, src, nbytes);
}
used += nbytes + sizeof(indx_t);
/*
 * Once l is at least half full, stop -- unless the item just handled
 * was a big key, in which case the loop continues until three such
 * items have been counted.
 */
if (used >= half) {
if (!isbigkey || bigkeycnt == 3)
break;
else
++bigkeycnt;
}
}
/* off is the last slot used on l, so off + 1 index slots are occupied. */
l->lower += (off + 1) * sizeof(indx_t);
/*
 * If the tree's cursor is initialised and positioned on h, shift its
 * index past the insertion point when needed and move it to whichever
 * of l or r now holds the item it referred to.
 */
c = &t->bt_cursor;
if (F_ISSET(c, CURS_INIT) && c->pg.pgno == h->pgno) {
if (c->pg.index >= skip)
++c->pg.index;
if (c->pg.index < nxt)
c->pg.pgno = l->pgno;
else {
c->pg.pgno = r->pgno;
c->pg.index -= nxt;
}
}
/*
 * If the slot landed on l, return l and set skip to MAX_PAGE_OFFSET
 * (presumably a value no item index reaches) so the second pass leaves
 * no gap.  Otherwise return r and rebase the caller's index onto r.
 */
if (skip <= off) {
skip = MAX_PAGE_OFFSET;
rval = l;
} else {
rval = r;
*pskip -= nxt;
}
/* Second pass: copy the remaining items onto r. */
for (off = 0; nxt < top; ++off) {
/* Leave an empty index slot on r where the new item belongs. */
if (skip == nxt) {
++off;
skip = MAX_PAGE_OFFSET;
}
switch (h->flags & P_TYPE) {
case P_BINTERNAL:
src = bi = GETBINTERNAL(h, nxt);
nbytes = NBINTERNAL(bi->ksize);
break;
case P_BLEAF:
src = bl = GETBLEAF(h, nxt);
nbytes = NBLEAF(bl);
break;
case P_RINTERNAL:
src = GETRINTERNAL(h, nxt);
nbytes = NRINTERNAL;
break;
case P_RLEAF:
src = rl = GETRLEAF(h, nxt);
nbytes = NRLEAF(rl);
break;
default:
abort();
}
++nxt;
r->linp[off] = r->upper -= nbytes;
memmove((char *)r + r->upper, src, nbytes);
}
r->lower += off * sizeof(indx_t);
/* The new item goes after every copied item: reserve the final slot on r. */
if (skip == top)
r->lower += sizeof(indx_t);
return (rval);
}
/*
 * bt_preserve --
 *	Set the P_PRESERVE flag on page pg and write the page back dirty.
 *
 * Parameters:
 *	t:	tree
 *	pg:	number of the page to mark
 *
 * Returns:
 *	RET_SUCCESS, or RET_ERROR if the page could not be fetched.
 */
static int
bt_preserve(t, pg)
	BTREE *t;
	pgno_t pg;
{
	PAGE *marked;

	marked = mpool_get(t->bt_mp, pg, 0);
	if (marked == NULL)
		return (RET_ERROR);

	marked->flags |= P_PRESERVE;
	mpool_put(t->bt_mp, marked, MPOOL_DIRTY);
	return (RET_SUCCESS);
}
/*
 * rec_total --
 *	Sum the nrecs fields of every entry on a recno internal page.
 *
 * Parameters:
 *	h:	page whose entries are read with GETRINTERNAL
 *
 * Returns:
 *	The sum of the entries' record counts (0 for a page with no entries).
 */
static recno_t
rec_total(h)
	PAGE *h;
{
	recno_t sum;
	indx_t i, count;

	sum = 0;
	count = NEXTINDEX(h);
	i = 0;
	while (i < count) {
		sum += GETRINTERNAL(h, i)->nrecs;
		++i;
	}
	return (sum);
}