/*- * See the file LICENSE for redistribution information. * * Copyright (c) 1999,2008 Oracle. All rights reserved. * * $Id: hash_verify.c,v 12.33 2008/03/12 22:33:03 mbrey Exp $ */ #include "db_config.h" #include "db_int.h" #include "dbinc/db_page.h" #include "dbinc/db_verify.h" #include "dbinc/btree.h" #include "dbinc/hash.h" #include "dbinc/mp.h" static int __ham_dups_unsorted __P((DB *, u_int8_t *, u_int32_t)); static int __ham_vrfy_bucket __P((DB *, VRFY_DBINFO *, HMETA *, u_int32_t, u_int32_t)); static int __ham_vrfy_item __P((DB *, VRFY_DBINFO *, db_pgno_t, PAGE *, u_int32_t, u_int32_t)); /* * __ham_vrfy_meta -- * Verify the hash-specific part of a metadata page. * * Note that unlike btree, we don't save things off, because we * will need most everything again to verify each page and the * amount of state here is significant. * * PUBLIC: int __ham_vrfy_meta __P((DB *, VRFY_DBINFO *, HMETA *, * PUBLIC: db_pgno_t, u_int32_t)); */ int __ham_vrfy_meta(dbp, vdp, m, pgno, flags) DB *dbp; VRFY_DBINFO *vdp; HMETA *m; db_pgno_t pgno; u_int32_t flags; { ENV *env; HASH *hashp; VRFY_PAGEINFO *pip; int i, ret, t_ret, isbad; u_int32_t pwr, mbucket; u_int32_t (*hfunc) __P((DB *, const void *, u_int32_t)); env = dbp->env; isbad = 0; if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0) return (ret); hashp = dbp->h_internal; if (hashp != NULL && hashp->h_hash != NULL) hfunc = hashp->h_hash; else hfunc = __ham_func5; /* * If we haven't already checked the common fields in pagezero, * check them. */ if (!F_ISSET(pip, VRFY_INCOMPLETE) && (ret = __db_vrfy_meta(dbp, vdp, &m->dbmeta, pgno, flags)) != 0) { if (ret == DB_VERIFY_BAD) isbad = 1; else goto err; } /* h_charkey */ if (!LF_ISSET(DB_NOORDERCHK)) if (m->h_charkey != hfunc(dbp, CHARKEY, sizeof(CHARKEY))) { EPRINT((env, "Page %lu: database has custom hash function; reverify with DB_NOORDERCHK set", (u_long)pgno)); /* * Return immediately; this is probably a sign of user * error rather than database corruption, so we want to * avoid extraneous errors. */ isbad = 1; goto err; } /* max_bucket must be less than the last pgno. */ if (m->max_bucket > vdp->last_pgno) { EPRINT((env, "Page %lu: Impossible max_bucket %lu on meta page", (u_long)pgno, (u_long)m->max_bucket)); /* * Most other fields depend somehow on max_bucket, so * we just return--there will be lots of extraneous * errors. */ isbad = 1; goto err; } /* * max_bucket, high_mask and low_mask: high_mask must be one * less than the next power of two above max_bucket, and * low_mask must be one less than the power of two below it. */ pwr = (m->max_bucket == 0) ? 1 : 1 << __db_log2(m->max_bucket + 1); if (m->high_mask != pwr - 1) { EPRINT((env, "Page %lu: incorrect high_mask %lu, should be %lu", (u_long)pgno, (u_long)m->high_mask, (u_long)pwr - 1)); isbad = 1; } pwr >>= 1; if (m->low_mask != pwr - 1) { EPRINT((env, "Page %lu: incorrect low_mask %lu, should be %lu", (u_long)pgno, (u_long)m->low_mask, (u_long)pwr - 1)); isbad = 1; } /* ffactor: no check possible. */ pip->h_ffactor = m->ffactor; /* * nelem: just make sure it's not astronomical for now. This is the * same check that hash_upgrade does, since there was a bug in 2.X * which could make nelem go "negative". */ if (m->nelem > 0x80000000) { EPRINT((env, "Page %lu: suspiciously high nelem of %lu", (u_long)pgno, (u_long)m->nelem)); isbad = 1; pip->h_nelem = 0; } else pip->h_nelem = m->nelem; /* flags */ if (F_ISSET(&m->dbmeta, DB_HASH_DUP)) F_SET(pip, VRFY_HAS_DUPS); if (F_ISSET(&m->dbmeta, DB_HASH_DUPSORT)) F_SET(pip, VRFY_HAS_DUPSORT); /* XXX: Why is the DB_HASH_SUBDB flag necessary? */ /* spares array */ for (i = 0; m->spares[i] != 0 && i < NCACHED; i++) { /* * We set mbucket to the maximum bucket that would use a given * spares entry; we want to ensure that it's always less * than last_pgno. */ mbucket = (1 << i) - 1; if (BS_TO_PAGE(mbucket, m->spares) > vdp->last_pgno) { EPRINT((env, "Page %lu: spares array entry %d is invalid", (u_long)pgno, i)); isbad = 1; } } err: if ((t_ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0 && ret == 0) ret = t_ret; if (LF_ISSET(DB_SALVAGE) && (t_ret = __db_salvage_markdone(vdp, pgno)) != 0 && ret == 0) ret = t_ret; return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret); } /* * __ham_vrfy -- * Verify hash page. * * PUBLIC: int __ham_vrfy __P((DB *, VRFY_DBINFO *, PAGE *, db_pgno_t, * PUBLIC: u_int32_t)); */ int __ham_vrfy(dbp, vdp, h, pgno, flags) DB *dbp; VRFY_DBINFO *vdp; PAGE *h; db_pgno_t pgno; u_int32_t flags; { ENV *env; VRFY_PAGEINFO *pip; u_int32_t ent, himark, inpend; db_indx_t *inp; int isbad, ret, t_ret; env = dbp->env; isbad = 0; if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0) return (ret); if (TYPE(h) != P_HASH && TYPE(h) != P_HASH_UNSORTED) { ret = __db_unknown_path(env, "__ham_vrfy"); goto err; } /* Verify and save off fields common to all PAGEs. */ if ((ret = __db_vrfy_datapage(dbp, vdp, h, pgno, flags)) != 0) { if (ret == DB_VERIFY_BAD) isbad = 1; else goto err; } /* * Verify inp[]. Each offset from 0 to NUM_ENT(h) must be lower * than the previous one, higher than the current end of the inp array, * and lower than the page size. * * In any case, we return immediately if things are bad, as it would * be unsafe to proceed. */ inp = P_INP(dbp, h); for (ent = 0, himark = dbp->pgsize, inpend = (u_int32_t)((u_int8_t *)inp - (u_int8_t *)h); ent < NUM_ENT(h); ent++) if (inp[ent] >= himark) { EPRINT((env, "Page %lu: item %lu is out of order or nonsensical", (u_long)pgno, (u_long)ent)); isbad = 1; goto err; } else if (inpend >= himark) { EPRINT((env, "Page %lu: entries array collided with data", (u_long)pgno)); isbad = 1; goto err; } else { himark = inp[ent]; inpend += sizeof(db_indx_t); if ((ret = __ham_vrfy_item( dbp, vdp, pgno, h, ent, flags)) != 0) goto err; } if (!LF_ISSET(DB_NOORDERCHK) && TYPE(h) == P_HASH && (ret = __ham_verify_sorted_page(dbp, vdp->thread_info, NULL, h)) != 0) isbad = 1; err: if ((t_ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0 && ret == 0) ret = t_ret; return (ret == 0 && isbad == 1 ? DB_VERIFY_BAD : ret); } /* * __ham_vrfy_item -- * Given a hash page and an offset, sanity-check the item itself, * and save off any overflow items or off-page dup children as necessary. */ static int __ham_vrfy_item(dbp, vdp, pgno, h, i, flags) DB *dbp; VRFY_DBINFO *vdp; db_pgno_t pgno; PAGE *h; u_int32_t i, flags; { HOFFDUP hod; HOFFPAGE hop; VRFY_CHILDINFO child; VRFY_PAGEINFO *pip; db_indx_t offset, len, dlen, elen; int ret, t_ret; u_int8_t *databuf; if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0) return (ret); switch (HPAGE_TYPE(dbp, h, i)) { case H_KEYDATA: /* Nothing to do here--everything but the type field is data */ break; case H_DUPLICATE: /* Are we a datum or a key? Better be the former. */ if (i % 2 == 0) { EPRINT((dbp->env, "Page %lu: hash key stored as duplicate item %lu", (u_long)pip->pgno, (u_long)i)); } /* * Dups are encoded as a series within a single HKEYDATA, * in which each dup is surrounded by a copy of its length * on either side (so that the series can be walked in either * direction. We loop through this series and make sure * each dup is reasonable. * * Note that at this point, we've verified item i-1, so * it's safe to use LEN_HKEYDATA (which looks at inp[i-1]). */ len = LEN_HKEYDATA(dbp, h, dbp->pgsize, i); databuf = HKEYDATA_DATA(P_ENTRY(dbp, h, i)); for (offset = 0; offset < len; offset += DUP_SIZE(dlen)) { memcpy(&dlen, databuf + offset, sizeof(db_indx_t)); /* Make sure the length is plausible. */ if (offset + DUP_SIZE(dlen) > len) { EPRINT((dbp->env, "Page %lu: duplicate item %lu has bad length", (u_long)pip->pgno, (u_long)i)); ret = DB_VERIFY_BAD; goto err; } /* * Make sure the second copy of the length is the * same as the first. */ memcpy(&elen, databuf + offset + dlen + sizeof(db_indx_t), sizeof(db_indx_t)); if (elen != dlen) { EPRINT((dbp->env, "Page %lu: duplicate item %lu has two different lengths", (u_long)pip->pgno, (u_long)i)); ret = DB_VERIFY_BAD; goto err; } } F_SET(pip, VRFY_HAS_DUPS); if (!LF_ISSET(DB_NOORDERCHK) && __ham_dups_unsorted(dbp, databuf, len)) F_SET(pip, VRFY_DUPS_UNSORTED); break; case H_OFFPAGE: /* Offpage item. Make sure pgno is sane, save off. */ memcpy(&hop, P_ENTRY(dbp, h, i), HOFFPAGE_SIZE); if (!IS_VALID_PGNO(hop.pgno) || hop.pgno == pip->pgno || hop.pgno == PGNO_INVALID) { EPRINT((dbp->env, "Page %lu: offpage item %lu has bad pgno %lu", (u_long)pip->pgno, (u_long)i, (u_long)hop.pgno)); ret = DB_VERIFY_BAD; goto err; } memset(&child, 0, sizeof(VRFY_CHILDINFO)); child.pgno = hop.pgno; child.type = V_OVERFLOW; child.tlen = hop.tlen; /* This will get checked later. */ if ((ret = __db_vrfy_childput(vdp, pip->pgno, &child)) != 0) goto err; break; case H_OFFDUP: /* Offpage duplicate item. Same drill. */ memcpy(&hod, P_ENTRY(dbp, h, i), HOFFDUP_SIZE); if (!IS_VALID_PGNO(hod.pgno) || hod.pgno == pip->pgno || hod.pgno == PGNO_INVALID) { EPRINT((dbp->env, "Page %lu: offpage item %lu has bad page number", (u_long)pip->pgno, (u_long)i)); ret = DB_VERIFY_BAD; goto err; } memset(&child, 0, sizeof(VRFY_CHILDINFO)); child.pgno = hod.pgno; child.type = V_DUPLICATE; if ((ret = __db_vrfy_childput(vdp, pip->pgno, &child)) != 0) goto err; F_SET(pip, VRFY_HAS_DUPS); break; default: EPRINT((dbp->env, "Page %lu: item %lu has bad type", (u_long)pip->pgno, (u_long)i)); ret = DB_VERIFY_BAD; break; } err: if ((t_ret = __db_vrfy_putpageinfo(dbp->env, vdp, pip)) != 0 && ret == 0) ret = t_ret; return (ret); } /* * __ham_vrfy_structure -- * Verify the structure of a hash database. * * PUBLIC: int __ham_vrfy_structure __P((DB *, VRFY_DBINFO *, db_pgno_t, * PUBLIC: u_int32_t)); */ int __ham_vrfy_structure(dbp, vdp, meta_pgno, flags) DB *dbp; VRFY_DBINFO *vdp; db_pgno_t meta_pgno; u_int32_t flags; { DB *pgset; DB_MPOOLFILE *mpf; HMETA *m; PAGE *h; VRFY_PAGEINFO *pip; int isbad, p, ret, t_ret; db_pgno_t pgno; u_int32_t bucket, spares_entry; mpf = dbp->mpf; pgset = vdp->pgset; h = NULL; ret = isbad = 0; if ((ret = __db_vrfy_pgset_get(pgset, vdp->thread_info, meta_pgno, &p)) != 0) return (ret); if (p != 0) { EPRINT((dbp->env, "Page %lu: Hash meta page referenced twice", (u_long)meta_pgno)); return (DB_VERIFY_BAD); } if ((ret = __db_vrfy_pgset_inc(pgset, vdp->thread_info, meta_pgno)) != 0) return (ret); /* Get the meta page; we'll need it frequently. */ if ((ret = __memp_fget(mpf, &meta_pgno, vdp->thread_info, NULL, 0, &m)) != 0) return (ret); /* Loop through bucket by bucket. */ for (bucket = 0; bucket <= m->max_bucket; bucket++) if ((ret = __ham_vrfy_bucket(dbp, vdp, m, bucket, flags)) != 0) { if (ret == DB_VERIFY_BAD) isbad = 1; else goto err; } /* * There may be unused hash pages corresponding to buckets * that have been allocated but not yet used. These may be * part of the current doubling above max_bucket, or they may * correspond to buckets that were used in a transaction * that then aborted. * * Loop through them, as far as the spares array defines them, * and make sure they're all empty. * * Note that this should be safe, since we've already verified * that the spares array is sane. */ for (bucket = m->max_bucket + 1; spares_entry = __db_log2(bucket + 1), spares_entry < NCACHED && m->spares[spares_entry] != 0; bucket++) { pgno = BS_TO_PAGE(bucket, m->spares); if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0) goto err; /* It's okay if these pages are totally zeroed; unmark it. */ F_CLR(pip, VRFY_IS_ALLZEROES); /* It's also OK if this page is simply invalid. */ if (pip->type == P_INVALID) { if ((ret = __db_vrfy_putpageinfo(dbp->env, vdp, pip)) != 0) goto err; continue; } if (pip->type != P_HASH && pip->type != P_HASH_UNSORTED) { EPRINT((dbp->env, "Page %lu: hash bucket %lu maps to non-hash page", (u_long)pgno, (u_long)bucket)); isbad = 1; } else if (pip->entries != 0) { EPRINT((dbp->env, "Page %lu: non-empty page in unused hash bucket %lu", (u_long)pgno, (u_long)bucket)); isbad = 1; } else { if ((ret = __db_vrfy_pgset_get(pgset, vdp->thread_info, pgno, &p)) != 0) goto err; if (p != 0) { EPRINT((dbp->env, "Page %lu: above max_bucket referenced", (u_long)pgno)); isbad = 1; } else { if ((ret = __db_vrfy_pgset_inc(pgset, vdp->thread_info, pgno)) != 0) goto err; if ((ret = __db_vrfy_putpageinfo(dbp->env, vdp, pip)) != 0) goto err; continue; } } /* If we got here, it's an error. */ (void)__db_vrfy_putpageinfo(dbp->env, vdp, pip); goto err; } err: if ((t_ret = __memp_fput(mpf, vdp->thread_info, m, dbp->priority)) != 0) return (t_ret); if (h != NULL && (t_ret = __memp_fput(mpf, vdp->thread_info, h, dbp->priority)) != 0) return (t_ret); return ((isbad == 1 && ret == 0) ? DB_VERIFY_BAD: ret); } /* * __ham_vrfy_bucket -- * Verify a given bucket. */ static int __ham_vrfy_bucket(dbp, vdp, m, bucket, flags) DB *dbp; VRFY_DBINFO *vdp; HMETA *m; u_int32_t bucket, flags; { ENV *env; HASH *hashp; VRFY_CHILDINFO *child; VRFY_PAGEINFO *mip, *pip; int ret, t_ret, isbad, p; db_pgno_t pgno, next_pgno; DBC *cc; u_int32_t (*hfunc) __P((DB *, const void *, u_int32_t)); env = dbp->env; isbad = 0; pip = NULL; cc = NULL; hashp = dbp->h_internal; if (hashp != NULL && hashp->h_hash != NULL) hfunc = hashp->h_hash; else hfunc = __ham_func5; if ((ret = __db_vrfy_getpageinfo(vdp, PGNO(m), &mip)) != 0) return (ret); /* Calculate the first pgno for this bucket. */ pgno = BS_TO_PAGE(bucket, m->spares); if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0) goto err; /* Make sure we got a plausible page number. */ if (pgno > vdp->last_pgno || (pip->type != P_HASH && pip->type != P_HASH_UNSORTED)) { EPRINT((env, "Page %lu: impossible first page in bucket %lu", (u_long)pgno, (u_long)bucket)); /* Unsafe to continue. */ isbad = 1; goto err; } if (pip->prev_pgno != PGNO_INVALID) { EPRINT((env, "Page %lu: first page in hash bucket %lu has a prev_pgno", (u_long)pgno, (u_long)bucket)); isbad = 1; } /* * Set flags for dups and sorted dups. */ flags |= F_ISSET(mip, VRFY_HAS_DUPS) ? DB_ST_DUPOK : 0; flags |= F_ISSET(mip, VRFY_HAS_DUPSORT) ? DB_ST_DUPSORT : 0; /* Loop until we find a fatal bug, or until we run out of pages. */ for (;;) { /* Provide feedback on our progress to the application. */ if (!LF_ISSET(DB_SALVAGE)) __db_vrfy_struct_feedback(dbp, vdp); if ((ret = __db_vrfy_pgset_get(vdp->pgset, vdp->thread_info, pgno, &p)) != 0) goto err; if (p != 0) { EPRINT((env, "Page %lu: hash page referenced twice", (u_long)pgno)); isbad = 1; /* Unsafe to continue. */ goto err; } else if ((ret = __db_vrfy_pgset_inc(vdp->pgset, vdp->thread_info, pgno)) != 0) goto err; /* * Hash pages that nothing has ever hashed to may never * have actually come into existence, and may appear to be * entirely zeroed. This is acceptable, and since there's * no real way for us to know whether this has actually * occurred, we clear the "wholly zeroed" flag on every * hash page. A wholly zeroed page, by nature, will appear * to have no flags set and zero entries, so should * otherwise verify correctly. */ F_CLR(pip, VRFY_IS_ALLZEROES); /* If we have dups, our meta page had better know about it. */ if (F_ISSET(pip, VRFY_HAS_DUPS) && !F_ISSET(mip, VRFY_HAS_DUPS)) { EPRINT((env, "Page %lu: duplicates present in non-duplicate database", (u_long)pgno)); isbad = 1; } /* * If the database has sorted dups, this page had better * not have unsorted ones. */ if (F_ISSET(mip, VRFY_HAS_DUPSORT) && F_ISSET(pip, VRFY_DUPS_UNSORTED)) { EPRINT((env, "Page %lu: unsorted dups in sorted-dup database", (u_long)pgno)); isbad = 1; } /* Walk overflow chains and offpage dup trees. */ if ((ret = __db_vrfy_childcursor(vdp, &cc)) != 0) goto err; for (ret = __db_vrfy_ccset(cc, pip->pgno, &child); ret == 0; ret = __db_vrfy_ccnext(cc, &child)) if (child->type == V_OVERFLOW) { if ((ret = __db_vrfy_ovfl_structure(dbp, vdp, child->pgno, child->tlen, flags | DB_ST_OVFL_LEAF)) != 0) { if (ret == DB_VERIFY_BAD) isbad = 1; else goto err; } } else if (child->type == V_DUPLICATE) { if ((ret = __db_vrfy_duptype(dbp, vdp, child->pgno, flags)) != 0) { isbad = 1; continue; } if ((ret = __bam_vrfy_subtree(dbp, vdp, child->pgno, NULL, NULL, flags | DB_ST_RECNUM | DB_ST_DUPSET | DB_ST_TOPLEVEL, NULL, NULL, NULL)) != 0) { if (ret == DB_VERIFY_BAD) isbad = 1; else goto err; } } if ((ret = __db_vrfy_ccclose(cc)) != 0) goto err; cc = NULL; /* If it's safe to check that things hash properly, do so. */ if (isbad == 0 && !LF_ISSET(DB_NOORDERCHK) && (ret = __ham_vrfy_hashing(dbp, pip->entries, m, bucket, pgno, flags, hfunc)) != 0) { if (ret == DB_VERIFY_BAD) isbad = 1; else goto err; } next_pgno = pip->next_pgno; ret = __db_vrfy_putpageinfo(env, vdp, pip); pip = NULL; if (ret != 0) goto err; if (next_pgno == PGNO_INVALID) break; /* End of the bucket. */ /* We already checked this, but just in case... */ if (!IS_VALID_PGNO(next_pgno)) { EPRINT((env, "Page %lu: hash page has bad next_pgno", (u_long)pgno)); isbad = 1; goto err; } if ((ret = __db_vrfy_getpageinfo(vdp, next_pgno, &pip)) != 0) goto err; if (pip->prev_pgno != pgno) { EPRINT((env, "Page %lu: hash page has bad prev_pgno", (u_long)next_pgno)); isbad = 1; } pgno = next_pgno; } err: if (cc != NULL && ((t_ret = __db_vrfy_ccclose(cc)) != 0) && ret == 0) ret = t_ret; if (mip != NULL && ((t_ret = __db_vrfy_putpageinfo(env, vdp, mip)) != 0) && ret == 0) ret = t_ret; if (pip != NULL && ((t_ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0) && ret == 0) ret = t_ret; return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret); } /* * __ham_vrfy_hashing -- * Verify that all items on a given hash page hash correctly. * * PUBLIC: int __ham_vrfy_hashing __P((DB *, * PUBLIC: u_int32_t, HMETA *, u_int32_t, db_pgno_t, u_int32_t, * PUBLIC: u_int32_t (*) __P((DB *, const void *, u_int32_t)))); */ int __ham_vrfy_hashing(dbp, nentries, m, thisbucket, pgno, flags, hfunc) DB *dbp; u_int32_t nentries; HMETA *m; u_int32_t thisbucket; db_pgno_t pgno; u_int32_t flags; u_int32_t (*hfunc) __P((DB *, const void *, u_int32_t)); { DBT dbt; DB_MPOOLFILE *mpf; DB_THREAD_INFO *ip; PAGE *h; db_indx_t i; int ret, t_ret, isbad; u_int32_t hval, bucket; mpf = dbp->mpf; ret = isbad = 0; memset(&dbt, 0, sizeof(DBT)); F_SET(&dbt, DB_DBT_REALLOC); ENV_GET_THREAD_INFO(dbp->env, ip); if ((ret = __memp_fget(mpf, &pgno, ip, NULL, 0, &h)) != 0) return (ret); for (i = 0; i < nentries; i += 2) { /* * We've already verified the page integrity and that of any * overflow chains linked off it; it is therefore safe to use * __db_ret. It's also not all that much slower, since we have * to copy every hash item to deal with alignment anyway; we * can tweak this a bit if this proves to be a bottleneck, * but for now, take the easy route. */ if ((ret = __db_ret(dbp, ip, NULL, h, i, &dbt, NULL, NULL)) != 0) goto err; hval = hfunc(dbp, dbt.data, dbt.size); bucket = hval & m->high_mask; if (bucket > m->max_bucket) bucket = bucket & m->low_mask; if (bucket != thisbucket) { EPRINT((dbp->env, "Page %lu: item %lu hashes incorrectly", (u_long)pgno, (u_long)i)); isbad = 1; } } err: if (dbt.data != NULL) __os_ufree(dbp->env, dbt.data); if ((t_ret = __memp_fput(mpf, ip, h, dbp->priority)) != 0) return (t_ret); return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret); } /* * __ham_salvage -- * Safely dump out anything that looks like a key on an alleged * hash page. * * PUBLIC: int __ham_salvage __P((DB *, VRFY_DBINFO *, db_pgno_t, PAGE *, * PUBLIC: void *, int (*)(void *, const void *), u_int32_t)); */ int __ham_salvage(dbp, vdp, pgno, h, handle, callback, flags) DB *dbp; VRFY_DBINFO *vdp; db_pgno_t pgno; PAGE *h; void *handle; int (*callback) __P((void *, const void *)); u_int32_t flags; { DBT dbt, key_dbt, unkdbt; db_pgno_t dpgno; int ret, err_ret, t_ret; u_int32_t himark, i; u_int8_t *hk, *p; void *buf, *key_buf; db_indx_t dlen, len, tlen; memset(&dbt, 0, sizeof(DBT)); dbt.flags = DB_DBT_REALLOC; DB_INIT_DBT(unkdbt, "UNKNOWN", sizeof("UNKNOWN") - 1); err_ret = 0; /* * Allocate a buffer for overflow items. Start at one page; * __db_safe_goff will realloc as needed. */ if ((ret = __os_malloc(dbp->env, dbp->pgsize, &buf)) != 0) return (ret); himark = dbp->pgsize; for (i = 0;; i++) { /* If we're not aggressive, break when we hit NUM_ENT(h). */ if (!LF_ISSET(DB_AGGRESSIVE) && i >= NUM_ENT(h)) break; /* Verify the current item. */ ret = __db_vrfy_inpitem(dbp, h, pgno, i, 0, flags, &himark, NULL); /* If this returned a fatality, it's time to break. */ if (ret == DB_VERIFY_FATAL) break; if (ret == 0) { /* Set len to total entry length. */ len = LEN_HITEM(dbp, h, dbp->pgsize, i); hk = P_ENTRY(dbp, h, i); if (len == 0 || len > dbp->pgsize || (u_int32_t)(hk + len - (u_int8_t *)h) > dbp->pgsize) { /* Item is unsafely large; skip it. */ err_ret = DB_VERIFY_BAD; continue; } switch (HPAGE_PTYPE(hk)) { case H_KEYDATA: /* Update len to size of item. */ len = LEN_HKEYDATA(dbp, h, dbp->pgsize, i); keydata: memcpy(buf, HKEYDATA_DATA(hk), len); dbt.size = len; dbt.data = buf; if ((ret = __db_vrfy_prdbt(&dbt, 0, " ", handle, callback, 0, vdp)) != 0) err_ret = ret; break; case H_OFFPAGE: if (len < HOFFPAGE_SIZE) { err_ret = DB_VERIFY_BAD; continue; } memcpy(&dpgno, HOFFPAGE_PGNO(hk), sizeof(dpgno)); if ((ret = __db_safe_goff(dbp, vdp, dpgno, &dbt, &buf, flags)) != 0) { err_ret = ret; (void)__db_vrfy_prdbt(&unkdbt, 0, " ", handle, callback, 0, vdp); /* fallthrough to end of case */ } else if ((ret = __db_vrfy_prdbt(&dbt, 0, " ", handle, callback, 0, vdp)) != 0) err_ret = ret; break; case H_OFFDUP: if (len < HOFFDUP_SIZE) { err_ret = DB_VERIFY_BAD; continue; } memcpy(&dpgno, HOFFDUP_PGNO(hk), sizeof(dpgno)); /* UNKNOWN iff pgno is bad or we're a key. */ if (!IS_VALID_PGNO(dpgno) || (i % 2 == 0)) { if ((ret = __db_vrfy_prdbt(&unkdbt, 0, " ", handle, callback, 0, vdp)) != 0) err_ret = ret; } else if ((ret = __db_salvage_duptree(dbp, vdp, dpgno, &dbt, handle, callback, flags | DB_SA_SKIPFIRSTKEY)) != 0) err_ret = ret; break; case H_DUPLICATE: /* * This is an on-page duplicate item, iterate * over the duplicate set, printing out * key/data pairs. */ len = LEN_HKEYDATA(dbp, h, dbp->pgsize, i); /* * If this item is at an even index it must be * a key item and it should never be of type * H_DUPLICATE. If we are in aggressive mode, * print the item out as a normal key, and let * the user resolve the discrepancy. */ if (i % 2 == 0) { err_ret = ret; if (LF_ISSET(DB_AGGRESSIVE)) goto keydata; break; } /* * Check to ensure that the item size is * greater than the smallest possible on page * duplicate. */ if (len < HKEYDATA_SIZE(2 * sizeof(db_indx_t))) { err_ret = DB_VERIFY_BAD; continue; } /* * Copy out the key from the dbt, it is still * present from the previous pass. */ memset(&key_dbt, 0, sizeof(key_dbt)); if ((ret = __os_malloc( dbp->env, dbt.size, &key_buf)) != 0) return (ret); memcpy(key_buf, buf, dbt.size); key_dbt.data = key_buf; key_dbt.size = dbt.size; key_dbt.flags = DB_DBT_USERMEM; /* Loop until we hit the total length. */ for (tlen = 0; tlen + sizeof(db_indx_t) < len; tlen += dlen + 2 * sizeof(db_indx_t)) { /* * Print the key for every duplicate * item. Except the first dup, since * the key was already output once by * the previous iteration. */ if (tlen != 0) { if ((ret = __db_vrfy_prdbt( &key_dbt, 0, " ", handle, callback, 0, vdp)) != 0) err_ret = ret; } p = HKEYDATA_DATA(hk) + tlen; memcpy(&dlen, p, sizeof(db_indx_t)); p += sizeof(db_indx_t); /* * If dlen is too long, print all the * rest of the dup set in a chunk. */ if (dlen + tlen + sizeof(db_indx_t) > len) { dlen = len - (tlen + sizeof(db_indx_t)); err_ret = DB_VERIFY_BAD; } memcpy(buf, p, dlen); dbt.size = dlen; dbt.data = buf; if ((ret = __db_vrfy_prdbt(&dbt, 0, " ", handle, callback, 0, vdp)) != 0) err_ret = ret; } __os_free(dbp->env, key_buf); break; default: if (!LF_ISSET(DB_AGGRESSIVE)) break; err_ret = DB_VERIFY_BAD; break; } } } __os_free(dbp->env, buf); if ((t_ret = __db_salvage_markdone(vdp, pgno)) != 0) return (t_ret); return ((ret == 0 && err_ret != 0) ? err_ret : ret); } /* * __ham_meta2pgset -- * Return the set of hash pages corresponding to the given * known-good meta page. * * PUBLIC: int __ham_meta2pgset __P((DB *, VRFY_DBINFO *, HMETA *, u_int32_t, * PUBLIC: DB *)); */ int __ham_meta2pgset(dbp, vdp, hmeta, flags, pgset) DB *dbp; VRFY_DBINFO *vdp; HMETA *hmeta; u_int32_t flags; DB *pgset; { DB_MPOOLFILE *mpf; DB_THREAD_INFO *ip; PAGE *h; db_pgno_t pgno; u_int32_t bucket, totpgs; int ret, val; /* * We don't really need flags, but leave them for consistency with * __bam_meta2pgset. */ COMPQUIET(flags, 0); ip = vdp->thread_info; DB_ASSERT(dbp->env, pgset != NULL); mpf = dbp->mpf; totpgs = 0; /* * Loop through all the buckets, pushing onto pgset the corresponding * page(s) for each one. */ for (bucket = 0; bucket <= hmeta->max_bucket; bucket++) { pgno = BS_TO_PAGE(bucket, hmeta->spares); /* * We know the initial pgno is safe because the spares array has * been verified. * * Safely walk the list of pages in this bucket. */ for (;;) { if ((ret = __memp_fget(mpf, &pgno, ip, NULL, 0, &h)) != 0) return (ret); if (TYPE(h) == P_HASH || TYPE(h) == P_HASH_UNSORTED) { /* * Make sure we don't go past the end of * pgset. */ if (++totpgs > vdp->last_pgno) { (void)__memp_fput(mpf, ip, h, dbp->priority); return (DB_VERIFY_BAD); } if ((ret = __db_vrfy_pgset_inc(pgset, vdp->thread_info, pgno)) != 0) { (void)__memp_fput(mpf, ip, h, dbp->priority); return (ret); } pgno = NEXT_PGNO(h); } else pgno = PGNO_INVALID; if ((ret = __memp_fput(mpf, ip, h, dbp->priority)) != 0) return (ret); /* If the new pgno is wonky, go onto the next bucket. */ if (!IS_VALID_PGNO(pgno) || pgno == PGNO_INVALID) break; /* * If we've touched this page before, we have a cycle; * go on to the next bucket. */ if ((ret = __db_vrfy_pgset_get(pgset, vdp->thread_info, pgno, &val)) != 0) return (ret); if (val != 0) break; } } return (0); } /* * __ham_dups_unsorted -- * Takes a known-safe hash duplicate set and its total length. * Returns 1 if there are out-of-order duplicates in this set, * 0 if there are not. */ static int __ham_dups_unsorted(dbp, buf, len) DB *dbp; u_int8_t *buf; u_int32_t len; { DBT a, b; db_indx_t offset, dlen; int (*func) __P((DB *, const DBT *, const DBT *)); memset(&a, 0, sizeof(DBT)); memset(&b, 0, sizeof(DBT)); func = (dbp->dup_compare == NULL) ? __bam_defcmp : dbp->dup_compare; /* * Loop through the dup set until we hit the end or we find * a pair of dups that's out of order. b is always the current * dup, a the one before it. */ for (offset = 0; offset < len; offset += DUP_SIZE(dlen)) { memcpy(&dlen, buf + offset, sizeof(db_indx_t)); b.data = buf + offset + sizeof(db_indx_t); b.size = dlen; if (a.data != NULL && func(dbp, &a, &b) > 0) return (1); a.data = b.data; a.size = b.size; } return (0); }