#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#if SHARE
#include <sys/types.h>
#include <sys/ipc.h>
#include <sys/shm.h>
#define ERR -1
#endif
#include "invlib.h"
#include "global.h"
#include <assert.h>
/*
 * Compile-time switches and tuning constants for the inverted index.
 *
 * BLOCKSIZE is parenthesized so that it expands safely inside larger
 * expressions such as "x % BLOCKSIZE" or "x / BLOCKSIZE".
 */
#define DEBUG		0	/* non-zero enables tracing output and invdump() */
#define BLOCKSIZE	(2 * BUFSIZ)	/* size in bytes of one logical block */
#define POSTINC		10000	/* posting buffer size/growth step, in POSTINGs */
#define SEP		' '	/* separates the term from the rest of an input line */
#define SETINC		100	/* initial size and growth slack of boolean sets, in POSTINGs */
#define STATS		0	/* non-zero enables statistics gathering and printing */
#define SUPERINC	10000	/* initial super finger size in bytes; also the growth step of the super index */
#define TERMMAX		512	/* size of the term and input line buffers */
#define FMTVERSION	1	/* index format version stored in, and checked against, the control block */
#define ZIPFSIZE	200	/* highest postings-per-term count tracked individually under STATS */
/* version-control identification string; not referenced by the code in this file */
static char const rcsid[] = "$Id: invlib.c,v 1.18 2006/09/30 15:38:16 broeker Exp $";
#if DEBUG
/* invdump() stops its output loops as soon as this becomes non-zero */
int invbreak;
#endif
/* forward declarations of the file-local helpers */
static int boolready(void);
static int invnewterm(void);
static void invstep(INVCONTROL *invcntl);
static void invcannotalloc(unsigned n);
static void invcannotopen(char *file);
static void invcannotwrite(char *file);
#if STATS
/* when non-zero, invmake() prints the postings-per-term histogram */
int showzipf;
#endif
/*
 * Boolean set state: item points at the current result set (always one of
 * the two buffers item1/item2) and enditem one past its last element.
 */
static POSTING *item, *enditem, *item1 = NULL, *item2 = NULL;
/* allocated capacity, in POSTINGs, of item1 and item2 */
static unsigned setsize1, setsize2;
/*
 * numitems: number of elements in the current boolean set
 * totterm:  number of invnewterm() calls made while building an index
 * zerolong: never assigned, so always zero; written as padding after the
 *           control block by invmake()
 */
static long numitems, totterm, zerolong;
/* names of the index and posting files being built, kept for error messages */
static char *indexfile, *postingfile;
/* streams for the index file and the posting file being built */
static FILE *outfile, *fpost;
/* supersize: bytes allocated for SUPFING; supintsize: entries allocated for SUPINT */
static unsigned supersize = SUPERINC, supintsize;
/*
 * numpost:   postings collected for the current term
 * numlogblk: logical blocks written so far
 * amtused:   bytes accounted as used in the current logical block
 * nextpost:  byte offset in the posting file where the next term's postings go
 */
static unsigned int numpost, numlogblk, amtused, nextpost;
/*
 * lastinblk:   offset within the current logical block of the lowest byte
 *              used by term text (text is filled from the block's end down)
 * numinvitems: number of entries in the current logical block
 */
static unsigned int lastinblk, numinvitems;
/* POST: buffer of postings for the current term; postptr: next free slot in it */
static POSTING *POST, *postptr;
/*
 * SUPINT: super index array (slot 0 receives the block count, the following
 * slots hold offsets of strings in the super finger); supint: next free slot;
 * nextsupfing: offset at which the next super finger string will be stored
 */
static unsigned long *SUPINT, *supint, nextsupfing;
/* SUPFING: super finger string buffer; supfing: next free byte in it */
static char *SUPFING, *supfing;
/* the term whose postings are currently being accumulated */
static char thisterm[TERMMAX];
/* one logical block of the index file, addressable as longs or as bytes */
typedef union logicalblk {
long invblk[BLOCKSIZE / sizeof(long)];
char chrblk[BLOCKSIZE];
} t_logicalblk;
/* the logical block currently being filled by invnewterm() */
static t_logicalblk logicalblk;
#if DEBUG || STATS
/* number of input lines read by invmake() */
static long totpost;
#endif
#if STATS
/* zipf[n] counts terms with n postings; zipf[0] counts terms with more than ZIPFSIZE */
static int zipf[ZIPFSIZE + 1];
#endif
long
invmake(char *invname, char *invpost, FILE *infile)
{
unsigned char *s;
long num;
int i;
long fileindex = 0;
unsigned postsize = POSTINC * sizeof(POSTING);
unsigned long *intptr;
char line[TERMMAX];
long tlong;
PARAM param;
POSTING posting;
char temp[BLOCKSIZE];
#if STATS
int j;
unsigned maxtermlen = 0;
#endif
if ((outfile = vpfopen(invname, "w+b")) == NULL) {
invcannotopen(invname);
return(0);
}
indexfile = invname;
fseek(outfile, BUFSIZ, SEEK_SET);
if ((fpost = vpfopen(invpost, "wb")) == NULL) {
invcannotopen(invpost);
return(0);
}
postingfile = invpost;
nextpost = 0;
if ((POST = malloc(postsize)) == NULL) {
invcannotalloc(postsize);
return(0);
}
postptr = POST;
if ((SUPFING = malloc(supersize)) == NULL) {
invcannotalloc(supersize);
return(0);
}
supfing = SUPFING;
supintsize = supersize / 40;
if ((SUPINT = malloc(supintsize * sizeof(long))) == NULL) {
invcannotalloc(supintsize * sizeof(long));
return(0);
}
supint = SUPINT;
supint++;
strcpy(thisterm, "");
*supint++ = 0;
*supfing++ = ' ';
*supfing++ = '\0';
nextsupfing = 2;
#if DEBUG || STATS
totpost = 0L;
#endif
totterm = 0L;
numpost = 1;
amtused = 16;
numinvitems = 0;
numlogblk = 0;
lastinblk = sizeof(t_logicalblk);
while (fgets(line, TERMMAX, infile) != NULL) {
#if DEBUG || STATS
++totpost;
#endif
s = strchr(line, SEP);
if (s != NULL) {
*s = '\0';
}
else {
continue;
}
#if STATS
if ((i = strlen(line)) > maxtermlen) {
maxtermlen = i;
}
#endif
#if DEBUG
printf("%ld: %s ", totpost, line);
fflush(stdout);
#endif
if (strcmp(thisterm, line) == 0) {
if (postptr + 10 > POST + postsize / sizeof(POSTING)) {
i = postptr - POST;
postsize += POSTINC * sizeof(POSTING);
if ((POST = realloc(POST, postsize)) == NULL) {
invcannotalloc(postsize);
return(0);
}
postptr = i + POST;
#if DEBUG
printf("reallocated post space to %u, totpost=%ld\n",
postsize, totpost);
#endif
}
numpost++;
} else {
if (!invnewterm()) {
return(0);
}
strcpy(thisterm, line);
numpost = 1;
postptr = POST;
fileindex = 0;
}
num = *++s - '!';
i = 1;
do {
num = BASE * num + *++s - '!';
} while (++i < PRECISION);
posting.lineoffset = num;
while (++fileindex < nsrcoffset && num > srcoffset[fileindex]) {
;
}
posting.fileindex = --fileindex;
posting.type = *++s;
num = *++s - '!';
if (*s != '\n') {
num = *++s - '!';
while (*++s != '\n') {
num = BASE * num + *s - '!';
}
posting.fcnoffset = num;
}
else {
posting.fcnoffset = 0;
}
*postptr++ = posting;
#if DEBUG
printf("%ld %ld %ld %ld\n", posting.fileindex,
posting.fcnoffset, posting.lineoffset, posting.type);
fflush(stdout);
#endif
}
if (!invnewterm()) {
return(0);
}
logicalblk.invblk[0] = numinvitems;
logicalblk.invblk[1] = 0;
logicalblk.invblk[2] = numlogblk - 1;
if (fwrite(&logicalblk, sizeof(t_logicalblk), 1, outfile) == 0) {
goto cannotwrite;
}
numlogblk++;
if (fwrite(&logicalblk, sizeof(t_logicalblk), 1, outfile) == 0) {
goto cannotwrite;
}
*SUPINT = numlogblk;
intptr = (SUPINT + 1);
i = (char *)supint - (char *)SUPINT;
while (intptr < supint)
*intptr++ += i;
if (fwrite(SUPINT, sizeof(*SUPINT), numlogblk + 1, outfile) == 0 ||
fwrite(SUPFING, 1, supfing - SUPFING, outfile) == 0) {
goto cannotwrite;
}
nextsupfing = sizeof(long) + sizeof(long) * numlogblk + (supfing - SUPFING);
i = nextsupfing % sizeof(t_logicalblk);
if (fwrite(temp, sizeof(t_logicalblk) - i, 1, outfile) == 0 ||
fflush(outfile) == EOF) {
goto cannotwrite;
}
rewind(outfile);
param.version = FMTVERSION;
param.filestat = 0;
param.sizeblk = sizeof(t_logicalblk);
param.startbyte = (numlogblk + 1) * sizeof(t_logicalblk) + BUFSIZ;;
param.supsize = nextsupfing;
param.cntlsize = BUFSIZ;
param.share = 0;
if (fwrite(¶m, sizeof(param), 1, outfile) == 0) {
goto cannotwrite;
}
for (i = 0; i < 10; i++)
if (fwrite(&zerolong, sizeof(zerolong), 1, outfile) == 0) {
goto cannotwrite;
}
if (fflush(outfile) == EOF) {
goto cannotwrite;
}
fseek(outfile, BUFSIZ + 2 * sizeof(long), SEEK_SET);
tlong = numlogblk - 1;
if (fwrite(&tlong, sizeof(tlong), 1, outfile) == 0 ||
fclose(outfile) == EOF) {
cannotwrite:
invcannotwrite(invname);
return(0);
}
if (fclose(fpost) == EOF) {
invcannotwrite(postingfile);
return(0);
}
--totterm;
#if STATS
printf("logical blocks = %d, postings = %ld, terms = %ld, max term length = %d\n",
numlogblk, totpost, totterm, maxtermlen);
if (showzipf) {
printf("\n************* ZIPF curve ****************\n");
for (j = ZIPFSIZE; j > 1; j--)
if (zipf[j])
break;
for (i = 1; i < j; ++i) {
printf("%3d -%6d ", i, zipf[i]);
if (i % 6 == 0) putchar('\n');
}
printf(">%d-%6d\n", ZIPFSIZE, zipf[0]);
}
#endif
free(POST);
free(SUPFING);
free(SUPINT);
return(totterm);
}
/*
 * Add the term held in thisterm, together with the postings collected in
 * POST..postptr, to the index being built.  When the term does not fit in
 * the current logical block, that block is written to outfile and a new
 * one is started first.
 *
 * Returns 1 on success, 0 after reporting an allocation or write failure.
 */
static int
invnewterm(void)
{
int backupflag, i, j, holditems, gooditems, howfar;
unsigned int maxback, len, numwilluse, wdlen;
char *tptr, *tptr2, *tptr3;
/* lets an ENTRY be moved in and out of the block as two raw words */
union {
unsigned long packword[2];
ENTRY e;
} iteminfo;
gooditems = 0;
totterm++;
#if STATS
/* histogram of postings per term; slot 0 collects the overflow */
if (numpost <= ZIPFSIZE)
zipf[numpost]++;
else
zipf[0]++;
#endif
len = strlen(thisterm);
/* term length rounded up to whole longs */
wdlen = (len + (sizeof(long) - 1)) / sizeof(long);
/* bytes needed: term text + one word for the posting offset + two entry words */
numwilluse = (wdlen + 3) * sizeof(long);
/* the term does not fit into the (non-empty) current block */
if (numinvitems && numwilluse + amtused > sizeof(t_logicalblk)) {
/* grow the super finger string buffer when it is nearly full */
if (supfing + 500 > SUPFING + supersize) {
i = supfing - SUPFING;
supersize += 20000;
if ((SUPFING = (char *)realloc(SUPFING, supersize)) == NULL) {
invcannotalloc(supersize);
return(0);
}
supfing = i + SUPFING;
#if DEBUG
printf("reallocated superfinger space to %d, totpost=%ld\n",
supersize, totpost);
#endif
}
/* grow the super index offset array when it is nearly full */
if ((numlogblk + 10) > supintsize) {
i = supint - SUPINT;
supintsize += SUPERINC;
if ((SUPINT = realloc(SUPINT, supintsize * sizeof(long))) == NULL) {
invcannotalloc(supintsize * sizeof(long));
return(0);
}
supint = i + SUPINT;
#if DEBUG
printf("reallocated superfinger offset to %d, totpost = %ld\n",
supintsize * sizeof(long), totpost);
#endif
}
/*
 * Look back over the most recent entries of the block for a markedly
 * shorter term at which to cut the block; entries after the cut are
 * carried over into the next block.
 * NOTE(review): presumably this keeps the first term of the next block
 * (which is copied into the super finger) short -- confirm.
 */
backupflag = 0;
maxback = (int) strlen(thisterm) / 10;
holditems = numinvitems;
if (maxback > numinvitems)
maxback = numinvitems - 2;
howfar = 0;
while (maxback-- > 1) {
howfar++;
/* NOTE(review): entries are stored from invblk[3] on (see the end of this
   function), but this index adds sizeof(long) - 1, which equals 3 only
   when sizeof(long) is 4 -- confirm the behaviour on LP64 targets. */
iteminfo.packword[0] =
logicalblk.invblk[--holditems * 2 + (sizeof(long) - 1)];
if ((i = iteminfo.e.size / 10) < maxback) {
maxback = i;
backupflag = howfar;
gooditems = holditems;
tptr2 = logicalblk.chrblk + iteminfo.e.offset;
}
}
/* cut the block at the chosen entry */
if (backupflag) {
numinvitems = gooditems;
}
/* block header: entry count, number of the next block, number of the previous block */
logicalblk.invblk[0] = numinvitems;
logicalblk.invblk[1] = numlogblk + 1;
logicalblk.invblk[2] = numlogblk - 1;
if (fwrite(logicalblk.chrblk, 1, sizeof(t_logicalblk), outfile) == 0) {
invcannotwrite(indexfile);
return(0);
}
/* restart the space accounting for the new block (same initial value as in invmake) */
amtused = 16;
numlogblk++;
if (backupflag) {
/* tptr3: start of the text of the last entry kept in the old block;
   the text of the carried-over entries lies below it */
iteminfo.packword[0] = logicalblk.invblk[numinvitems*2+1];
tptr3 = logicalblk.chrblk + iteminfo.e.offset;
/* slide the carried-over entry words down to the start of the entry table */
for (i = 3; i <= (backupflag * 2 + 2); i++)
logicalblk.invblk[i] = logicalblk.invblk[numinvitems*2+i];
/* the first carried-over term becomes the new block's super finger string */
iteminfo.packword[0] = logicalblk.invblk[3];
iteminfo.packword[1] = logicalblk.invblk[4];
tptr2 = logicalblk.chrblk + iteminfo.e.offset;
strncpy(supfing, tptr2, (int) iteminfo.e.size);
*(supfing + iteminfo.e.size) = '\0';
#if DEBUG
printf("backup %d at term=%s to term=%s\n",
backupflag, thisterm, supfing);
#endif
*supint++ = nextsupfing;
nextsupfing += strlen(supfing) + 1;
supfing += strlen(supfing) + 1;
/* move the carried-over term text up against the end of the block */
tptr = logicalblk.chrblk + lastinblk;
lastinblk = sizeof(t_logicalblk);
tptr2 = logicalblk.chrblk + lastinblk;
j = tptr3 - tptr;
while (tptr3 > tptr)
*--tptr2 = *--tptr3;
lastinblk -= j;
/* NOTE(review): 8 bytes per carried-over entry matches two entry words
   only when sizeof(long) is 4 -- confirm. */
amtused += (8 * backupflag + j);
/* adjust the text offsets of the carried-over entries by the distance moved */
for (i = 3; i < (backupflag * 2 + 2); i += 2) {
iteminfo.packword[0] = logicalblk.invblk[i];
iteminfo.e.offset += (tptr2 - tptr3);
logicalblk.invblk[i] = iteminfo.packword[0];
}
numinvitems = backupflag;
} else {
/* nothing carried over: the new block is empty and the current term
   becomes its super finger string */
numinvitems = 0;
lastinblk = sizeof(t_logicalblk);
strcpy(supfing, thisterm);
supfing += strlen(thisterm) + 1;
*supint++ = nextsupfing;
nextsupfing += strlen(thisterm) + 1;
}
}
/* reserve room below the existing text for the term and its posting offset word */
lastinblk -= (numwilluse - 2 * sizeof(long));
iteminfo.e.offset = lastinblk;
iteminfo.e.size = len;
iteminfo.e.space = 0;
iteminfo.e.post = numpost;
/* the term text is stored without a terminating NUL; its length is in the entry */
strncpy(logicalblk.chrblk + lastinblk, thisterm, len);
amtused += numwilluse;
/* the word following the text holds the posting file offset of this term's postings */
logicalblk.invblk[(lastinblk/sizeof(long))+wdlen] = nextpost;
/* append the collected postings to the posting file */
if ((i = postptr - POST) > 0) {
if (fwrite(POST, sizeof(POSTING), i, fpost) == 0) {
invcannotwrite(postingfile);
return(0);
}
nextpost += i * sizeof(POSTING);
}
/* store the two entry words at invblk[3 + 2*k] and invblk[4 + 2*k] for entry k */
logicalblk.invblk[3+2*numinvitems++] = iteminfo.packword[0];
logicalblk.invblk[2+2*numinvitems] = iteminfo.packword[1];
return(1);
}
/*
 * If invname ends with the suffix "from", overwrite that suffix in place
 * with "to" (which must have the same length).
 *
 * Returns 0 when the suffix was replaced, -1 when invname does not end
 * with "from" (invname is then left untouched).
 */
static int
invflipname(char * invname, const char *from, const char *to)
{
	size_t	namelen, fromlen;
	char	*tail;

	assert(strlen(from) == strlen(to));

	namelen = strlen(invname);
	fromlen = strlen(from);
	if (fromlen > namelen)
		return -1;

	/* only an occurrence at the very end of the name counts, so compare
	   the tail directly instead of scanning for every occurrence */
	tail = invname + (namelen - fromlen);
	if (strcmp(tail, from) != 0)
		return -1;

	while (*to)
		*tail++ = *to++;
	return 0;
}
/*
 * Open an inverted index for searching.
 *
 * invcntl  control structure to fill in
 * invname  index file name; if it cannot be opened, the alternative
 *          spelling (INVNAME <-> INVNAME2 suffix) is tried, and the buffer
 *          keeps whichever spelling was opened
 * invpost  posting file name, handled the same way with INVPOST/INVPOST2
 * stat     requested file status; 0 opens the files read-only, any other
 *          value opens them for update
 *
 * Returns 1 on success, -1 after reporting an error on stderr.
 */
int
invopen(INVCONTROL *invcntl, char *invname, char *invpost, int stat)
{
	int	read_index;
	int	raised;
	char	*mode = (stat == 0) ? "rb" : "r+b";

	if ((invcntl->invfile = vpfopen(invname, mode)) == NULL) {
		/* try the other spelling of the name before giving up */
		if (!invflipname(invname, INVNAME2, INVNAME)) {
			if ((invcntl->invfile = vpfopen(invname, mode)))
				goto openedinvname;
			/* change it back for the error message */
			invflipname(invname, INVNAME, INVNAME2);
		}
		else if (!invflipname(invname, INVNAME, INVNAME2)) {
			if ((invcntl->invfile = vpfopen(invname, mode)))
				goto openedinvname;
			/* change it back for the error message */
			invflipname(invname, INVNAME2, INVNAME);
		}
		invcannotopen(invname);
		return(-1);
	}
openedinvname:
	/* read and validate the control block */
	if (fread(&invcntl->param, sizeof(invcntl->param), 1, invcntl->invfile) == 0) {
		fprintf(stderr, "%s: empty inverted file\n", argv0);
		goto closeinv;
	}
	if (invcntl->param.version != FMTVERSION) {
		fprintf(stderr, "%s: cannot read old index format; use -U option to force database to rebuild\n", argv0);
		goto closeinv;
	}
	assert(invcntl->param.sizeblk == sizeof(t_logicalblk));

	if (stat == 0 && invcntl->param.filestat == INVALONE) {
		fprintf(stderr, "%s: inverted file is locked\n", argv0);
		goto closeinv;
	}

	if ((invcntl->postfile = vpfopen(invpost, mode)) == NULL) {
		/* try the other spelling of the name before giving up */
		if (!invflipname(invpost, INVPOST2, INVPOST)) {
			if ((invcntl->postfile = vpfopen(invpost, mode)))
				goto openedinvpost;
			/* change it back for the error message */
			invflipname(invpost, INVPOST, INVPOST2);
		} else if (!invflipname(invpost, INVPOST, INVPOST2)) {
			if ((invcntl->postfile = vpfopen(invpost, mode)))
				goto openedinvpost;
			/* change it back for the error message */
			invflipname(invpost, INVPOST2, INVPOST);
		}
		invcannotopen(invpost);
		goto closeinv;
	}
openedinvpost:
	/* buffer for one logical block */
	if ((invcntl->logblk = malloc((unsigned) invcntl->param.sizeblk)) == NULL) {
		invcannotalloc((unsigned) invcntl->param.sizeblk);
		goto closeboth;
	}

	/* obtain memory for the super index, from shared memory if enabled */
	read_index = 1;
	invcntl->iindex = NULL;
#if SHARE
	if (invcntl->param.share == 1) {
		key_t	shm_key;
		struct shmid_ds shm_buf;
		int	shm_id;

		/* see if the shared segment already exists */
		shm_key = ftok(invname, 2);
		shm_id = shmget(shm_key, 0, 0);
		if (shm_id == -1) {
			/* it does not: create it and fill it from the file below */
			shm_id = shmget(shm_key, invcntl->param.supsize + sizeof(long), IPC_CREAT | 0666);
			if (shm_id == -1)
				perror("Could not create shared memory segment");
		} else
			read_index = 0;

		if (shm_id != -1) {
			invcntl->iindex = shmat(shm_id, 0, ((read_index) ? 0 : SHM_RDONLY));
			if (invcntl->iindex == (char *)ERR) {
				fprintf(stderr, "%s: shared memory link failed\n", argv0);
				invcntl->iindex = NULL;
				read_index = 1;
			}
		}
	}
#endif
	if (invcntl->iindex == NULL)
		invcntl->iindex = malloc((unsigned) invcntl->param.supsize
					 + 4 *sizeof(long));
	if (invcntl->iindex == NULL) {
		invcannotalloc((unsigned) invcntl->param.supsize);
		free(invcntl->logblk);
		goto closeboth;
	}
	if (read_index) {
		fseek(invcntl->invfile, invcntl->param.startbyte, SEEK_SET);
		fread(invcntl->iindex, (int) invcntl->param.supsize, 1,
		      invcntl->invfile);
	}
	invcntl->numblk = -1;		/* no logical block is loaded yet */
	if (boolready() == -1) {
		goto closeboth;
	}

	/* Record the requested status; write the control block back out only
	   when the status was raised.  The comparison has to be made before
	   the stored status is overwritten, otherwise it can never be true. */
	raised = (stat > invcntl->param.filestat);
	invcntl->param.filestat = stat;
	if (raised) {
		rewind(invcntl->invfile);
		fwrite(&invcntl->param, sizeof(invcntl->param), 1, invcntl->invfile);
	}
	return(1);

closeboth:
	fclose(invcntl->postfile);
closeinv:
	fclose(invcntl->invfile);
	return(-1);
}
/*
 * Close an inverted index opened with invopen() and release its buffers.
 *
 * If the in-memory file status is above 0 it is reset to 0 and the control
 * block is written back.  If the index was held with status INVALONE, the
 * super index is written back as well.
 */
void
invclose(INVCONTROL *invcntl)
{
	/* Remember the status before it is reset below.  invopen() treats
	   INVALONE as "locked" while freshly built files carry status 0, so
	   INVALONE is non-zero and testing it after the reset could never
	   succeed. */
	int	alone = (invcntl->param.filestat == INVALONE);

	/* write out the control block in case anything changed */
	if (invcntl->param.filestat > 0) {
		invcntl->param.filestat = 0;
		rewind(invcntl->invfile);
		fwrite(&invcntl->param, 1,
		       sizeof(invcntl->param), invcntl->invfile);
	}
	if (alone) {
		/* write out the super index */
		fseek(invcntl->invfile, invcntl->param.startbyte, SEEK_SET);
		fwrite(invcntl->iindex, 1,
		       (int) invcntl->param.supsize, invcntl->invfile);
	}
	fclose(invcntl->invfile);
	fclose(invcntl->postfile);
#if SHARE
	if (invcntl->param.share > 0) {
		shmdt(invcntl->iindex);
		invcntl->iindex = NULL;
	}
#endif
	free(invcntl->iindex);		/* free(NULL) is a no-op */
	free(invcntl->logblk);
}
/*
 * Advance to the next entry of the index.  Within a block this just bumps
 * the key pointer; after the last entry the block named by the current
 * block's forward link is read in and the key pointer restarts at 0.
 */
static void
invstep(INVCONTROL *invcntl)
{
	if (invcntl->keypnt >= (invcntl->logblk->invblk[0] - 1)) {
		/* past the last entry: follow the forward link */
		invcntl->numblk = invcntl->logblk->invblk[1];
		fseek(invcntl->invfile,
		      invcntl->numblk*invcntl->param.sizeblk + invcntl->param.cntlsize,
		      SEEK_SET);
		fread(invcntl->logblk, (int) invcntl->param.sizeblk, 1,
		      invcntl->invfile);
		invcntl->keypnt = 0;
	} else {
		invcntl->keypnt++;
	}
}
/*
 * Step forward to the next entry that has at least one posting.
 *
 * Returns 0 when the step ended on entry 0 of block 0, 1 otherwise.
 */
int
invforward(INVCONTROL *invcntl)
{
	ENTRY	*cur;

	/* always step at least once, then keep going over empty entries */
	do {
		invstep(invcntl);
		cur = (ENTRY * )(invcntl->logblk->invblk + 3) + invcntl->keypnt;
	} while (cur->post == 0);

	if ((invcntl->numblk != 0) || (invcntl->keypnt != 0))
		return(1);
	return(0);
}
/*
 * Copy the text of the current entry into term as a NUL-terminated string.
 * The caller's buffer must have room for the entry's size plus one byte.
 *
 * Returns the entry's posting count.
 */
long
invterm(INVCONTROL *invcntl, char *term)
{
	ENTRY	*cur = (ENTRY *)(invcntl->logblk->invblk + 3) + invcntl->keypnt;

	/* the text in the block is not terminated, so add the NUL here */
	strncpy(term, invcntl->logblk->chrblk + cur->offset, (int) cur->size);
	term[cur->size] = '\0';
	return(cur->post);
}
/*
 * Look up searchterm in the index and position invcntl at it.
 *
 * A binary search over the super index selects a logical block, which is
 * read from the index file if necessary; a second binary search then runs
 * over the entries of that block.
 *
 * Returns the posting count of the matching entry, 0 if there is no exact
 * match, or -1 if the index file is not open.  invcntl->keypnt is left at
 * the position where the in-block search ended.
 */
long
invfind(INVCONTROL *invcntl, char *searchterm)
{
int imid, ilow, ihigh;
long num;
int i;
unsigned long *intptr, *intptr2;
ENTRY *entryptr;
if (invcntl->invfile == 0)
return(-1L);
/* the super index starts with the block count, followed by one offset per
   block; each offset locates a string relative to the start of iindex */
intptr = (unsigned long *)invcntl->iindex;
ilow = 0;
ihigh = *intptr++ - 1;
while (ilow <= ihigh) {
imid = (ilow + ihigh) / 2;
intptr2 = intptr + imid;
i = strcmp(searchterm, (invcntl->iindex + *intptr2));
if (i < 0)
ihigh = imid - 1;
else if (i > 0)
ilow = ++imid;
else {
/* exact match: make the "ilow - 1" below select this block */
ilow = imid + 1;
break;
}
}
/* the block to search is the one just below the insertion point */
imid = (ilow) ? ilow - 1 : 0;
/* read the block unless it is already loaded; it is always re-read while
   the file status is INVBUSY or above */
if ((imid != invcntl->numblk) || (invcntl->param.filestat >= INVBUSY)) {
fseek(invcntl->invfile,
(imid*invcntl->param.sizeblk) + invcntl->param.cntlsize,
SEEK_SET);
invcntl->numblk = imid;
fread(invcntl->logblk, (int)invcntl->param.sizeblk, 1,
invcntl->invfile);
}
srch_ext:
/* binary search over the entries of the block; the first header word is
   the entry count and the entries start at the fourth word */
intptr = (unsigned long *) invcntl->logblk->invblk;
ilow = 0;
ihigh = *intptr - 1;
intptr += 3;
num = 0;
while (ilow <= ihigh) {
imid = (ilow + ihigh) / 2;
entryptr = (ENTRY *)intptr + imid;
/* stored terms are not NUL-terminated: compare over the stored length
   and break ties on the length of the search term */
i = strncmp(searchterm, invcntl->logblk->chrblk + entryptr->offset,
(int) entryptr->size );
if (i == 0)
i = strlen(searchterm) - entryptr->size;
if (i < 0)
ihigh = imid - 1;
else if (i > 0)
ilow = ++imid;
else {
num = entryptr->post;
break;
}
}
/* the search ran off the end of the block: step into the next block and,
   if the condition below holds, search that block as well */
if (imid >= invcntl->logblk->invblk[0]) {
invcntl->keypnt = invcntl->logblk->invblk[0];
invstep(invcntl);
/* NOTE(review): the intent of comparing startbyte with the block's byte
   offset is not evident from this file -- confirm. */
if (invcntl->param.startbyte < invcntl->numblk * invcntl->param.sizeblk)
goto srch_ext;
} else
invcntl->keypnt = imid;
return(num);
}
#if DEBUG
/*
 * Debugging aid: print part of the index on stdout.
 *
 * term  "-N"    dump the super index starting at entry N
 *       "#N"    load logical block N and dump its entries
 *       other   look the term up with invfind() and dump the block that
 *               the search ended in
 *
 * The output loops stop early once invbreak becomes non-zero.
 */
void
invdump(INVCONTROL *invcntl, char *term)
{
	/* i is the posting count shown in the header line; it is only
	   computed for a term lookup, so it starts at 0 for "#N" */
	long	i = 0, j, n, *longptr;
	ENTRY	*entryptr;
	char	temp[512], *ptr;

	/* dump the super index if the term is "-" */
	if (*term == '-') {
		j = atoi(term + 1);
		longptr = (long *)invcntl->iindex;
		n = *longptr++;
		printf("Superindex dump, num blocks=%ld\n", n);
		longptr += j;
		while ((longptr <= ((long *)invcntl->iindex) + n) && invbreak == 0) {
			printf("%2ld  %6ld %s\n", j++, *longptr, invcntl->iindex + *longptr);
			longptr++;
		}
		return;
	} else if (*term == '#') {
		/* load the requested logical block */
		j = atoi(term + 1);
		invcntl->numblk = j;
		fseek(invcntl->invfile,
		      (j * invcntl->param.sizeblk) + invcntl->param.cntlsize,
		      SEEK_SET);
		fread(invcntl->logblk, (int) invcntl->param.sizeblk, 1,
		      invcntl->invfile);
	} else
		i = abs((int) invfind(invcntl, term));

	/* header: entry count, forward pointer and back pointer of the block */
	longptr = invcntl->logblk->invblk;
	n = *longptr++;
	printf("Entry term to invdump=%s, postings=%ld, forwrd ptr=%ld, back ptr=%ld\n"
	       , term, i, *(longptr), *(longptr + 1));
	entryptr = (ENTRY *) (invcntl->logblk->invblk + 3);
	printf("%ld terms in this block, block=%ld\n", n, invcntl->numblk);
	printf("\tterm\t\t\tposts\tsize\toffset\tspace\t1st word\n");
	for (j = 0; j < n && invbreak == 0; j++) {
		ptr = invcntl->logblk->chrblk + entryptr->offset;
		strncpy(temp, ptr, (int) entryptr->size);
		temp[entryptr->size] = '\0';
		/* skip the term text, rounded up to whole longs, to reach the
		   word stored after it */
		ptr += (sizeof(long) * (long)((entryptr->size + (sizeof(long) - 1)) / sizeof(long)));
		printf("%2ld  %-24s\t%5ld\t%3d\t%d\t%d\t%ld\n", j, temp, entryptr->post,
		       entryptr->size, entryptr->offset, entryptr->space,
		       *(long *)ptr);
		entryptr++;
	}
}
#endif
/*
 * (Re)initialise the two boolean set buffers to SETINC entries each and
 * make the first one the current, empty set.
 *
 * Returns 0 on success, -1 after reporting an allocation failure.
 */
static int
boolready(void)
{
	numitems = 0;

	free(item1);			/* free(NULL) is a no-op */
	setsize1 = SETINC;
	if ((item1 = malloc(SETINC * sizeof(POSTING))) == NULL) {
		/* report the byte count that was requested, as boolfile() does */
		invcannotalloc(SETINC * sizeof(POSTING));
		return(-1);
	}

	free(item2);
	setsize2 = SETINC;
	if ((item2 = malloc(SETINC * sizeof(POSTING))) == NULL) {
		invcannotalloc(SETINC * sizeof(POSTING));
		return(-1);
	}

	item = item1;
	enditem = item;
	return(0);
}
/*
 * Empty the current boolean set and make the first buffer current again.
 */
void
boolclear(void)
{
	enditem = item = item1;
	numitems = 0;
}
/*
 * Combine the postings of the current index entry (the one selected by
 * invcntl->keypnt) with the current boolean set.
 *
 * invcntl  index positioned at the entry whose postings are to be used
 * num      receives the number of elements in the resulting set, or -1 if
 *          memory could not be allocated
 * boolarg  operation: BOOL_OR, AND, NOT or REVERSENOT
 *
 * Returns the resulting set, or NULL on allocation failure.
 *
 * Only the BOOL_OR merge is compiled in; the code for AND, NOT and
 * REVERSENOT is disabled with #if 0, so for those operations nothing is
 * merged and the resulting set has 0 elements.
 */
POSTING *
boolfile(INVCONTROL *invcntl, long *num, int boolarg)
{
ENTRY *entryptr;
FILE *file;
void *ptr;
unsigned long *ptr2;
POSTING *newitem = NULL;
POSTING posting;
unsigned u;
POSTING *newsetp = NULL, *set1p;
long newsetc, set1c, set2c;
/* locate the entry, its term text, and the word stored after the text,
   which holds the entry's offset into the posting file */
entryptr = (ENTRY *) (invcntl->logblk->invblk + 3) + invcntl->keypnt;
ptr = invcntl->logblk->chrblk + entryptr->offset;
ptr2 = ((unsigned long *) ptr) + (entryptr->size + (sizeof(long) - 1)) / sizeof(long);
*num = entryptr->post;
/* OR and NOT with an entry that has no postings leave the set unchanged */
switch (boolarg) {
case BOOL_OR:
case NOT:
if (*num == 0) {
*num = numitems;
return(item);
}
}
/* choose where the result is written */
u = 0;
switch (boolarg) {
case AND:
case NOT:
/* the result is built in place over the current set */
newsetp = set1p = item;
break;
case BOOL_OR:
/* the result can hold the current set plus the new postings */
u = enditem - item;
/* fall through */
case REVERSENOT:
u += *num;
/* the result goes into whichever buffer is not the current set,
   grown first if it is too small */
if (item == item2) {
if (u > setsize1) {
u += SETINC;
if ((item1 = realloc(
item1, u * sizeof(POSTING))) == NULL) {
goto cannotalloc;
}
setsize1 = u;
}
newitem = item1;
}
else {
if (u > setsize2) {
u += SETINC;
if ((item2 = realloc(
item2, u * sizeof(POSTING))) == NULL) {
cannotalloc:
/* report the failure, reset both buffers and signal the error */
invcannotalloc(u * sizeof(POSTING));
boolready();
*num = -1;
return(NULL);
}
setsize2 = u;
}
newitem = item2;
}
set1p = item;
newsetp = newitem;
}
/* position the posting file at this entry's postings and read the first one */
file = invcntl->postfile;
fseek(file, *ptr2, SEEK_SET);
fread(&posting, sizeof(posting), 1, file);
newsetc = 0;
switch (boolarg) {
case BOOL_OR:
/* merge the two lists, which are compared by lineoffset and then by type;
   a posting present in both lists is copied only once */
set1p = item;
newsetp = newitem;
for (set1c = 0, set2c = 0;
set1c < numitems && set2c < *num; newsetc++) {
if (set1p->lineoffset < posting.lineoffset) {
*newsetp++ = *set1p++;
set1c++;
}
else if (set1p->lineoffset > posting.lineoffset) {
*newsetp++ = posting;
fread(&posting, (int) sizeof(posting), 1, file);
set2c++;
}
else if (set1p->type < posting.type) {
*newsetp++ = *set1p++;
set1c++;
}
else if (set1p->type > posting.type) {
*newsetp++ = posting;
fread(&posting, (int) sizeof(posting), 1, file);
set2c++;
}
else {
*newsetp++ = *set1p++;
set1c++;
fread(&posting, (int) sizeof(posting), 1, file);
set2c++;
}
}
/* copy whatever remains of the list that was not exhausted */
if (set1c < numitems) {
newsetc += numitems - set1c;
while (set1c++ < numitems) {
*newsetp++ = *set1p++;
}
} else {
while (set2c++ < *num) {
*newsetp++ = posting;
newsetc++;
fread(&posting, (int) sizeof(posting), 1, file);
}
}
item = newitem;
break;
/* the remaining operations are compiled out */
#if 0
case AND:
for (set1c = 0, set2c = 0; set1c < numitems && set2c < *num; ) {
if (set1p->lineoffset < posting.lineoffset) {
set1p++;
set1c++;
}
else if (set1p->lineoffset > posting.lineoffset) {
fread(&posting, (int) sizeof(posting), 1, file);
set2c++;
}
else if (set1p->type < posting.type) {
*set1p++;
set1c++;
}
else if (set1p->type > posting.type) {
fread(&posting, (int) sizeof(posting), 1, file);
set2c++;
}
else {
*newsetp++ = *set1p++;
newsetc++;
set1c++;
fread(&posting, (int) sizeof(posting), 1, file);
set2c++;
}
}
break;
case NOT:
for (set1c = 0, set2c = 0; set1c < numitems && set2c < *num; ) {
if (set1p->lineoffset < posting.lineoffset) {
*newsetp++ = *set1p++;
newsetc++;
set1c++;
}
else if (set1p->lineoffset > posting.lineoffset) {
fread(&posting, (int) sizeof(posting), 1, file);
set2c++;
}
else if (set1p->type < posting.type) {
*newsetp++ = *set1p++;
newsetc++;
set1c++;
}
else if (set1p->type > posting.type) {
fread(&posting, (int) sizeof(posting), 1, file);
set2c++;
}
else {
set1c++;
set1p++;
fread(&posting, (int) sizeof(posting), 1, file);
set2c++;
}
}
newsetc += numitems - set1c;
while (set1c++ < numitems) {
*newsetp++ = *set1p++;
}
break;
case REVERSENOT:
for (set1c = 0, set2c = 0; set1c < numitems && set2c < *num; ) {
if (set1p->lineoffset < posting.lineoffset) {
set1p++;
set1c++;
}
else if (set1p->lineoffset > posting.lineoffset) {
*newsetp++ = posting;
fread(&posting, (int) sizeof(posting), 1, file);
set2c++;
}
else if (set1p->type < posting.type) {
set1p++;
set1c++;
}
else if (set1p->type > posting.type) {
*newsetp++ = posting;
fread(&posting, (int) sizeof(posting), 1, file);
set2c++;
}
else {
set1c++;
set1p++;
fread(&posting, (int) sizeof(posting), 1, file);
set2c++;
}
}
while (set2c++ < *num) {
*newsetp++ = posting;
newsetc++;
fread(&posting, (int) sizeof(posting), 1, file);
}
item = newitem;
break;
#endif
}
/* publish the size and end of the resulting set */
numitems = newsetc;
*num = newsetc;
enditem = (POSTING *) newsetp;
return((POSTING *) item);
}
#if 0
/*
 * Hand the current boolean set over to the caller (currently compiled out).
 *
 * clear  non-zero: the caller takes over the set's buffer and the boolean
 *        state is re-initialised; zero: a copy of the set is returned
 *
 * Returns NULL when the set is empty or the copy cannot be allocated.
 */
POSTING *
boolsave(int clear)
{
	int	i;
	POSTING	*ptr;
	POSTING	*oldstuff, *newstuff;

	if (numitems == 0) {
		if (clear)
			boolclear();
		return(NULL);
	}
	/* when clearing, give the buffer away and allocate a fresh one */
	if (clear) {
		ptr = item;
		/* detach the buffer so boolready() does not free it */
		if (item == item1)
			item1 = NULL;
		else
			item2 = NULL;
		boolready();
		return(ptr);
	}
	/* otherwise return a copy, allocated with some slack */
	i = (enditem - item) * sizeof(POSTING) + 100;
	if ((ptr = malloc(i)) == NULL) {
		invcannotalloc(i);
		return(ptr);
	}
	oldstuff = item;
	newstuff = ptr;
	while (oldstuff < enditem)
		*newstuff++ = *oldstuff++;
	return(ptr);
}
#endif
/* Report on stderr that an allocation of n bytes failed. */
static void
invcannotalloc(unsigned n)
{
	(void) fprintf(stderr,
		       "%s: cannot allocate %u bytes\n",
		       argv0, n);
}
/* Report on stderr that the named file could not be opened. */
static void
invcannotopen(char *file)
{
	(void) fprintf(stderr,
		       "%s: cannot open file %s\n",
		       argv0, file);
}
/*
 * Report on stderr that writing to the named file failed; the system's
 * description of the current errno value is printed first.
 */
static void
invcannotwrite(char *file)
{
	/* perror() must come first so that fprintf() cannot disturb errno */
	perror(argv0);
	(void) fprintf(stderr,
		       "%s: write to file %s failed\n",
		       argv0, file);
}