/************************************************************** * * giants.c * * Library for large-integer arithmetic. * * The large-gcd implementation is due to J. P. Buhler. * Special mod routines use ideas of R. McIntosh. * Contributions from G. Woltman, A. Powell acknowledged. * * Updates: * 18 Jul 99 REC Added routine fer_mod(), for use when Fermat giant itself is available. * 17 Jul 99 REC Fixed sign bug in fermatmod() * 17 Apr 99 REC Fixed various comment/line wraps * 25 Mar 99 REC G. Woltman/A. Powell fixes Karat. routines * 05 Mar 99 REC Moved invaux, binvaux giants to stack * 05 Mar 99 REC Moved gread/gwrite giants to stack * 05 Mar 99 REC No static on cur_den, cur_recip (A. Powell) * 28 Feb 99 REC Error detection added atop newgiant(). * 27 Feb 99 REC Reduced wasted work in addsignal(). * 27 Feb 99 REC Reduced wasted work in FFTmulg(). * 19 Feb 99 REC Generalized iaddg() per R. Mcintosh. * 2 Feb 99 REC Fixed comments. * 6 Dec 98 REC Fixed yet another Karatsuba glitch. * 1 Dec 98 REC Fixed errant case of addg(). * 28 Nov 98 REC Installed A. Powell's (correct) variant of Karatsuba multiply. * 15 May 98 REC Modified gwrite() to handle huge integers. * 13 May 98 REC Changed to static stack declarations * 11 May 98 REC Installed Karatsuba multiply, to handle * medregion 'twixt grammar- and FFT-multiply. * 1 May 98 JF gshifts now handle bits < 0 correctly. * 30 Apr 98 JF 68k assembler code removed, * stack giant size now invariant and based * on first call of newgiant(), * stack memory leaks fixed. * 29 Apr 98 JF function prototyes cleaned up, * GCD no longer uses personal stack, * optimized shifts for bits%16 == 0. * 27 Apr 98 JF scratch giants now replaced with stack * 20 Apr 98 JF grammarsquareg fixed for asize == 0. * scratch giants now static. * 29 Jan 98 JF Corrected out-of-range errors in * mersennemod and fermatmod. * 23 Dec 97 REC Sped up divide routines via split-shift. * 18 Nov 97 JF Improved mersennemod, fermatmod. * 9 Nov 97 JF Sped up grammarsquareg. * 20 May 97 RDW Fixed Win32 compiler warnings. * 18 May 97 REC Installed new, fast divide. * 17 May 97 REC Reduced memory for FFT multiply. * 26 Apr 97 REC Creation. * * c. 1997,1998 Perfectly Scientific, Inc. * All Rights Reserved. * **************************************************************/ /* Include Files. */ #include #include #include #include #include "giants.h" /* Compiler options. */ #ifdef _WIN32 #pragma warning( disable : 4127 4706 ) /* disable conditional is constant warning */ #endif /* Global variables. */ int error = 0; int mulmode = AUTO_MUL; int cur_prec = 0; int cur_shift = 0; static int cur_stack_size = 0; static int cur_stack_elem = 0; static int stack_glen = 0; static giant *stack; giant cur_den = NULL, cur_recip = NULL; int current_max_size = 0, cur_run = 0; double * sinCos=NULL; int checkFFTerror = 0; double maxFFTerror; static giant u0=NULL, u1=NULL, v0=NULL, v1=NULL; static double *z = NULL, *z2 = NULL; /* stack handling functions. */ static giant popg(void); static void pushg(int); /* Private function prototypes. */ int gerr(void); double gfloor(double); int radixdiv(int, int, giant); void columnwrite(FILE *, short *, char *, short, int); void normal_addg(giant, giant); void normal_subg(giant, giant); void reverse_subg(giant, giant); int binvaux(giant, giant); int invaux(giant, giant); int allzeros(int, int, giant); void auxmulg(giant a, giant b); void karatmulg(giant a, giant b); void karatsquareg(giant b); void grammarmulg(giant a, giant b); void grammarsquareg(giant b); int lpt(int, int *); void addsignal(giant, double *, int); void FFTsquareg(giant x); void FFTmulg(giant y, giant x); void scramble_real(); void fft_real_to_hermitian(double *z, int n); void fftinv_hermitian_to_real(double *z, int n); void mul_hermitian(double *a, double *b, int n); void square_hermitian(double *b, int n); void giant_to_double(giant x, int sizex, double *z, int L); void gswap(giant *, giant *); void onestep(giant, giant, gmatrix); void mulvM(gmatrix, giant, giant); void mulmM(gmatrix, gmatrix); void writeM(gmatrix); static void punch(giant, gmatrix); static void dotproduct(giant, giant, giant, giant); void fix(giant *, giant *); void hgcd(int, giant, giant, gmatrix); void shgcd(int, int, gmatrix); /************************************************************** * * Functions * **************************************************************/ /************************************************************** * * Initialization and utility functions * **************************************************************/ double gfloor( double f ) { return floor(f); } void init_sinCos( int n ) { int j; double e = TWOPI/n; if (n<=cur_run) return; cur_run = n; if (sinCos) free(sinCos); sinCos = (double *)malloc(sizeof(double)*(1+(n>>2))); for (j=0;j<=(n>>2);j++) { sinCos[j] = sin(e*j); } } double s_sin( int n ) { int seg = n/(cur_run>>2); switch (seg) { case 0: return(sinCos[n]); case 1: return(sinCos[(cur_run>>1)-n]); case 2: return(-sinCos[n-(cur_run>>1)]); case 3: return(-sinCos[cur_run-n]); } return 0; } double s_cos( int n ) { int quart = (cur_run>>2); if (n < quart) return(s_sin(n+quart)); return(-s_sin(n-quart)); } int gerr(void) { return(error); } giant popg ( void ) { int i; if (current_max_size <= 0) current_max_size = MAX_SHORTS; if (cur_stack_size == 0) { /* Initialize the stack if we're just starting out. * Note that all stack giants will be whatever current_max_size is * when newgiant() is first called. */ cur_stack_size = STACK_GROW; stack = (giant *) malloc (cur_stack_size * sizeof(giant)); for(i = 0; i < STACK_GROW; i++) stack[i] = NULL; if (stack_glen == 0) stack_glen = current_max_size; } else if (cur_stack_elem >= cur_stack_size) { /* Expand the stack if we need to. */ i = cur_stack_size; cur_stack_size += STACK_GROW; stack = (giant *) realloc (stack,cur_stack_size * sizeof(giant)); for (; i < cur_stack_size; i++) stack[i] = NULL; } else if (cur_stack_elem < cur_stack_size - 2*STACK_GROW) { /* Prune the stack if it's too big. Disabled, so the stack can only expand */ /* cur_stack_size -= STACK_GROW; for (i = cur_stack_size - STACK_GROW; i < cur_stack_size; i++) free(stack[i]); stack = (giant *) realloc (stack,cur_stack_size * sizeof(giant)); */ } /* Malloc our giant. */ if (stack[cur_stack_elem] == NULL) stack[cur_stack_elem] = malloc(stack_glen*sizeof(short)+sizeof(int)); stack[cur_stack_elem]->sign = 0; return(stack[cur_stack_elem++]); } void pushg ( int a ) { if (a < 0) return; cur_stack_elem -= a; if (cur_stack_elem < 0) cur_stack_elem = 0; } giant newgiant( int numshorts ) { int size; giant thegiant; if (numshorts > MAX_SHORTS) { fprintf(stderr, "Requested giant too big.\n"); fflush(stderr); } if (numshorts<=0) numshorts = MAX_SHORTS; size = numshorts*sizeof(short)+sizeof(int); thegiant = (giant)malloc(size); thegiant->sign = 0; if (newmin(2*numshorts,MAX_SHORTS) > current_max_size) current_max_size = newmin(2*numshorts,MAX_SHORTS); /* If newgiant() is being called for the first time, set the * size of the stack giants. */ if (stack_glen == 0) stack_glen = current_max_size; return(thegiant); } gmatrix newgmatrix( void ) /* Allocates space for a gmatrix struct, but does not * allocate space for the giants. */ { return((gmatrix) malloc (4*sizeof(giant))); } int bitlen( giant n ) { int b = 16, c = 1<<15, w; if (isZero(n)) return(0); w = n->n[abs(n->sign) - 1]; while ((w&c) == 0) { b--; c >>= 1; } return (16*(abs(n->sign)-1) + b); } int bitval( giant n, int pos ) { int i = abs(pos)>>4, c = 1 << (pos&15); return ((n->n[i]) & c); } int isone( giant g ) { return((g->sign==1)&&(g->n[0]==1)); } int isZero( giant thegiant ) /* Returns TR if thegiant == 0. */ { register int count; int length = abs(thegiant->sign); register unsigned short * numpointer = thegiant->n; if (length) { for(count = 0; countsign)*sizeof(short); memcpy((char *)destgiant,(char *)srcgiant,numbytes); } void itog( int i, giant g ) /* The giant g becomes set to the integer value i. */ { unsigned int j = abs(i); if (i==0) { g->sign = 0; g->n[0] = 0; return; } g->n[0] = (unsigned short)(j & 0xFFFF); j >>= 16; if (j) { g->n[1] = (unsigned short)j; g->sign = 2; } else { g->sign = 1; } if (i<0) g->sign = -(g->sign); } signed int gtoi( giant x ) /* Calculate the value of an int-sized giant NOT exceeding 31 bits. */ { register int size = abs(x->sign); register int sign = (x->sign < 0) ? -1 : 1; switch(size) { case 0: break; case 1: return sign * x->n[0]; case 2: return sign * (x->n[0]+((x->n[1])<<16)); default: fprintf(stderr,"Giant too large for gtoi\n"); break; } return 0; } int gsign( giant g ) /* Returns the sign of g. */ { if (isZero(g)) return(0); if (g->sign >0) return(1); return(-1); } #if 0 int gcompg(a,b) /* Returns -1,0,1 if ab, respectively. */ giant a,b; { int size = abs(a->sign); if(isZero(a)) size = 0; if (size == 0) { if (isZero(b)) return(0); else return(-gsign(b)); } if (b->sign == 0) return(gsign(a)); if (gsign(a)!=gsign(b)) return(gsign(a)); if (size>abs(b->sign)) return(gsign(a)); if (sizesign)) return(-gsign(a)); do { --size; if (a->n[size] > b->n[size]) return(gsign(a)); if (a->n[size] < b->n[size]) return(-gsign(a)); } while(size>0); return(0); } #else int gcompg( giant a, giant b ) /* Returns -1,0,1 if ab, respectively. */ { int sa = a->sign, j, sb = b->sign, va, vb, sgn; if(sa > sb) return(1); if(sa < sb) return(-1); if(sa < 0) { sa = -sa; /* Take absolute value of sa. */ sgn = -1; } else { sgn = 1; } for(j = sa-1; j >= 0; j--) { va = a->n[j]; vb = b->n[j]; if (va > vb) return(sgn); if (va < vb) return(-sgn); } return(0); } #endif void setmulmode( int mode ) { mulmode = mode; } /************************************************************** * * Private I/O Functions * **************************************************************/ int radixdiv( int base, int divisor, giant thegiant ) /* Divides giant of arbitrary base by divisor. * Returns remainder. Used by idivg and gread. */ { int first = TR; int finalsize = abs(thegiant->sign); int j = finalsize-1; unsigned short *digitpointer=&thegiant->n[j]; unsigned int num,rem=0; if (divisor == 0) { error = DIVIDEBYZERO; exit(error); } while (j>=0) { num=rem*base + *digitpointer; *digitpointer = (unsigned short)(num/divisor); if (first) { if (*digitpointer == 0) --finalsize; else first = FA; } rem = num % divisor; --digitpointer; --j; } if ((divisor<0) ^ (thegiant->sign<0)) finalsize=-finalsize; thegiant->sign=finalsize; return(rem); } void columnwrite( FILE *filepointer, short *column, char *format, short arg, int newlines ) /* Used by gwriteln. */ { char outstring[10]; short i; sprintf(outstring,format,arg); for (i=0; outstring[i]!=0; ++i) { if (newlines) { if (*column >= COLUMNWIDTH) { fputs("\\\n",filepointer); *column = 0; } } fputc(outstring[i],filepointer); ++*column; } } void gwrite( giant thegiant, FILE *filepointer, int newlines ) /* Outputs thegiant to filepointer. Output is terminated by a newline. */ { short column; unsigned int i; unsigned short *numpointer; giant garbagegiant, basetengrand; basetengrand = popg(); garbagegiant = popg(); if (isZero(thegiant)) { fputs("0",filepointer); } else { numpointer = basetengrand->n; gtog(thegiant,garbagegiant); basetengrand->sign = 0; do { *numpointer = (unsigned short)idivg(10000,garbagegiant); ++numpointer; if (++basetengrand->sign >= current_max_size) { error = OVFLOW; exit(error); } } while (!isZero(garbagegiant)); if (!error) { i = basetengrand->sign-1; column = 0; if (thegiant->sign<0 && basetengrand->n[i]!=0) columnwrite(filepointer,&column,"-",0, newlines); columnwrite(filepointer,&column,"%d",basetengrand->n[i],newlines); for( ; i>0; ) { --i; columnwrite(filepointer,&column,"%04d",basetengrand->n[i],newlines); } } } pushg(2); } void gwriteln( giant theg, FILE *filepointer ) { gwrite(theg, filepointer, 1); fputc('\n',filepointer); } void gread( giant theg, FILE *filepointer ) { char currentchar; int isneg,size,backslash=FA,numdigits=0; unsigned short *numpointer; giant basetenthousand; static char *inbuf = NULL; basetenthousand = popg(); if (inbuf == NULL) inbuf = (char*)malloc(MAX_DIGITS); currentchar = (char)fgetc(filepointer); if (currentchar=='-') { isneg=TR; } else { isneg=FA; if (currentchar!='+') ungetc(currentchar,filepointer); } do { currentchar = (char)fgetc(filepointer); if ((currentchar>='0') && (currentchar<='9')) { inbuf[numdigits]=currentchar; if(++numdigits==MAX_DIGITS) break; backslash=FA; } else { if (currentchar=='\\') backslash=TR; } } while(((currentchar!=' ') && (currentchar!='\n') && (currentchar!='\t')) || (backslash) ); if (numdigits) { size = 0; do { inbuf[numdigits] = 0; numdigits-=4; if (numdigits<0) numdigits=0; basetenthousand->n[size] = (unsigned short)strtol(&inbuf[numdigits],NULL,10); ++size; } while (numdigits>0); basetenthousand->sign = size; theg->sign = 0; numpointer = theg->n; do { *numpointer = (unsigned short) radixdiv(10000,1<<(8*sizeof(short)),basetenthousand); ++numpointer; if (++theg->sign >= current_max_size) { error = OVFLOW; exit(error); } } while (!isZero(basetenthousand)); if (isneg) theg->sign = -theg->sign; } pushg(1); } /************************************************************** * * Private Math Functions * **************************************************************/ void negg( giant g ) /* g becomes -g. */ { g->sign = -g->sign; } void absg( giant g ) { /* g becomes the absolute value of g. */ if (g->sign <0) g->sign=-g->sign; } void iaddg( int i, giant g ) /* Giant g becomes g + (int)i. */ { int w,j=0,carry = 0, size = abs(g->sign); giant tmp; if (isZero(g)) { itog(i,g); } else if(g->sign < 0) { tmp = popg(); itog(i, tmp); addg(tmp, g); pushg(1); return; } else { w = g->n[0]+i; do { g->n[j] = (unsigned short) (w & 65535L); carry = w >> 16; w = g->n[++j]+carry; } while ((carry!=0) && (jsign; g->n[size] = (unsigned short)carry; } } /* New subtract routines. The basic subtract "subg()" uses the following logic table: a b if(b > a) if(a > b) + + b := b - a b := -(a - b) - + b := b + (-a) N.A. + - N.A. b := -((-b) + a) - - b := (-a) - (-b) b := -((-b) - (-a)) The basic addition routine "addg()" uses: a b if(b > -a) if(-a > b) + + b := b + a N.A. - + b := b - (-a) b := -((-a) - b) + - b := a - (-b) b := -((-b) - a) - - N.A. b := -((-b) + (-a)) In this way, internal routines "normal_addg," "normal_subg," and "reverse_subg;" each of which assumes non-negative operands and a non-negative result, are now used for greater efficiency. */ void normal_addg( giant a, giant b ) /* b := a + b, both a,b assumed non-negative. */ { int carry = 0; int asize = a->sign, bsize = b->sign; long k; int j=0; unsigned short *aptr = a->n, *bptr = b->n; if (asize < bsize) { for (j=0; j= 65536L) { k -= 65536L; ++carry; } *bptr++ = (unsigned short)k; } for (j=asize; j= 65536L) { k -= 65536L; ++carry; } *bptr++ = (unsigned short)k; } } else { for (j=0; j= 65536L) { k -= 65536L; ++carry; } *bptr++ = (unsigned short)k; } for (j=bsize; j= 65536L) { k -= 65536L; ++carry; } *bptr++ = (unsigned short)k; } } if (carry) { *bptr = 1; ++j; } b->sign = j; } void normal_subg( giant a, giant b ) /* b := b - a; requires b, a non-negative and b >= a. */ { int j, size = b->sign; unsigned int k; if (a->sign == 0) return; k = 0; for (j=0; jsign; ++j) { k += 0xffff - a->n[j] + b->n[j]; b->n[j] = (unsigned short)(k & 0xffff); k >>= 16; } for (j=a->sign; jn[j]; b->n[j] = (unsigned short)(k & 0xffff); k >>= 16; } if (b->n[0] == 0xffff) iaddg(1,b); else ++b->n[0]; while ((size-- > 0) && (b->n[size]==0)); b->sign = (b->n[size]==0) ? 0 : size+1; } void reverse_subg( giant a, giant b ) /* b := a - b; requires b, a non-negative and a >= b. */ { int j, size = a->sign; unsigned int k; k = 0; for (j=0; jsign; ++j) { k += 0xffff - b->n[j] + a->n[j]; b->n[j] = (unsigned short)(k & 0xffff); k >>= 16; } for (j=b->sign; jn[j]; b->n[j] = (unsigned short)(k & 0xffff); k >>= 16; } b->sign = size; /* REC, 21 Apr 1996. */ if (b->n[0] == 0xffff) iaddg(1,b); else ++b->n[0]; while (!b->n[--size]); b->sign = size+1; } void addg( giant a, giant b ) /* b := b + a, any signs any result. */ { int asgn = a->sign, bsgn = b->sign; if (asgn == 0) return; if (bsgn == 0) { gtog(a,b); return; } if ((asgn < 0) == (bsgn < 0)) { if (bsgn > 0) { normal_addg(a,b); return; } absg(b); if(a != b) absg(a); normal_addg(a,b); negg(b); if(a != b) negg(a); return; } if(bsgn > 0) { negg(a); if (gcompg(b,a) >= 0) { normal_subg(a,b); negg(a); return; } reverse_subg(a,b); negg(a); negg(b); return; } negg(b); if(gcompg(b,a) < 0) { reverse_subg(a,b); return; } normal_subg(a,b); negg(b); return; } void subg( giant a, giant b ) /* b := b - a, any signs, any result. */ { int asgn = a->sign, bsgn = b->sign; if (asgn == 0) return; if (bsgn == 0) { gtog(a,b); negg(b); return; } if ((asgn < 0) != (bsgn < 0)) { if (bsgn > 0) { negg(a); normal_addg(a,b); negg(a); return; } negg(b); normal_addg(a,b); negg(b); return; } if (bsgn > 0) { if (gcompg(b,a) >= 0) { normal_subg(a,b); return; } reverse_subg(a,b); negg(b); return; } negg(a); negg(b); if (gcompg(b,a) >= 0) { normal_subg(a,b); negg(a); negg(b); return; } reverse_subg(a,b); negg(a); return; } int numtrailzeros( giant g ) /* Returns the number of trailing zero bits in g. */ { register int numshorts = abs(g->sign), j, bcount=0; register unsigned short gshort, c; for (j=0;jn[j]; c = 1; for (bcount=0;bcount<16; bcount++) { if (c & gshort) break; c <<= 1; } if (bcount<16) break; } return(bcount + 16*j); } void bdivg( giant v, giant u ) /* u becomes greatest power of two not exceeding u/v. */ { int diff = bitlen(u) - bitlen(v); giant scratch7; if (diff<0) { itog(0,u); return; } scratch7 = popg(); gtog(v, scratch7); gshiftleft(diff,scratch7); if (gcompg(u,scratch7) < 0) diff--; if (diff<0) { itog(0,u); pushg(1); return; } itog(1,u); gshiftleft(diff,u); pushg(1); } int binvaux( giant p, giant x ) /* Binary inverse method. Returns zero if no inverse exists, * in which case x becomes GCD(x,p). */ { giant scratch7, u0, u1, v0, v1; if (isone(x)) return(1); u0 = popg(); u1 = popg(); v0 = popg(); v1 = popg(); itog(1, v0); gtog(x, v1); itog(0,x); gtog(p, u1); scratch7 = popg(); while(!isZero(v1)) { gtog(u1, u0); bdivg(v1, u0); gtog(x, scratch7); gtog(v0, x); mulg(u0, v0); subg(v0,scratch7); gtog(scratch7, v0); gtog(u1, scratch7); gtog(v1, u1); mulg(u0, v1); subg(v1,scratch7); gtog(scratch7, v1); } pushg(1); if (!isone(u1)) { gtog(u1,x); if(x->sign<0) addg(p, x); pushg(4); return(0); } if(x->sign<0) addg(p, x); pushg(4); return(1); } int binvg( giant p, giant x ) { modg(p, x); return(binvaux(p,x)); } int invg( giant p, giant x ) { modg(p, x); return(invaux(p,x)); } int invaux( giant p, giant x ) /* Returns zero if no inverse exists, in which case x becomes * GCD(x,p). */ { giant scratch7, u0, u1, v0, v1; if ((x->sign==1)&&(x->n[0]==1)) return(1); u0 = popg(); u1 = popg(); v0 = popg(); v1 = popg(); itog(1,u1); gtog(p, v0); gtog(x, v1); itog(0,x); scratch7 = popg(); while (!isZero(v1)) { gtog(v0, u0); divg(v1, u0); gtog(u0, scratch7); mulg(v1, scratch7); subg(v0, scratch7); negg(scratch7); gtog(v1, v0); gtog(scratch7, v1); gtog(u1, scratch7); mulg(u0, scratch7); subg(x, scratch7); negg(scratch7); gtog(u1,x); gtog(scratch7, u1); } pushg(1); if ((v0->sign!=1)||(v0->n[0]!=1)) { gtog(v0,x); pushg(4); return(0); } if(x->sign<0) addg(p, x); pushg(4); return(1); } int mersenneinvg( int q, giant x ) { int k; giant u0, u1, v1; u0 = popg(); u1 = popg(); v1 = popg(); itog(1, u0); itog(0, u1); itog(1, v1); gshiftleft(q, v1); subg(u0, v1); mersennemod(q, x); while (1) { k = -1; if (isZero(x)) { gtog(v1, x); pushg(3); return(0); } while (bitval(x, ++k) == 0); gshiftright(k, x); if (k) { gshiftleft(q-k, u0); mersennemod(q, u0); } if (isone(x)) break; addg(u1, u0); mersennemod(q, u0); negg(u1); addg(u0, u1); mersennemod(q, u1); if (!gcompg(v1,x)) { pushg(3); return(0); } addg(v1, x); negg(v1); addg(x, v1); mersennemod(q, v1); } gtog(u0, x); mersennemod(q,x); pushg(3); return(1); } void cgcdg( giant a, giant v ) /* Classical Euclid GCD. v becomes gcd(a, v). */ { giant u, r; v->sign = abs(v->sign); if (isZero(a)) return; u = popg(); r = popg(); gtog(a, u); u->sign = abs(u->sign); while (!isZero(v)) { gtog(u, r); modg(v, r); gtog(v, u); gtog(r, v); } gtog(u,v); pushg(2); } void gcdg( giant x, giant y ) { if (bitlen(y)<= GCDLIMIT) bgcdg(x,y); else ggcd(x,y); } void bgcdg( giant a, giant b ) /* Binary form of the gcd. b becomes the gcd of a,b. */ { int k = isZero(b), m = isZero(a); giant u, v, t; if (k || m) { if (m) { if (k) itog(1,b); return; } if (k) { if (m) itog(1,b); else gtog(a,b); return; } } u = popg(); v = popg(); t = popg(); /* Now neither a nor b is zero. */ gtog(a, u); u->sign = abs(a->sign); gtog(b, v); v->sign = abs(b->sign); k = numtrailzeros(u); m = numtrailzeros(v); if (k>m) k = m; gshiftright(k,u); gshiftright(k,v); if (u->n[0] & 1) { gtog(v, t); negg(t); } else { gtog(u,t); } while (!isZero(t)) { m = numtrailzeros(t); gshiftright(m, t); if (t->sign > 0) { gtog(t, u); subg(v,t); } else { gtog(t, v); negg(v); addg(u,t); } } gtog(u,b); gshiftleft(k, b); pushg(3); } void powerg( int m, int n, giant g ) /* g becomes m^n, NO mod performed. */ { giant scratch2 = popg(); itog(1, g); itog(m, scratch2); while (n) { if (n & 1) mulg(scratch2, g); n >>= 1; if (n) squareg(scratch2); } pushg(1); } #if 0 void jtest( giant n ) { if (n->sign) { if (n->n[n->sign-1] == 0) { fprintf(stderr,"%d %d tilt",n->sign, (int)(n->n[n->sign-1])); exit(7); } } } #endif void make_recip( giant d, giant r ) /* r becomes the steady-state reciprocal * 2^(2b)/d, where b = bit-length of d-1. */ { int b; giant tmp, tmp2; if (isZero(d) || (d->sign < 0)) { exit(SIGN); } tmp = popg(); tmp2 = popg(); itog(1, r); subg(r, d); b = bitlen(d); addg(r, d); gshiftleft(b, r); gtog(r, tmp2); while (1) { gtog(r, tmp); squareg(tmp); gshiftright(b, tmp); mulg(d, tmp); gshiftright(b, tmp); addg(r, r); subg(tmp, r); if (gcompg(r, tmp2) <= 0) break; gtog(r, tmp2); } itog(1, tmp); gshiftleft(2*b, tmp); gtog(r, tmp2); mulg(d, tmp2); subg(tmp2, tmp); itog(1, tmp2); while (tmp->sign < 0) { subg(tmp2, r); addg(d, tmp); } pushg(2); } void divg_via_recip( giant d, giant r, giant n ) /* n := n/d, where r is the precalculated * steady-state reciprocal of d. */ { int s = 2*(bitlen(r)-1), sign = gsign(n); giant tmp, tmp2; if (isZero(d) || (d->sign < 0)) { exit(SIGN); } tmp = popg(); tmp2 = popg(); n->sign = abs(n->sign); itog(0, tmp2); while (1) { gtog(n, tmp); mulg(r, tmp); gshiftright(s, tmp); addg(tmp, tmp2); mulg(d, tmp); subg(tmp, n); if (gcompg(n,d) >= 0) { subg(d,n); iaddg(1, tmp2); } if (gcompg(n,d) < 0) break; } gtog(tmp2, n); n->sign *= sign; pushg(2); } #if 1 void modg_via_recip( giant d, giant r, giant n ) /* This is the fastest mod of the present collection. * n := n % d, where r is the precalculated * steady-state reciprocal of d. */ { int s = (bitlen(r)-1), sign = n->sign; giant tmp, tmp2; if (isZero(d) || (d->sign < 0)) { exit(SIGN); } tmp = popg(); tmp2 = popg(); n->sign = abs(n->sign); while (1) { gtog(n, tmp); gshiftright(s-1, tmp); mulg(r, tmp); gshiftright(s+1, tmp); mulg(d, tmp); subg(tmp, n); if (gcompg(n,d) >= 0) subg(d,n); if (gcompg(n,d) < 0) break; } if (sign >= 0) goto done; if (isZero(n)) goto done; negg(n); addg(d,n); done: pushg(2); return; } #else void modg_via_recip( giant d, giant r, giant n ) { int s = 2*(bitlen(r)-1), sign = n->sign; giant tmp, tmp2; if (isZero(d) || (d->sign < 0)) { exit(SIGN); } tmp = popg(); tmp2 = popg() n->sign = abs(n->sign); while (1) { gtog(n, tmp); mulg(r, tmp); gshiftright(s, tmp); mulg(d, tmp); subg(tmp, n); if (gcompg(n,d) >= 0) subg(d,n); if (gcompg(n,d) < 0) break; } if (sign >= 0) goto done; if (isZero(n)) goto done; negg(n); addg(d,n); done: pushg(2); return; } #endif void modg( giant d, giant n ) /* n becomes n%d. n is arbitrary, but the denominator d must be positive! */ { if (cur_recip == NULL) { cur_recip = newgiant(current_max_size); cur_den = newgiant(current_max_size); gtog(d, cur_den); make_recip(d, cur_recip); } else if (gcompg(d, cur_den)) { gtog(d, cur_den); make_recip(d, cur_recip); } modg_via_recip(d, cur_recip, n); } #if 0 int feemulmod ( giant a, giant b, int q, int k ) /* a becomes (a*b) (mod 2^q-k) where q % 16 == 0 and k is "small" (0 < k < 65535). * Returns 0 if unsuccessful, otherwise 1. */ { giant carry, kk, scratch; int i, j; int asize = abs(a->sign), bsize = abs(b->sign); unsigned short *aptr,*bptr,*destptr; unsigned int words; int kpower, curk; if ((q % 16) || (k <= 0) || (k >= 65535)) { return (0); } carry = popg(); kk = popg(); scratch = popg(); for (i=0; in[i]=0; words = q >> 4; bptr = b->n; for (i = 0; i < bsize; i++) { mult = *bptr++; if (mult) { kpower = i/words; if (kpower >= 1) itog (kpower,kk); for (j = 1; j < kpower; k++) smulg(kpower,kk); itog(0,carry); aptr = a->n; for (j = 0; j < bsize; b++) { gtog(kk,scratch); smulg(*aptr++,scratch); smulg(mult,scratch); iaddg(*destptr,scratch); addg(carry,scratch); *destptr++ = scratch->n[0]; gshiftright(scratch,16); gtog(scratch,carry); if (destptr - scratch->n >= words) { smulg(k, carry); smulg(k, kk); destptr -= words; } } } } int i,j,m; unsigned int prod,carry=0; int asize = abs(a->sign), bsize = abs(b->sign); unsigned short *aptr,*bptr,*destptr; unsigned short mult; int words, excess; int temp; giant scratch = popg(), scratch2 = popg(), scratch3 = popg(); short *carryptr = scratch->n; int kpower,kpowerlimit, curk; if ((q % 16) || (k <= 0) || (k >= 65535)) { return (0); } scratch for (i=0; in[i]=0; words = q >> 4; bptr = b->n; for (i=0; in; destptr = scratch->n + i; if (kpower == 0) { carry = 0; } else if (kpower <= kpowerlimit) { carry = 0; curk = k; for (j = 1; j < kpower; j++) curk *= k; } else { itog (k,scratch); for (j = 1; j < kpower; j++) smulg(k,scratch); itog(0,scratch2); } for (j = 0; j < asize; j++) { if(kpower == 0) { prod = *aptr++ * mult + *destptr + carry; *destptr++ = (unsigned short)(prod & 0xFFFF); carry = prod >> 16; } else if (kpower < kpowerlimit) { prod = kcur * *aptr++; temp = prod >> 16; prod &= 0xFFFF; temp *= mult; prod *= mult; temp += prod >> 16; prod &= 0xFFFF; prod += *destptr + carry; carry = prod >> 16 + temp; *destptr++ = (unsigned short)(prod & 0xFFFF); } else { gtog(scratch,scratch3); smulg(*aptr++,scratch3); smulg(mult,scratch3); iaddg(*destptr,scratch3); addg(scratch3,scratch2); *destptr++ = scratch2->n[0]; memmove(scratch2->n,scratch2->n+1,2*(scratch2->size-1)); scratch2->sign--; } if (destptr - scratch->n > words) { if (kpower == 0) { curk = k; carry *= k; } else if (kpower < kpowerlimit) { curk *= k; carry *= curk; } else if (kpower == kpowerlimit) { itog (k,scratch); for (j = 1; j < kpower; j++) smulg(k,scratch); itog(carry,scratch2); smulg(k,scratch2); } else { smulg(k,scratch); smulg(k,scratch2); } kpower++; destptr -= words; } } /* Next, deal with the carry term. Needs to be improved to handle overflow carry cases. */ if (kpower <= kpowerlimit) { iaddg(carry,scratch); } else { addg(scratch2,scratch); } while(scratch->sign > q) gtog(scratch,scratch2) } } scratch->sign = destptr - scratch->n; if (!carry) --(scratch->sign); scratch->sign *= gsign(a)*gsign(b); gtog(scratch,a); pushg(3); return (1); } #endif int idivg( int divisor, giant theg ) { /* theg becomes theg/divisor. Returns remainder. */ int n; int base = 1<<(8*sizeof(short)); n = radixdiv(base,divisor,theg); return(n); } void divg( giant d, giant n ) /* n becomes n/d. n is arbitrary, but the denominator d must be positive! */ { if (cur_recip == NULL) { cur_recip = newgiant(current_max_size); cur_den = newgiant(current_max_size); gtog(d, cur_den); make_recip(d, cur_recip); } else if (gcompg(d, cur_den)) { gtog(d, cur_den); make_recip(d, cur_recip); } divg_via_recip(d, cur_recip, n); } void powermod( giant x, int n, giant g ) /* x becomes x^n (mod g). */ { giant scratch2 = popg(); gtog(x, scratch2); itog(1, x); while (n) { if (n & 1) { mulg(scratch2, x); modg(g, x); } n >>= 1; if (n) { squareg(scratch2); modg(g, scratch2); } } pushg(1); } void powermodg( giant x, giant n, giant g ) /* x becomes x^n (mod g). */ { int len, pos; giant scratch2 = popg(); gtog(x, scratch2); itog(1, x); len = bitlen(n); pos = 0; while (1) { if (bitval(n, pos++)) { mulg(scratch2, x); modg(g, x); } if (pos>=len) break; squareg(scratch2); modg(g, scratch2); } pushg(1); } void fermatpowermod( giant x, int n, int q ) /* x becomes x^n (mod 2^q+1). */ { giant scratch2 = popg(); gtog(x, scratch2); itog(1, x); while (n) { if (n & 1) { mulg(scratch2, x); fermatmod(q, x); } n >>= 1; if (n) { squareg(scratch2); fermatmod(q, scratch2); } } pushg(1); } void fermatpowermodg( giant x, giant n, int q ) /* x becomes x^n (mod 2^q+1). */ { int len, pos; giant scratch2 = popg(); gtog(x, scratch2); itog(1, x); len = bitlen(n); pos = 0; while (1) { if (bitval(n, pos++)) { mulg(scratch2, x); fermatmod(q, x); } if (pos>=len) break; squareg(scratch2); fermatmod(q, scratch2); } pushg(1); } void mersennepowermod( giant x, int n, int q ) /* x becomes x^n (mod 2^q-1). */ { giant scratch2 = popg(); gtog(x, scratch2); itog(1, x); while (n) { if (n & 1) { mulg(scratch2, x); mersennemod(q, x); } n >>= 1; if (n) { squareg(scratch2); mersennemod(q, scratch2); } } pushg(1); } void mersennepowermodg( giant x, giant n, int q ) /* x becomes x^n (mod 2^q-1). */ { int len, pos; giant scratch2 = popg(); gtog(x, scratch2); itog(1, x); len = bitlen(n); pos = 0; while (1) { if (bitval(n, pos++)) { mulg(scratch2, x); mersennemod(q, x); } if (pos>=len) break; squareg(scratch2); mersennemod(q, scratch2); } pushg(1); } void gshiftleft( int bits, giant g ) /* shift g left bits bits. Equivalent to g = g*2^bits. */ { int rem = bits&15, crem = 16-rem, words = bits>>4; int size = abs(g->sign), j, k, sign = gsign(g); unsigned short carry, dat; if (!bits) return; if (!size) return; if (bits < 0) { gshiftright(-bits,g); return; } if (size+words+1 > current_max_size) { error = OVFLOW; exit(error); } if (rem == 0) { memmove(g->n + words, g->n, size * sizeof(short)); for (j = 0; j < words; j++) g->n[j] = 0; g->sign += (g->sign < 0)?(-words):(words); } else { k = size+words; carry = 0; for (j=size-1; j>=0; j--) { dat = g->n[j]; g->n[k--] = (unsigned short)((dat >> crem) | carry); carry = (unsigned short)(dat << rem); } do { g->n[k--] = carry; carry = 0; } while(k>=0); k = size+words; if (g->n[k] == 0) --k; g->sign = sign*(k+1); } } void gshiftright( int bits, giant g ) /* shift g right bits bits. Equivalent to g = g/2^bits. */ { register int j,size=abs(g->sign); register unsigned int carry; int words = bits >> 4; int remain = bits & 15, cremain = (16-remain); if (bits==0) return; if (isZero(g)) return; if (bits < 0) { gshiftleft(-bits,g); return; } if (words >= size) { g->sign = 0; return; } if (remain == 0) { memmove(g->n,g->n + words,(size - words) * sizeof(short)); g->sign += (g->sign < 0)?(words):(-words); } else { size -= words; if (size) { for(j=0;jn[j+words+1] << cremain; g->n[j] = (unsigned short)((g->n[j+words] >> remain ) | carry); } g->n[size-1] = (unsigned short)(g->n[size-1+words] >> remain); } if (g->n[size-1] == 0) --size; if (g->sign > 0) g->sign = size; else g->sign = -size; } } void extractbits( int n, giant src, giant dest ) /* dest becomes lowermost n bits of src. Equivalent to dest = src % 2^n. */ { register int words = n >> 4, numbytes = words*sizeof(short); register int bits = n & 15; if (n<=0) return; if (words >= abs(src->sign)) gtog(src,dest); else { memcpy((char *)(dest->n), (char *)(src->n), numbytes); if (bits) { dest->n[words] = (unsigned short)(src->n[words] & ((1<n[words-1] == 0) && (words > 0)) { --words; } if (src->sign<0) dest->sign = -words; else dest->sign = words; } } int allzeros( int shorts, int bits, giant g ) { int i=shorts; while (i>0) { if (g->n[--i]) return(0); } return((int)(!(g->n[shorts] & ((1<>4, bits = n & 15, i = shorts, mask = 1<g->sign-1; --temp) { g->n[temp] = 0; } if (g->n[shorts] & mask) { /* if high bit is set, -g = 1. */ g->sign = 1; g->n[0] = 1; return; } if (allzeros(shorts,bits,g)) return; /* if g=0, -g = 0. */ while (i>0) { --i; g->n[i] = (unsigned short)(~(g->n[i+1])); } g->n[shorts] ^= mask-1; carry = 2; i = 0; while (carry) { temp = g->n[i]+carry; g->n[i++] = (unsigned short)(temp & 0xffff); carry = temp>>16; } while(!g->n[shorts]) { --shorts; } g->sign = shorts+1; } void mersennemod ( int n, giant g ) /* g := g (mod 2^n - 1) */ { int the_sign, s; giant scratch3 = popg(), scratch4 = popg(); if ((the_sign = gsign(g)) < 0) absg(g); while (bitlen(g) > n) { gtog(g,scratch3); gshiftright(n,scratch3); addg(scratch3,g); gshiftleft(n,scratch3); subg(scratch3,g); } if(!isZero(g)) { if ((s = gsign(g)) < 0) absg(g); itog(1,scratch3); gshiftleft(n,scratch3); itog(1,scratch4); subg(scratch4,scratch3); if(gcompg(g,scratch3) >= 0) subg(scratch3,g); if (s < 0) { g->sign = -g->sign; addg(scratch3,g); } if (the_sign < 0) { g->sign = -g->sign; addg(scratch3,g); } } pushg(2); } void fermatmod ( int n, giant g ) /* g := g (mod 2^n + 1), */ { int the_sign, s; giant scratch3 = popg(); if ((the_sign = gsign(g)) < 0) absg(g); while (bitlen(g) > n) { gtog(g,scratch3); gshiftright(n,scratch3); subg(scratch3,g); gshiftleft(n,scratch3); subg(scratch3,g); } if((bitlen(g) < n) && (the_sign * (g->sign) >= 0)) goto leave; if(!isZero(g)) { if ((s = gsign(g)) < 0) absg(g); itog(1,scratch3); gshiftleft(n,scratch3); iaddg(1,scratch3); if(gcompg(g,scratch3) >= 0) subg(scratch3,g); if (s * the_sign < 0) { g->sign = -g->sign; addg(scratch3,g); } } leave: pushg(1); } void fer_mod ( int n, giant g, giant modulus ) /* Same as fermatmod(), except modulus = 2^n should be passed if available (i.e. if already allocated and set). */ { int the_sign, s; giant scratch3 = popg(); if ((the_sign = gsign(g)) < 0) absg(g); while (bitlen(g) > n) { gtog(g,scratch3); gshiftright(n,scratch3); subg(scratch3,g); gshiftleft(n,scratch3); subg(scratch3,g); } if((bitlen(g) < n) && (the_sign * (g->sign) >= 0)) goto leave; if(!isZero(g)) { if ((s = gsign(g)) < 0) absg(g); if(gcompg(g,modulus) >= 0) subg(modulus,g); if (s * the_sign < 0) { g->sign = -g->sign; addg(modulus,g); } } leave: pushg(1); } void smulg( unsigned short i, giant g ) /* g becomes g * i. */ { unsigned short carry = 0; int size = abs(g->sign); register int j,k,mul = abs(i); unsigned short *digit = g->n; for (j=0; j>16); *digit = (unsigned short)(k & 0xffff); ++digit; } if (carry) { if (++j >= current_max_size) { error = OVFLOW; exit(error); } *digit = carry; } if ((g->sign>0) ^ (i>0)) g->sign = -j; else g->sign = j; } void squareg( giant b ) /* b becomes b^2. */ { auxmulg(b,b); } void mulg( giant a, giant b ) /* b becomes a*b. */ { auxmulg(a,b); } void auxmulg( giant a, giant b ) /* Optimized general multiply, b becomes a*b. Modes are: * AUTO_MUL: switch according to empirical speed criteria. * GRAMMAR_MUL: force grammar-school algorithm. * KARAT_MUL: force Karatsuba divide-conquer method. * FFT_MUL: force floating point FFT method. */ { float grammartime; int square = (a==b); int sizea, sizeb; switch (mulmode) { case GRAMMAR_MUL: if (square) grammarsquareg(b); else grammarmulg(a,b); break; case FFT_MUL: if (square) FFTsquareg(b); else FFTmulg(a,b); break; case KARAT_MUL: if (square) karatsquareg(b); else karatmulg(a,b); break; case AUTO_MUL: sizea = abs(a->sign); sizeb = abs(b->sign); if((sizea > KARAT_BREAK) && (sizea <= FFT_BREAK) && (sizeb > KARAT_BREAK) && (sizeb <= FFT_BREAK)){ if(square) karatsquareg(b); else karatmulg(a,b); } else { grammartime = (float)sizea; grammartime *= (float)sizeb; if (grammartime < FFT_BREAK * FFT_BREAK) { if (square) grammarsquareg(b); else grammarmulg(a,b); } else { if (square) FFTsquareg(b); else FFTmulg(a,b); } } break; } } void justg(giant x) { int s = x->sign, sg = 1; if(s<0) { sg = -1; s = -s; } --s; while(x->n[s] == 0) { --s; if(s < 0) break; } x->sign = sg*(s+1); } /* Next, improved Karatsuba routines from A. Powell, improvements by G. Woltman. */ void karatmulg(giant x, giant y) /* y becomes x*y. */ { int s = abs(x->sign), t = abs(y->sign), w, bits, sg = gsign(x)*gsign(y); giant a, b, c, d, e, f; if((s <= KARAT_BREAK) || (t <= KARAT_BREAK)) { grammarmulg(x,y); return; } w = (s + t + 2)/4; bits = 16*w; a = popg(); b = popg(); c = popg(); d = popg(); e = popg(); f = popg(); gtog(x,a); absg(a); if (w <= s) {a->sign = w; justg(a);} gtog(x,b); absg(b); gshiftright(bits, b); gtog(y,c); absg(c); if (w <= t) {c->sign = w; justg(c);} gtog(y,d); absg(d); gshiftright(bits,d); gtog(a,e); normal_addg(b,e); /* e := (a + b) */ gtog(c,f); normal_addg(d,f); /* f := (c + d) */ karatmulg(e,f); /* f := (a + b)(c + d) */ karatmulg(c,a); /* a := a c */ karatmulg(d,b); /* b := b d */ normal_subg(a,f); /* f := (a + b)(c + d) - a c */ normal_subg(b,f); /* f := (a + b)(c + d) - a c - b d */ gshiftleft(bits, b); normal_addg(f, b); gshiftleft(bits, b); normal_addg(a, b); gtog(b, y); y->sign *= sg; pushg(6); return; } void karatsquareg(giant x) /* x becomes x^2. */ { int s = abs(x->sign), w, bits; giant a, b, c; if(s <= KARAT_BREAK) { grammarsquareg(x); return; } w = (s+1)/2; bits = 16*w; a = popg(); b = popg(); c = popg(); gtog(x, a); a->sign = w; justg(a); gtog(x, b); absg(b); gshiftright(bits, b); gtog(a,c); normal_addg(b,c); karatsquareg(c); karatsquareg(a); karatsquareg(b); normal_subg(b, c); normal_subg(a, c); gshiftleft(bits, b); normal_addg(c,b); gshiftleft(bits, b); normal_addg(a, b); gtog(b, x); pushg(3); return; } void grammarmulg( giant a, giant b ) /* b becomes a*b. */ { int i,j; unsigned int prod,carry=0; int asize = abs(a->sign), bsize = abs(b->sign); unsigned short *aptr,*bptr,*destptr; unsigned short mult; giant scratch = popg(); for (i=0; in[i]=0; } bptr = &(b->n[0]); for (i=0; in[0]); destptr = &(scratch->n[i]); for (j=0; j> 16; } *destptr = (unsigned short)carry; } } bsize+=asize; if (!carry) --bsize; scratch->sign = gsign(a)*gsign(b)*bsize; gtog(scratch,b); pushg(1); } void grammarsquareg ( giant a ) /* a := a^2. */ { unsigned int cur_term; unsigned int prod, carry=0, temp; int asize = abs(a->sign), max = asize * 2 - 1; unsigned short *ptr = a->n, *ptr1, *ptr2; giant scratch; if(asize == 0) { itog(0,a); return; } scratch = popg(); asize--; temp = *ptr; temp *= temp; scratch->n[0] = temp; carry = temp >> 16; for (cur_term = 1; cur_term < max; cur_term++) { ptr1 = ptr2 = ptr; if (cur_term <= asize) { ptr2 += cur_term; } else { ptr1 += cur_term - asize; ptr2 += asize; } prod = carry & 0xFFFF; carry >>= 16; while(ptr1 < ptr2) { temp = *ptr1++ * *ptr2--; prod += (temp << 1) & 0xFFFF; carry += (temp >> 15); } if (ptr1 == ptr2) { temp = *ptr1; temp *= temp; prod += temp & 0xFFFF; carry += (temp >> 16); } carry += prod >> 16; scratch->n[cur_term] = (unsigned short) (prod); } if (carry) { scratch->n[cur_term] = carry; scratch->sign = cur_term+1; } else scratch->sign = cur_term; gtog(scratch,a); pushg(1); } /************************************************************** * * FFT multiply Functions * **************************************************************/ int initL = 0; int lpt( int n, int *lambda ) /* Returns least power of two greater than n. */ { register int i = 1; *lambda = 0; while (i maxFFTerror) maxFFTerror = err; } z[j] =0; k = 0; do { g = gfloor(f*TWOM16); z[j+k] += f-g*TWO16; ++k; f=g; } while(f != 0.0); } car = 0; for(j=0;j < last + 1;j++) { m = (int)(z[j]+car); x->n[j] = (unsigned short)(m & 0xffff); car = (m>>16); } if (car) x->n[j] = (unsigned short)car; else --j; while(!(x->n[j])) --j; x->sign = j+1; } void FFTsquareg( giant x ) { int j,size = abs(x->sign); register int L; if (size<4) { grammarmulg(x,x); return; } L = lpt(size+size, &j); if(!z) z = (double *)malloc(MAX_SHORTS * sizeof(double)); giant_to_double(x, size, z, L); fft_real_to_hermitian(z, L); square_hermitian(z, L); fftinv_hermitian_to_real(z, L); addsignal(x,z,L); x->sign = abs(x->sign); } void FFTmulg( giant y, giant x ) { /* x becomes y*x. */ int lambda, sizex = abs(x->sign), sizey = abs(y->sign); int finalsign = gsign(x)*gsign(y); register int L; if ((sizex<=4)||(sizey<=4)) { grammarmulg(y,x); return; } L = lpt(sizex+sizey, &lambda); if(!z) z = (double *)malloc(MAX_SHORTS * sizeof(double)); if(!z2) z2 = (double *)malloc(MAX_SHORTS * sizeof(double)); giant_to_double(x, sizex, z, L); giant_to_double(y, sizey, z2, L); fft_real_to_hermitian(z, L); fft_real_to_hermitian(z2, L); mul_hermitian(z2, z, L); fftinv_hermitian_to_real(z, L); addsignal(x,z,L); x->sign = finalsign*abs(x->sign); } void scramble_real( double *x, int n ) { register int i,j,k; register double tmp; for (i=0,j=0;i>=1; } j += k; } } void fft_real_to_hermitian( double *z, int n ) /* Output is {Re(z^[0]),...,Re(z^[n/2),Im(z^[n/2-1]),...,Im(z^[1]). * This is a decimation-in-time, split-radix algorithm. */ { register double cc1, ss1, cc3, ss3; register int is, id, i0, i1, i2, i3, i4, i5, i6, i7, i8, a, a3, b, b3, nminus = n-1, dil, expand; register double *x, e; int nn = n>>1; double t1, t2, t3, t4, t5, t6; register int n2, n4, n8, i, j; init_sinCos(n); expand = cur_run/n; scramble_real(z, n); x = z-1; /* FORTRAN compatibility. */ is = 1; id = 4; do { for (i0=is;i0<=n;i0+=id) { i1 = i0+1; e = x[i0]; x[i0] = e + x[i1]; x[i1] = e - x[i1]; } is = (id<<1)-1; id <<= 2; } while(is>=1) { n2 <<= 1; n4 = n2>>2; n8 = n2>>3; is = 0; id = n2<<1; do { for (i=is;i>1; double t1, t2, t3, t4, t5; int n2, n4, n8, i, j; init_sinCos(n); expand = cur_run/n; x = z-1; n2 = n<<1; while(nn >>= 1) { is = 0; id = n2; n2 >>= 1; n4 = n2>>2; n8 = n4>>1; do { for(i=is;i>1; register double aa, bb, am, bm; b[0] *= a[0]; b[half] *= a[half]; for (k=1;k>1; register double c, d; b[0] *= b[0]; b[half] *= b[half]; for (k=1;kn[j]; } } void gswap( giant *p, giant *q ) { giant t; t = *p; *p = *q; *q = t; } void onestep( giant x, giant y, gmatrix A ) /* Do one step of the euclidean algorithm and modify * the matrix A accordingly. */ { giant s4 = popg(); gtog(x,s4); gtog(y,x); gtog(s4,y); divg(x,s4); punch(s4,A); mulg(x,s4); subg(s4,y); pushg(1); } void mulvM( gmatrix A, giant x, giant y ) /* Multiply vector by Matrix; changes x,y. */ { giant s0 = popg(), s1 = popg(); gtog(A->ur,s0); gtog( A->lr,s1); dotproduct(x,y,A->ul,s0); dotproduct(x,y,A->ll,s1); gtog(s0,x); gtog(s1,y); pushg(2); } void mulmM( gmatrix A, gmatrix B ) /* Multiply matrix by Matrix; changes second matrix. */ { giant s0 = popg(); giant s1 = popg(); giant s2 = popg(); giant s3 = popg(); gtog(B->ul,s0); gtog(B->ur,s1); gtog(B->ll,s2); gtog(B->lr,s3); dotproduct(A->ur,A->ul,B->ll,s0); dotproduct(A->ur,A->ul,B->lr,s1); dotproduct(A->ll,A->lr,B->ul,s2); dotproduct(A->ll,A->lr,B->ur,s3); gtog(s0,B->ul); gtog(s1,B->ur); gtog(s2,B->ll); gtog(s3,B->lr); pushg(4); } void writeM( gmatrix A ) { printf(" ul:"); gout(A->ul); printf(" ur:"); gout(A->ur); printf(" ll:"); gout(A->ll); printf(" lr:"); gout(A->lr); } void punch( giant q, gmatrix A ) /* Multiply the matrix A on the left by [0,1,1,-q]. */ { giant s0 = popg(); gtog(A->ll,s0); mulg(q,A->ll); gswap(&A->ul,&A->ll); subg(A->ul,A->ll); gtog(s0,A->ul); gtog(A->lr,s0); mulg(q,A->lr); gswap(&A->ur,&A->lr); subg(A->ur,A->lr); gtog(s0,A->ur); pushg(1); } static void dotproduct( giant a, giant b, giant c, giant d ) /* Replace last argument with the dot product of two 2-vectors. */ { giant s4 = popg(); gtog(c,s4); mulg(a, s4); mulg(b,d); addg(s4,d); pushg(1); } void ggcd( giant xx, giant yy ) /* A giant gcd. Modifies its arguments. */ { giant x = popg(), y = popg(); gmatrix A = newgmatrix(); gtog(xx,x); gtog(yy,y); for(;;) { fix(&x,&y); if (bitlen(y) <= GCDLIMIT ) break; A->ul = popg(); A->ur = popg(); A->ll = popg(); A->lr = popg(); itog(1,A->ul); itog(0,A->ur); itog(0,A->ll); itog(1,A->lr); hgcd(0,x,y,A); mulvM(A,x,y); pushg(4); fix(&x,&y); if (bitlen(y) <= GCDLIMIT ) break; modg(y,x); gswap(&x,&y); } bgcdg(x,y); gtog(y,yy); pushg(2); free(A); } void fix( giant *p, giant *q ) /* Insure that x > y >= 0. */ { if( gsign(*p) < 0 ) negg(*p); if( gsign(*q) < 0 ) negg(*q); if( gcompg(*p,*q) < 0 ) gswap(p,q); } void hgcd( int n, giant xx, giant yy, gmatrix A ) /* hgcd(n,x,y,A) chops n bits off x and y and computes th * 2 by 2 matrix A such that A[x y] is the pair of terms * in the remainder sequence starting with x,y that is * half the size of x. Note that the argument A is modified * but that the arguments xx and yy are left unchanged. */ { giant x, y; if (isZero(yy)) return; x = popg(); y = popg(); gtog(xx,x); gtog(yy,y); gshiftright(n,x); gshiftright(n,y); if (bitlen(x) <= INTLIMIT ) { shgcd(gtoi(x),gtoi(y),A); } else { gmatrix B = newgmatrix(); int m = bitlen(x)/2; hgcd(m,x,y,A); mulvM(A,x,y); if (gsign(x) < 0) { negg(x); negg(A->ul); negg(A->ur); } if (gsign(y) < 0) { negg(y); negg(A->ll); negg(A->lr); } if (gcompg(x,y) < 0) { gswap(&x,&y); gswap(&A->ul,&A->ll); gswap(&A->ur,&A->lr); } if (!isZero(y)) { onestep(x,y,A); m /= 2; B->ul = popg(); B->ur = popg(); B->ll = popg(); B->lr = popg(); itog(1,B->ul); itog(0,B->ur); itog(0,B->ll); itog(1,B->lr); hgcd(m,x,y,B); mulmM(B,A); pushg(4); } free(B); } pushg(2); } void shgcd( register int x, register int y, gmatrix A ) /* * Do a half gcd on the integers a and b, putting the result in A * It is fairly easy to use the 2 by 2 matrix description of the * extended Euclidean algorithm to prove that the quantity q*t * never overflows. */ { register int q, t, start = x; int Aul = 1, Aur = 0, All = 0, Alr = 1; while (y != 0 && y > start/y) { q = x/y; t = y; y = x%y; x = t; t = All; All = Aul-q*t; Aul = t; t = Alr; Alr = Aur-q*t; Aur = t; } itog(Aul,A->ul); itog(Aur,A->ur); itog(All,A->ll); itog(Alr,A->lr); }