modulo.cc

Go to the documentation of this file.
00001 // Modulo-Operations for unsigned int (and signed int)
00002 // written by Thorsten Reinecke (1999-03-12)
00003 // last change: 2007-06-22
00004 
00005 // some of the following functions are necessary because a*b%m will
00006 // return false results if a*b overflows unsigned-int-range.
00007 // these routines are okay for unsigned-int-range/2
00008 
00023 #ifndef MODULO_CC_INCLUDED
00024 #define MODULO_CC_INCLUDED
00025 
00026 #include "modulo.H"
00027 #include <iostream>
00028 #include "sqrt_modulo.cc"
00029 
00031 namespace numtheory
00032 {
00033 
00039 bool strong_probab_prime(const unsigned int p, const unsigned int base)
00040 {
00041   // assumes p>base>1 !!
00042   register unsigned int d=p-1;
00043   register unsigned int s=0;
00044   while (!(d&1U)) { d>>=1; ++s; }
00045   //cout << "d,s: " << d << "," << s << endl;
00046   if (s==0) return false;
00047   //cout << "p,d,s: " << p << "," << d << "," << s << endl;
00048   d=powmod(base,d,p);
00049   unsigned int x = d;
00050   while (s && x!=p-1) { x=squaremod(x,p); --s; } 
00051   //cout << "p,d,s,x: " << p << "," << d << "," << s << "," << x << endl;
00052   return (d==1 && x!=p-1) || (d!=1 && x==p-1);
00053 }
00054 
00063 bool probab_prime(const unsigned int p)
00064 {
00065  // assumes p>61 !!
00066  // actually p is prime if 61<p<4.759.123.141 and p is a probable prime for 2,7 and 61
00067  // (this has been checked by someone before).
00068  //
00069  // problem: mulmod(a,b,n) produces false results if highest
00070  //          significant bit in unsigned int overflows!
00071  return strong_probab_prime(p,2)
00072         && strong_probab_prime(p,7)
00073         && strong_probab_prime(p,61);
00074 }
00075 
00076 
00084 bool is_prime(register const int p)
00085 {
00086   // slow for large p's, but safe for all p's
00087   if (p==2 || p==3) return true;
00088   if ((p%2==0) || (p%3==0)) return false;
00089   register int t = 5;
00090   while (t*t<=p) 
00091     {
00092       if ((p%t)==0) return false;
00093       t+=2;
00094       if ((p%t)==0) return false;
00095       t+=4;
00096     }
00097   return true;
00098 }
00099 
00100 
00106 signed int jacobi (int a, unsigned int b)
00107 {
00108   // returns jacobi(a/b), assumes b odd!
00109 
00110   if (a%b)
00111    {
00112      signed int x = 1;
00113      if (a<0)
00114       {
00115         a=-a;
00116         if ((b-1)&2) x=-x; // (-1/b)=(-1)^((b-1)/2)
00117       }
00118      while (a>1 && b>1)
00119       {
00120         a%=b; if (a==0) return 0;
00121         unsigned int twos = 0;
00122         while ((a&1)==0) { a>>=1; twos++; }
00123         if (twos&1) // only odd two's in a can change the sign!
00124          {
00125           // (2/b)=(-1)^((b^2-1)/8)
00126           twos=(b&15)*(b&15); // but note that overflow wouldn't hurt here...
00127           if (twos&8) x=-x;
00128          }
00129         // a and b odd natural numbers -> (a/b)=(-1)^( (a-1)/2 * (b-1)/2 ) * (b/a)
00130         if (2&a&b) x=-x;
00131         twos=a; a=b; b=twos; // swap loop again...
00132       }
00133      return x;
00134    }
00135   else return 0;   
00136 }
00137 
00138 
00151 signed int legendre (signed int a, const unsigned int p)
00152 {
00153   // returns legendre(a/p), assumes p being an odd prime!
00154   
00155   // warning: legendre is only defined for odd prime p.
00156   //          this function works for all odd p: 
00157   //          it will therefore return jacobi(a/p)
00158   // remember: undefined results can be runerrors, but
00159   //           an undefined result can be anything else, too.
00160   //           So, undefined means, you can't rely on a specific
00161   //           behaviour.
00162 
00163   //           For debugging reasons one may wish to check for
00164   //           odd prime input p.
00165 #if 0
00166   if ( (p%2==0) || !is_prime(p) )
00167    {
00168      cerr << "error in legendre: " << p << " is not an odd prime!" << endl;
00169      exit(1);
00170    }
00171 #endif
00172   return jacobi(a,p);
00173 } 
00174 
00175 
00184 unsigned int invmod(const unsigned int x, const unsigned int m)
00185 {
00186   // returns 1/x mod m (or error if no inverse exists)
00187   unsigned int a;
00188   signed int inv;
00189 
00190 #ifndef ASM_386
00191   unsigned int b = x; //(x>=m) ? x%m : x;
00192   signed int sw=1;
00193   a=m; inv=0;
00194   while (b)
00195    {
00196 //     std::cout << "b=" <<  b << ", a=" << a; 
00197      unsigned int q=a/b; unsigned int r=a%b;
00198      a=b; b=r;
00199      signed h=inv-sw*q; inv=sw; sw=h;
00200 //     std::cout << ", q=" << q << ", r=" << r << ", inv=" << inv << std::endl;
00201    }
00202   inv = (inv>=0) ? inv : m+inv;
00203 
00204 #else
00205 
00206 #ifdef ASM_CMOV
00207 // inline assembler code for AMD Athlon (and other x86 processors providing conditional mov operations)
00208 
00209 unsigned int temp;
00210 
00211 #ifdef DEBUG
00212 #warning "using cmov for invmod-function"
00213 #endif
00214 
00215 // utilize, that division is faster for smaller operands
00216 // also use conditional mov instructions where possible
00217   asm volatile ( \
00218    "movl %[M],%%eax \n\t" \
00219    "movl %[X],%%ebx \n\t" \
00220    "movl $0,%[inv] # initialize inv \n\t" \
00221    "movl $1,%%ecx \n\t" \
00222    "1: # calc modular inverse\n\t" \
00223    "xorl %%edx,%%edx # prepare long div \n\t" \
00224    "divl %%ebx \n\t" \
00225    "movl %%ebx,%[tmp] \n\t" \
00226    "movl %%edx,%%ebx \n\t" \
00227    "imull %%ecx \n\t" \
00228    "subl %[inv],%%eax \n\t" \
00229    "movl %%ecx,%[inv] \n\t" \
00230    "movl %%eax,%%ecx \n\t" \
00231    "movl %[tmp],%%eax \n\t" \
00232    "negl %%ecx \n\t" \
00233    "testl $0xffff0000,%%ebx \n\t" \
00234    "jnz 1b \n\t" \
00235    "movl %%eax,%%edx \n\t" \
00236    "testl %%ebx,%%ebx \n\t" \
00237    "jz 9f \n\t" \
00238    "shrl $16,%%edx \n\t" \
00239    "cmpl %%ebx,%%edx \n\t" \
00240    "jae 1b # ext gcd loop (big operands 0:32/32) \n\t" \
00241    "movzx %%ax,%%eax \n\t" \
00242    "2: # calc modular inverse (small operands 16:16/16) \n\t" \
00243    "divw %%bx \n\t" \
00244    "movl %%ebx,%[tmp] \n\t" \
00245    "movl %%edx,%%ebx \n\t" \
00246    "imull %%ecx \n\t" \
00247    "subl %[inv],%%eax \n\t" \
00248    "movl %%ecx,%[inv] \n\t" \
00249    "xorl %%edx,%%edx \n\t" \
00250    "movl %%eax,%%ecx \n\t" \
00251    "movl %[tmp],%%eax \n\t" \
00252    "negl %%ecx \n\t" \
00253    "testb $0xff,%%bh \n\t" \
00254    "jnz 2b # ext gcd loop (small ops) \n\t" \
00255    "testb %%bl,%%bl \n\t" \
00256    "jz 9f \n\t" \
00257    "cmpb %%bl,%%ah \n\t" \
00258    "jae 2b \n\t" \
00259    "3: # calc modular inverse (byte operands 8:8/8 \n\t" \
00260    "divb %%bl \n\t" \
00261    "movb %%bl,%%bh \n\t" \
00262    "movb %%ah,%%bl \n\t" \
00263    "movzx %%al,%%eax \n\t" \
00264    "imull %%ecx \n\t" \
00265    "subl %[inv],%%eax \n\t" \
00266    "movl %%ecx,%[inv] \n\t" \
00267    "movl %%eax,%%ecx \n\t" \
00268    "movzx %%bh,%%eax \n\t" \
00269    "negl %%ecx \n\t" \
00270    "cmpb $1,%%bl \n\t" \
00271    "ja 3b # ext gcd loop (byte ops) \n\t" \
00272    "movzx %%bl,%%ebx \n\t" \
00273    "cmovel %%ecx,%[inv] \n\t" \
00274    "cmovel %%ebx,%%eax \n\t" \
00275    "9: # normalize result \n\t" \
00276    "movl %[M],%%edx \n\t" \
00277    "addl %[inv],%%edx \n\t" \
00278    "cmpl $0,%[inv] \n\t" \
00279    "cmovng %%edx,%[inv] \n"
00280    : "=&a" (a), [inv] "=&D" (inv), [tmp] "=&g" (temp) : [M] "g" (m), [X] "g" (x) : "ebx", "edx", "ecx", "cc");
00281 
00282 #else /* "standard" ASM_386 handoptimized inline assembler */
00283   asm volatile ( \
00284    "movl %[M],%%eax \n\t" \
00285    "movl %[X],%%ebx \n\t" \
00286    "movl $0,%[inv] # initialize inv \n\t" \
00287    "movl $1,%%ecx \n\t" \
00288    "1: # calc modular inverse\n\t" \
00289    "xorl %%edx,%%edx # prepare long div \n\t" \
00290    "divl %%ebx \n\t" \
00291    "pushl %%ebx \n\t" \
00292    "movl %%edx,%%ebx \n\t" \
00293    "imull %%ecx \n\t" \
00294    "movl %[inv],%%edx \n\t" \
00295    "subl %%eax,%%edx \n\t" \
00296    "movl %%ecx,%[inv] \n\t" \
00297    "movl %%edx,%%ecx \n\t" \
00298    "popl %%eax \n\t" \
00299    "testl %%ebx,%%ebx \n\t" \
00300    "jz 9f # done \n\t" \
00301    "testl $0xffff0000,%%eax \n\t" \
00302    "jnz 1b # ext gcd loop (big operands) \n\t" \
00303    "2: # calc modular inverse (small operands) \n\t" \
00304    "xorl %%edx,%%edx # prepare (word) div \n\t" \
00305    "divw %%bx \n\t" \
00306    "pushl %%ebx \n\t" \
00307    "movl %%edx,%%ebx \n\t" \
00308    "imull %%ecx \n\t" \
00309    "movl %[inv],%%edx \n\t" \
00310    "subl %%eax,%%edx \n\t" \
00311    "movl %%ecx,%[inv] \n\t" \
00312    "movl %%edx,%%ecx \n\t" \
00313    "popl %%eax \n\t" \
00314    "cmpl $1,%%ebx \n\t" \
00315    "ja 2b # ext gcd loop (small ops) \n\t" \
00316    "jb 9f \n\t" \
00317    "movl %%ecx,%[inv] \n\t" \
00318    "movl %%ebx,%%eax \n\t" \
00319    "9: # normalize result \n\t" \
00320    "cmpl $0,%[inv] \n\t" \
00321    "jg 2f \n\t" \
00322    "addl %[M],%[inv] \n\t" \
00323    "2: \n" \
00324    : "=&a" (a), [inv] "=&D" (inv) : [M] "g" (m), [X] "g" (x) : "ebx", "edx", "ecx", "cc");
00325 #endif
00326 #endif
00327 
00328   if (a!=1)
00329    {
00330      std::cerr << "WARNING! invmod: 1/" << x << " (mod " << m << ") does not exist" << std::endl;
00331      exit(1);
00332    }
00333 
00334 #if 0 /* for debugging */
00335   if   (mulmod(x%m,inv,m)!=1)
00336    {
00337      std::cerr << "error in invmod: buggy?" << std::endl;
00338 //     std::cout << "a= " << a << " inv= " << inv <<  std::endl;
00339 //     std::cout << "x= " << x << " m= " << m <<  std::endl;
00340      exit(1);
00341    }
00342 #endif
00343 
00344   return inv;
00345 }
00346 
00347 
00358 unsigned int bininvmod(register unsigned int x, register const unsigned int m)
00359 {
00360   // returns 1/x mod m (or error if no inverse exists)
00361   // crafted binary extended euclidean gcd algorithm 
00362 
00363   // additional constraints:
00364   // 1. m=1 (mod 2),  that is: m is odd
00365   // 2.  0 < x < m
00366 
00367 #if 1 && defined(ASM_CMOV)
00368   register unsigned int r=0;
00369   asm( \
00370    "mov %[m],%%edx \n\t" \
00371    "1: test $1,%[v] \n\t" \
00372    "jnz 2f \n\t" \
00373    "mov %[m],%%eax \n\t" \
00374    "shr $1,%[v] \n\t" \
00375    "add %[s],%%eax \n\t" \
00376    "shr $1,%%eax \n\t" \
00377    "shr $1,%[s] \n\t" \
00378    "cmovc %%eax,%[s] \n\t" \
00379    "jmp 1b \n\t" \
00380    "2: sub %[v],%%edx \n\t" \
00381    "xor %%eax,%%eax \n\t" \
00382    "sub %[s],%[r] \n\t" \
00383    "cmovb %[m],%%eax \n\t" \
00384    "add %%eax,%[r] \n\t" \
00385    "3: mov %[m],%%eax \n\t" \
00386    "shr $1,%%edx \n\t" \
00387    "add %[r],%%eax \n\t" \
00388    "shr $1,%%eax \n\t" \
00389    "shr $1,%[r] \n\t" \
00390    "cmovc %%eax,%[r] \n\t" \
00391    "test $1,%%edx \n\t" \
00392    "jz 3b \n\t" \
00393    "cmp %[v],%%edx \n\t" \
00394    "ja 2b \n\t" \
00395    "cmp $1,%%edx \n\t" \
00396    "je 9f \n\t" \
00397    "sub %%edx,%[v] \n\t" \
00398    "#jz 9f \n\t" \
00399    "4: xor %%eax,%%eax \n\t" \
00400    "sub %[r],%[s] \n\t" \
00401    "cmovb %[m],%%eax \n\t" \
00402    "add %%eax,%[s] \n\t" \
00403    "5: mov %[m],%%eax \n\t" \
00404    "shr $1,%[v] \n\t" \
00405    "add %[s],%%eax \n\t" \
00406    "shr $1,%%eax \n\t" \
00407    "shr $1,%[s] \n\t" \
00408    "cmovc %%eax,%[s] \n\t" \
00409    "test $1,%[v] \n\t" \
00410    "jz 5b \n\t" \
00411    "sub %%edx,%[v] \n\t" \
00412    "ja 4b \n\t" \
00413    "#je 9f \n\t" \
00414    "add %%edx,%[v] \n\t" \
00415    "cmp $1,%[v] \n\t" \
00416    "jne 2b \n\t" \
00417    "mov %[s],%[r] \n\t" \
00418    "9: # %[r] is the inverse \n" \
00419    : [v] "+r" (x), [r] "+r" (r) /* output list */
00420    : [m] "r" (m), [s] "r" (1)   /* input list */
00421    : "cc", "eax", "edx" /* clobber list */  );
00422    return r;
00423 #else
00424   // generic version
00425   register unsigned int u=m, v=x;
00426   register unsigned int r=0, s=1;
00427 
00428   while ((v&1)==0)
00429    {
00430      v>>=1;
00431      s+= (s&1) ? m : 0;
00432      s>>=1;
00433    }
00434 
00435   while (true)
00436    {
00437      // now:
00438      // 1. u,v are odd
00439      // 2. u>=v
00440      if (__builtin_expect (v==1, 0)) return s; // we expect many iterations before terminating
00441 
00442      do
00443       {
00444         u-=v;
00445         r-= (r>=s) ? s : s-m;
00446         do
00447          {
00448            u>>=1;
00449            r+= (r&1) ? m : 0;
00450            r>>=1;
00451          } while ((u&1)==0);
00452       } while (u>v);
00453 
00454      // now:
00455      // 1. u,v are odd
00456      // 2. v>=u
00457      if (__builtin_expect (u==1, 0)) return r; // we expect many iterations before terminating
00458 
00459      do
00460       {
00461         v-=u;
00462         s-= (s>=r) ? r : r-m;
00463         do
00464          {
00465            v>>=1;
00466            s+= (s&1) ? m : 0;
00467            s>>=1;
00468          } while ((v&1)==0);
00469       } while (v>=u);
00470 
00471    }
00472 #endif
00473 }
00474 
00475 
00476 // a small helper macro to speedup montgomery invmod
00477 #if defined(ASM_386) || defined(ASM_CMOV)
00478 // this array is here to avoid the costly "bit-search-forward" instruction for 6-bit values
00479 // on Athlon: bsf %[reg],%%ecx  [vectorpath, latency: 8]
00480 // replacement code: mov %[reg],%%ecx; and $3f,%%ecx; movzx %[shifts](%ecx),%%ecx
00481 //                  [ directpath, latency: 1+1+3 = 4 ]
00482 static const char shifts[64] __attribute__ ((aligned (64))) // 64-byte aligned for cacheline-optimization!
00483  = { 6,0,1,0, 2,0,1,0, 3,0,1,0, 2,0,1,0, 4,0,1,0, 2,0,1,0, 3,0,1,0, 2,0,1,0,
00484      5,0,1,0, 2,0,1,0, 3,0,1,0, 2,0,1,0, 4,0,1,0, 2,0,1,0, 3,0,1,0, 2,0,1,0 };
00485  // this is fast i386 inline assembler
00486 #define nightshift(VV,RR,KK) \
00487   asm( \
00488    "mov %[v],%%ecx \n\t" \
00489    "and $0x3f,%%ecx \n\t" \
00490    "test $0x7f,%[v] \n\t" \
00491    "movzx %[shifts](%%ecx),%%ecx \n\t" \
00492    "jnz 1f \n\t" \
00493    "bsf %[v],%%ecx \n\t" \
00494    "1: add %%ecx,%[k] \n\t" \
00495    "shr %%cl,%[v] \n\t" \
00496    "shl %%cl,%[r] \n" \
00497    : [v] "+r" (VV), [r] "+r" (RR), [k] "+rm" (KK) : [shifts] "o" (shifts[0]) : "cc", "ecx");
00498 #elif defined(ASM_X86_64)
00499 #define nightshift(VV,RR,KK) \
00500   asm( \
00501    "bsf %[v],%%ecx \n\t" \
00502    "add %%ecx,%[k] \n\t" \
00503    "shr %%cl,%[v] \n\t" \
00504    "shl %%cl,%[r] \n" \
00505    : [v] "+r" (VV), [r] "+r" (RR), [k] "+rm" (KK) : : "cc", "ecx");
00506 #else
00507  // and this is slow C++
00508 #define nightshift(VV,RR,KK) \
00509   while ((VV&1)==0) { VV>>=1; RR<<=1; ++KK; }
00510 #endif 
00511  // and this is the end of this macro ;-)
00512 
00513 
00523 unsigned int montgominvmod(register unsigned int x, unsigned int m)
00524 {
00525   // returns 1/x mod m (or error if no inverse exists)
00526   // crafted montgomery based algorithm for invmod
00527 
00528   // additional constraints:
00529   // 1. m=1 (mod 2),  that is: m is odd
00530   // 2.  0 < x < m
00531 
00532   using std::cout;
00533   using std::endl;
00534 
00535 #if 1 && defined(ASM_X86_64)
00536   //unsigned int h=x;
00537   register unsigned int r=0;
00538   register unsigned int s=1;
00539   asm( \
00540    "xor %%eax,%%eax \n\t" \
00541    "#mov %%edx,%%eax \n\t" \
00542    "#shr $32-26,%%edx \n\t" \
00543    "#shl $26,%%eax \n\t" \
00544    "divl %[m] \n\t" \
00545    "mov %[v],%%ecx \n\t" \
00546    "mov $-32,%%edi # now r*2^32 mod m \n\t" \
00547    "mov %[m],%%eax \n\t" \
00548    "bsf %[v],%%ecx \n\t" \
00549    "add %%ecx,%%edi # without div transform: mov ecx->edi \n\t" \
00550    "shr %%cl,%[v] \n\t" \
00551    "shl %%cl,%[r] \n" \
00552    ".balign 16,,8 \n\t" \
00553    "1: # u part\n\t" \
00554    "sub %[v],%%eax \n\t" \
00555    "mov %%eax,%%ecx \n\t" \
00556    "bsf %%eax,%%ecx \n\t" \
00557    "add %[s],%[r] \n\t" \
00558    "add %%ecx,%%edi \n\t" \
00559    "shr %%cl,%%eax \n\t" \
00560    "shl %%cl,%[s] \n\t" \
00561    "cmp %[v],%%eax \n\t" \
00562    "ja 1b \n\t" \
00563    "cmp $1,%%eax \n\t" \
00564    "je 9f \n\t" \
00565    "# v part \n\t" \
00566    "sub %%eax,%[v] \n\t" \
00567    "2: bsf %[v],%%ecx \n\t" \
00568    "add %[r],%[s] \n\t" \
00569    "shr %%cl,%[v] \n\t" \
00570    "shl %%cl,%[r] \n\t" \
00571    "add %%ecx,%%edi \n\t" \
00572    "cmp %%eax,%[v] \n\t" \
00573    "jb 1b \n\t" \
00574    "sub %%eax,%[v] \n\t" \
00575    "jmp 2b \n\t" \
00576    "9: #adjust shift \n\t" \
00577    "mov %[r],%%ecx \n\t" \
00578    "mov %[m],%%eax \n\t" \
00579    "bsf %[r],%%ecx \n\t" \
00580    "shl %%cl,%[r] \n\t" \
00581    "add %%ecx,%%edi \n\t" \
00582    "js 4f # negative-k transform \n\t" \
00583    "jz 5f # zero! \n\t" \
00584    "neg %[r] \n\t" \
00585    "add %%eax,%[r] \n\t" \
00586    "shr %%eax \n\t" \
00587    "inc %%eax \n\t" \
00588    "shr $1,%[r] \n\t" \
00589    "dec %%edi \n\t" \
00590    "jz 8f \n\t" \
00591    "# now the correction part \n\t" \
00592    "# cmov-version \n\t" \
00593    "3: xorl %%ecx,%%ecx \n\t" \
00594    "shr $1,%[r] \n\t" \
00595    "cmovc %%eax,%%ecx \n\t" \
00596    "add %%ecx,%[r] \n\t" \
00597    "dec %%edi \n\t" \
00598    "jnz 3b \n\t" \
00599    "jmp 8f \n\t" \
00600    "5: # case k=0 \n\t" \
00601    "neg %[r] \n\t" \
00602    "add %%eax,%[r] \n\t" \
00603    "jmp 8f \n\t" \
00604    "4: # negative-k transform \n\t" \
00605    "neg %[r] \n\t" \
00606    "add %%eax,%[r] \n\t" \
00607    "0: add %[r],%[r] \n\t" \
00608    "mov %[r],%%ecx \n\t" \
00609    "sub %%eax,%[r] \n\t" \
00610    "cmovb %%ecx,%[r] \n\t" \
00611    "inc %%edi \n\t" \
00612    "jnz 0b \n\t" \
00613    "#jmp 8f \n\t" \
00614    "# or: i386 version without cmov \n\t" \
00615    "#3: shr $1,%[r] \n\t" \
00616    "#sbb %%ecx,%%ecx \n\t" \
00617    "#and %%eax,%%ecx \n\t" \
00618    "#add %%ecx,%[r] \n\t" \
00619    "#dec %%edi \n\t" \
00620    "#jnz 3b \n\t" \
00621    "#jmp 8f \n\t" \
00622    "#5: # case k=0 \n\t" \
00623    "#neg %[r] \n\t" \
00624    "#add %%eax,%[r] \n\t" \
00625    "#jmp 8f \n\t" \
00626    "#4: # negative-k transform (i386 version)\n\t" \
00627    "#sub %[r],%%eax \n\t" \
00628    "#mov %%edi,%%ecx \n\t" \
00629    "#neg %%cl \n\t" \
00630    "#mov %%eax,%%edx \n\t" \
00631    "#add $32,%%edi \n\t" \
00632    "#shl %%cl,%%eax \n\t" \
00633    "#mov %%edi,%%ecx \n\t" \
00634    "#shr %%cl,%%edx \n\t" \
00635    "#divl %[m] \n\t " \
00636    "#mov %%edx,%[r] \n\t" \
00637    "8: \n" \
00638    : [v] "+d" (x), [r] "+r" (r), [s] "+r" (s) /* output list */
00639    : [m] "rm" (m) /* input list */
00640    : "cc", "eax", "ecx", "edi"  /* clobber list */  );
00641    //x=h; // needed, if you want to enable modulo-check below!
00642 #elif 1 && defined(ASM_CMOV)
00643   //unsigned int h=x;
00644   register unsigned int r=0;
00645   register unsigned int s=1;
00646   asm( \
00647    "xor %%eax,%%eax \n\t" \
00648    "#mov %%edx,%%eax \n\t" \
00649    "#shr $32-26,%%edx \n\t" \
00650    "#shl $26,%%eax \n\t" \
00651    "divl %[m] \n\t" \
00652    "mov %[v],%%ecx \n\t" \
00653    "mov $-32,%%edi # now r*2^32 mod m \n\t" \
00654    "mov %[m],%%eax \n\t" \
00655    "and $0x3f,%%ecx \n\t" \
00656    "test $0x7f,%[v] \n\t" \
00657    "movzx %[shifts](%%ecx),%%ecx \n\t" \
00658    "jnz 0f \n\t" \
00659    "bsf %[v],%%ecx \n\t" \
00660    "0: add %%ecx,%%edi # without div transform: mov ecx->edi \n\t" \
00661    "shr %%cl,%[v] \n\t" \
00662    "shl %%cl,%[r] \n" \
00663    ".balign 16,,8 \n\t" \
00664    "1: # u part\n\t" \
00665    "sub %[v],%%eax \n\t" \
00666    "mov %%eax,%%ecx \n\t" \
00667    "and $0x3f,%%ecx \n\t" \
00668    "test $0x7f,%%eax \n\t" \
00669    "movzx %[shifts](%%ecx),%%ecx \n\t" \
00670    "jnz 0f \n\t" \
00671    "bsf %%eax,%%ecx \n\t" \
00672    "0: add %[s],%[r] \n\t" \
00673    "add %%ecx,%%edi \n\t" \
00674    "shr %%cl,%%eax \n\t" \
00675    "shl %%cl,%[s] \n\t" \
00676    "cmp %[v],%%eax \n\t" \
00677    "ja 1b \n\t" \
00678    "cmp $1,%%eax \n\t" \
00679    "je 9f \n\t" \
00680    "# v part \n\t" \
00681    "sub %%eax,%[v] \n\t" \
00682    "2: mov %[v],%%ecx \n\t" \
00683    "and $0x3f,%%ecx \n\t" \
00684    "test $0x7f,%[v] \n\t" \
00685    "movzx %[shifts](%%ecx),%%ecx \n\t" \
00686    "jnz 0f \n\t" \
00687    "bsf %[v],%%ecx \n\t" \
00688    "0: add %[r],%[s] \n\t" \
00689    "shr %%cl,%[v] \n\t" \
00690    "shl %%cl,%[r] \n\t" \
00691    "add %%ecx,%%edi \n\t" \
00692    "cmp %%eax,%[v] \n\t" \
00693    "jb 1b \n\t" \
00694    "sub %%eax,%[v] \n\t" \
00695    "jmp 2b \n\t" \
00696    "9: #adjust shift \n\t" \
00697    "mov %[r],%%ecx \n\t" \
00698    "mov %[m],%%eax \n\t" \
00699    "and $0x3f,%%ecx \n\t" \
00700    "test $0x7f,%[r] \n\t" \
00701    "movzx %[shifts](%%ecx),%%ecx \n\t" \
00702    "jnz 0f \n\t" \
00703    "bsf %[r],%%ecx \n\t" \
00704    "shl %%cl,%[r] \n\t" \
00705    "0: add %%ecx,%%edi \n\t" \
00706    "js 4f # negative-k transform \n\t" \
00707    "jz 5f # zero! \n\t" \
00708    "neg %[r] \n\t" \
00709    "add %%eax,%[r] \n\t" \
00710    "shr %%eax \n\t" \
00711    "inc %%eax \n\t" \
00712    "shr $1,%[r] \n\t" \
00713    "dec %%edi \n\t" \
00714    "jz 8f \n\t" \
00715    "# now the correction part \n\t" \
00716    "# cmov-version \n\t" \
00717    "3: xorl %%ecx,%%ecx \n\t" \
00718    "shr $1,%[r] \n\t" \
00719    "cmovc %%eax,%%ecx \n\t" \
00720    "add %%ecx,%[r] \n\t" \
00721    "dec %%edi \n\t" \
00722    "jnz 3b \n\t" \
00723    "jmp 8f \n\t" \
00724    "5: # case k=0 \n\t" \
00725    "neg %[r] \n\t" \
00726    "add %%eax,%[r] \n\t" \
00727    "jmp 8f \n\t" \
00728    "4: # negative-k transform \n\t" \
00729    "neg %[r] \n\t" \
00730    "add %%eax,%[r] \n\t" \
00731    "0: add %[r],%[r] \n\t" \
00732    "mov %[r],%%ecx \n\t" \
00733    "sub %%eax,%[r] \n\t" \
00734    "cmovb %%ecx,%[r] \n\t" \
00735    "inc %%edi \n\t" \
00736    "jnz 0b \n\t" \
00737    "#jmp 8f \n\t" \
00738    "# or: i386 version without cmov \n\t" \
00739    "#3: shr $1,%[r] \n\t" \
00740    "#sbb %%ecx,%%ecx \n\t" \
00741    "#and %%eax,%%ecx \n\t" \
00742    "#add %%ecx,%[r] \n\t" \
00743    "#dec %%edi \n\t" \
00744    "#jnz 3b \n\t" \
00745    "#jmp 8f \n\t" \
00746    "#5: # case k=0 \n\t" \
00747    "#neg %[r] \n\t" \
00748    "#add %%eax,%[r] \n\t" \
00749    "#jmp 8f \n\t" \
00750    "#4: # negative-k transform (i386 version)\n\t" \
00751    "#sub %[r],%%eax \n\t" \
00752    "#mov %%edi,%%ecx \n\t" \
00753    "#neg %%cl \n\t" \
00754    "#mov %%eax,%%edx \n\t" \
00755    "#add $32,%%edi \n\t" \
00756    "#shl %%cl,%%eax \n\t" \
00757    "#mov %%edi,%%ecx \n\t" \
00758    "#shr %%cl,%%edx \n\t" \
00759    "#divl %[m] \n\t " \
00760    "#mov %%edx,%[r] \n\t" \
00761    "8: \n" \
00762    : [v] "+d" (x), [r] "+r" (r), [s] "+r" (s) /* output list */
00763    : [m] "rm" (m), [shifts] "o" (shifts[0])  /* input list */
00764    : "cc", "eax", "ecx", "edi"  /* clobber list */  );
00765    //x=h; // needed, if you want to enable modulo-check below!
00766 #elif 1 && defined(ASM_386)
00767   // version without cmov
00768   //unsigned int h=x;
00769   register unsigned int r=0;
00770   register unsigned int s=1;
00771   asm( \
00772    "xor %%eax,%%eax \n\t" \
00773    "#mov %%edx,%%eax \n\t" \
00774    "#shr $32-26,%%edx \n\t" \
00775    "#shl $26,%%eax \n\t" \
00776    "divl %[m] \n\t" \
00777    "mov %[v],%%ecx \n\t" \
00778    "mov $-32,%%edi # now r*2^32 mod m \n\t" \
00779    "mov %[m],%%eax \n\t" \
00780    "and $0x3f,%%ecx \n\t" \
00781    "test $0x7f,%[v] \n\t" \
00782    "movzx %[shifts](%%ecx),%%ecx \n\t" \
00783    "jnz 0f \n\t" \
00784    "bsf %[v],%%ecx \n\t" \
00785    "0: add %%ecx,%%edi # without div transform: mov ecx->edi \n\t" \
00786    "shr %%cl,%[v] \n\t" \
00787    "shl %%cl,%[r] \n" \
00788    ".balign 32,,12 \n\t"
00789    "1: # u part\n\t" \
00790    "sub %[v],%%eax \n\t" \
00791    "mov %%eax,%%ecx \n\t" \
00792    "and $0x3f,%%ecx \n\t" \
00793    "test $0x7f,%%eax \n\t" \
00794    "movzx %[shifts](%%ecx),%%ecx \n\t" \
00795    "jnz 0f \n\t" \
00796    "bsf %%eax,%%ecx \n\t" \
00797    "0: add %[s],%[r] \n\t" \
00798    "add %%ecx,%%edi \n\t" \
00799    "shr %%cl,%%eax \n\t" \
00800    "shl %%cl,%[s] \n\t" \
00801    "cmp %[v],%%eax \n\t" \
00802    "ja 1b \n\t" \
00803    "cmp $1,%%eax \n\t" \
00804    "je 9f \n\t" \
00805    "# v part \n\t" \
00806    "sub %%eax,%[v] \n\t" \
00807    "2: mov %[v],%%ecx \n\t" \
00808    "and $0x3f,%%ecx \n\t" \
00809    "test $0x7f,%[v] \n\t" \
00810    "movzx %[shifts](%%ecx),%%ecx \n\t" \
00811    "jnz 0f \n\t" \
00812    "bsf %[v],%%ecx \n\t" \
00813    "0: add %[r],%[s] \n\t" \
00814    "shr %%cl,%[v] \n\t" \
00815    "shl %%cl,%[r] \n\t" \
00816    "add %%ecx,%%edi \n\t" \
00817    "cmp %%eax,%[v] \n\t" \
00818    "jb 1b \n\t" \
00819    "sub %%eax,%[v] \n\t" \
00820    "jmp 2b \n\t" \
00821    "9: #adjust shift \n\t" \
00822    "mov %[r],%%ecx \n\t" \
00823    "mov %[m],%%eax \n\t" \
00824    "and $0x3f,%%ecx \n\t" \
00825    "test $0x7f,%[r] \n\t" \
00826    "movzx %[shifts](%%ecx),%%ecx \n\t" \
00827    "jnz 0f \n\t" \
00828    "bsf %[r],%%ecx \n\t" \
00829    "shl %%cl,%[r] \n\t" \
00830    "0: add %%ecx,%%edi \n\t" \
00831    "js 4f # negative-k transform \n\t" \
00832    "jz 5f # zero! \n\t" \
00833    "neg %[r] \n\t" \
00834    "add %%eax,%[r] \n\t" \
00835    "shr %%eax \n\t" \
00836    "inc %%eax \n\t" \
00837    "shr $1,%[r] \n\t" \
00838    "dec %%edi \n\t" \
00839    "jz 8f \n\t" \
00840    "# now the correction part \n\t" \
00841    "# cmov-version \n\t" \
00842    "#3: xorl %%ecx,%%ecx \n\t" \
00843    "#shr $1,%[r] \n\t" \
00844    "#cmovc %%eax,%%ecx \n\t" \
00845    "#add %%ecx,%[r] \n\t" \
00846    "#dec %%edi \n\t" \
00847    "#jnz 3b \n\t" \
00848    "#jmp 8f \n\t" \
00849    "#5: # case k=0 \n\t" \
00850    "#neg %[r] \n\t" \
00851    "#add %%eax,%[r] \n\t" \
00852    "#jmp 8f \n\t" \
00853    "#4: # negative-k transform \n\t" \
00854    "#neg %[r] \n\t" \
00855    "#add %%eax,%[r] \n\t" \
00856    "#0: add %[r],%[r] \n\t" \
00857    "#mov %[r],%%ecx \n\t" \
00858    "#sub %%eax,%[r] \n\t" \
00859    "#cmovb %%ecx,%[r] \n\t" \
00860    "#inc %%edi \n\t" \
00861    "#jnz 0b \n\t" \
00862    "#jmp 8f \n\t" \
00863    "# or: i386 version without cmov \n\t" \
00864    "3: shr $1,%[r] \n\t" \
00865    "sbb %%ecx,%%ecx \n\t" \
00866    "and %%eax,%%ecx \n\t" \
00867    "add %%ecx,%[r] \n\t" \
00868    "dec %%edi \n\t" \
00869    "jnz 3b \n\t" \
00870    "jmp 8f \n\t" \
00871    "5: # case k=0 \n\t" \
00872    "neg %[r] \n\t" \
00873    "add %%eax,%[r] \n\t" \
00874    "jmp 8f \n\t" \
00875    "4: # negative-k transform (i386 version)\n\t" \
00876    "sub %[r],%%eax \n\t" \
00877    "mov %%edi,%%ecx \n\t" \
00878    "neg %%cl \n\t" \
00879    "mov %%eax,%%edx \n\t" \
00880    "add $32,%%edi \n\t" \
00881    "shl %%cl,%%eax \n\t" \
00882    "mov %%edi,%%ecx \n\t" \
00883    "shr %%cl,%%edx \n\t" \
00884    "divl %[m] \n\t " \
00885    "mov %%edx,%[r] \n\t" \
00886    "8: \n" \
00887    : [v] "+d" (x), [r] "+r" (r), [s] "+r" (s) /* output list */
00888    : [m] "rm" (m), [shifts] "o" (shifts[0])  /* input list */
00889    : "cc", "eax", "ecx", "edi"  /* clobber list */  );
00890    //x=h; // needed, if you want to enable modulo-check below!
00891 #else
00892   // generic version
00893   register unsigned int u=m, v=x;
00894   register unsigned int r=0, s=1;
00895   register signed int k=0;
00896 
00897   nightshift(v,r,k); // = while ((v&1)==0) { v>>=1; r<<=1; ++k; }
00898 
00899   while (true)
00900    {
00901      // now: u,v odd, u>=v
00902      do
00903       {
00904         u-=v; r+=s;
00905         nightshift(u,s,k); // --> do { u>>=1; s<<=1; ++k; } while ((u&1)==0);
00906       } while (u>v);
00907 
00908      // now: u,v odd, v>=u
00909      if (__builtin_expect (u==1, 0)) goto loopdone; // we expect many iterations before terminating
00910 
00911      do
00912       {
00913         v-=u; s+=r;
00914         nightshift(v,r,k); // --> do { v>>=1; r<<=1; ++k; } while ((v&1)==0);
00915       } while (v>=u);
00916    }
00917  loopdone:
00918 
00919   while ((r&1)==0) { r>>=1; ++k; }
00920   r=m-r; // now r is even
00921 
00922   do { if (r&1) r+=m; r>>=1; } while(--k);
00923 
00924 #endif /* of asm-cmov / asm-i386 / generic version */
00925 
00926 #if 0
00927   if ((mulmod(r,x,m))!=1)
00928    {
00929      std::cerr << "invmod-error!" << endl;
00930      std::cerr << "check should produce 1, but got " << mulmod(r,x,m) << endl;
00931      std::cerr << "r=" << r << " x=" << x << " m=" << m << endl;
00932      std::cerr << "real invmod should be : " << invmod(x,m) << endl;
00933      exit(1);
00934    }
00935 #endif
00936   return r;
00937 }
00938 
00939 
00940 
00941 #ifdef USE_JASONS_INVMOD
00942  namespace japinvmod
00943  {
00944    #include "invmod.c"
00945    class init { public: init() { init_fast_invmod(); } };
00946    static init at_startup;
00947  }
00948 #endif
00949 
00950 
00951 } // namespace numtheory
00952 
00953 #endif /* MODULO_CC_INCLUDED */

Generated on Wed Nov 7 23:29:25 2007 for Qsieve by  doxygen 1.5.4