00001
00002
00003
00004
00005
00006
00007
00008
00023 #ifndef MODULO_CC_INCLUDED
00024 #define MODULO_CC_INCLUDED
00025
00026 #include "modulo.H"
00027 #include <iostream>
00028 #include "sqrt_modulo.cc"
00029
00031 namespace numtheory
00032 {
00033
00039 bool strong_probab_prime(const unsigned int p, const unsigned int base)
00040 {
00041
00042 register unsigned int d=p-1;
00043 register unsigned int s=0;
00044 while (!(d&1U)) { d>>=1; ++s; }
00045
00046 if (s==0) return false;
00047
00048 d=powmod(base,d,p);
00049 unsigned int x = d;
00050 while (s && x!=p-1) { x=squaremod(x,p); --s; }
00051
00052 return (d==1 && x!=p-1) || (d!=1 && x==p-1);
00053 }
00054
00063 bool probab_prime(const unsigned int p)
00064 {
00065
00066
00067
00068
00069
00070
00071 return strong_probab_prime(p,2)
00072 && strong_probab_prime(p,7)
00073 && strong_probab_prime(p,61);
00074 }
00075
00076
00084 bool is_prime(register const int p)
00085 {
00086
00087 if (p==2 || p==3) return true;
00088 if ((p%2==0) || (p%3==0)) return false;
00089 register int t = 5;
00090 while (t*t<=p)
00091 {
00092 if ((p%t)==0) return false;
00093 t+=2;
00094 if ((p%t)==0) return false;
00095 t+=4;
00096 }
00097 return true;
00098 }
00099
00100
00106 signed int jacobi (int a, unsigned int b)
00107 {
00108
00109
00110 if (a%b)
00111 {
00112 signed int x = 1;
00113 if (a<0)
00114 {
00115 a=-a;
00116 if ((b-1)&2) x=-x;
00117 }
00118 while (a>1 && b>1)
00119 {
00120 a%=b; if (a==0) return 0;
00121 unsigned int twos = 0;
00122 while ((a&1)==0) { a>>=1; twos++; }
00123 if (twos&1)
00124 {
00125
00126 twos=(b&15)*(b&15);
00127 if (twos&8) x=-x;
00128 }
00129
00130 if (2&a&b) x=-x;
00131 twos=a; a=b; b=twos;
00132 }
00133 return x;
00134 }
00135 else return 0;
00136 }
00137
00138
00151 signed int legendre (signed int a, const unsigned int p)
00152 {
00153
00154
00155
00156
00157
00158
00159
00160
00161
00162
00163
00164
00165 #if 0
00166 if ( (p%2==0) || !is_prime(p) )
00167 {
00168 cerr << "error in legendre: " << p << " is not an odd prime!" << endl;
00169 exit(1);
00170 }
00171 #endif
00172 return jacobi(a,p);
00173 }
00174
00175
00184 unsigned int invmod(const unsigned int x, const unsigned int m)
00185 {
00186
00187 unsigned int a;
00188 signed int inv;
00189
00190 #ifndef ASM_386
00191 unsigned int b = x;
00192 signed int sw=1;
00193 a=m; inv=0;
00194 while (b)
00195 {
00196
00197 unsigned int q=a/b; unsigned int r=a%b;
00198 a=b; b=r;
00199 signed h=inv-sw*q; inv=sw; sw=h;
00200
00201 }
00202 inv = (inv>=0) ? inv : m+inv;
00203
00204 #else
00205
00206 #ifdef ASM_CMOV
00207
00208
00209 unsigned int temp;
00210
00211 #ifdef DEBUG
00212 #warning "using cmov for invmod-function"
00213 #endif
00214
00215
00216
00217 asm volatile ( \
00218 "movl %[M],%%eax \n\t" \
00219 "movl %[X],%%ebx \n\t" \
00220 "movl $0,%[inv] # initialize inv \n\t" \
00221 "movl $1,%%ecx \n\t" \
00222 "1: # calc modular inverse\n\t" \
00223 "xorl %%edx,%%edx # prepare long div \n\t" \
00224 "divl %%ebx \n\t" \
00225 "movl %%ebx,%[tmp] \n\t" \
00226 "movl %%edx,%%ebx \n\t" \
00227 "imull %%ecx \n\t" \
00228 "subl %[inv],%%eax \n\t" \
00229 "movl %%ecx,%[inv] \n\t" \
00230 "movl %%eax,%%ecx \n\t" \
00231 "movl %[tmp],%%eax \n\t" \
00232 "negl %%ecx \n\t" \
00233 "testl $0xffff0000,%%ebx \n\t" \
00234 "jnz 1b \n\t" \
00235 "movl %%eax,%%edx \n\t" \
00236 "testl %%ebx,%%ebx \n\t" \
00237 "jz 9f \n\t" \
00238 "shrl $16,%%edx \n\t" \
00239 "cmpl %%ebx,%%edx \n\t" \
00240 "jae 1b # ext gcd loop (big operands 0:32/32) \n\t" \
00241 "movzx %%ax,%%eax \n\t" \
00242 "2: # calc modular inverse (small operands 16:16/16) \n\t" \
00243 "divw %%bx \n\t" \
00244 "movl %%ebx,%[tmp] \n\t" \
00245 "movl %%edx,%%ebx \n\t" \
00246 "imull %%ecx \n\t" \
00247 "subl %[inv],%%eax \n\t" \
00248 "movl %%ecx,%[inv] \n\t" \
00249 "xorl %%edx,%%edx \n\t" \
00250 "movl %%eax,%%ecx \n\t" \
00251 "movl %[tmp],%%eax \n\t" \
00252 "negl %%ecx \n\t" \
00253 "testb $0xff,%%bh \n\t" \
00254 "jnz 2b # ext gcd loop (small ops) \n\t" \
00255 "testb %%bl,%%bl \n\t" \
00256 "jz 9f \n\t" \
00257 "cmpb %%bl,%%ah \n\t" \
00258 "jae 2b \n\t" \
00259 "3: # calc modular inverse (byte operands 8:8/8 \n\t" \
00260 "divb %%bl \n\t" \
00261 "movb %%bl,%%bh \n\t" \
00262 "movb %%ah,%%bl \n\t" \
00263 "movzx %%al,%%eax \n\t" \
00264 "imull %%ecx \n\t" \
00265 "subl %[inv],%%eax \n\t" \
00266 "movl %%ecx,%[inv] \n\t" \
00267 "movl %%eax,%%ecx \n\t" \
00268 "movzx %%bh,%%eax \n\t" \
00269 "negl %%ecx \n\t" \
00270 "cmpb $1,%%bl \n\t" \
00271 "ja 3b # ext gcd loop (byte ops) \n\t" \
00272 "movzx %%bl,%%ebx \n\t" \
00273 "cmovel %%ecx,%[inv] \n\t" \
00274 "cmovel %%ebx,%%eax \n\t" \
00275 "9: # normalize result \n\t" \
00276 "movl %[M],%%edx \n\t" \
00277 "addl %[inv],%%edx \n\t" \
00278 "cmpl $0,%[inv] \n\t" \
00279 "cmovng %%edx,%[inv] \n"
00280 : "=&a" (a), [inv] "=&D" (inv), [tmp] "=&g" (temp) : [M] "g" (m), [X] "g" (x) : "ebx", "edx", "ecx", "cc");
00281
00282 #else
00283 asm volatile ( \
00284 "movl %[M],%%eax \n\t" \
00285 "movl %[X],%%ebx \n\t" \
00286 "movl $0,%[inv] # initialize inv \n\t" \
00287 "movl $1,%%ecx \n\t" \
00288 "1: # calc modular inverse\n\t" \
00289 "xorl %%edx,%%edx # prepare long div \n\t" \
00290 "divl %%ebx \n\t" \
00291 "pushl %%ebx \n\t" \
00292 "movl %%edx,%%ebx \n\t" \
00293 "imull %%ecx \n\t" \
00294 "movl %[inv],%%edx \n\t" \
00295 "subl %%eax,%%edx \n\t" \
00296 "movl %%ecx,%[inv] \n\t" \
00297 "movl %%edx,%%ecx \n\t" \
00298 "popl %%eax \n\t" \
00299 "testl %%ebx,%%ebx \n\t" \
00300 "jz 9f # done \n\t" \
00301 "testl $0xffff0000,%%eax \n\t" \
00302 "jnz 1b # ext gcd loop (big operands) \n\t" \
00303 "2: # calc modular inverse (small operands) \n\t" \
00304 "xorl %%edx,%%edx # prepare (word) div \n\t" \
00305 "divw %%bx \n\t" \
00306 "pushl %%ebx \n\t" \
00307 "movl %%edx,%%ebx \n\t" \
00308 "imull %%ecx \n\t" \
00309 "movl %[inv],%%edx \n\t" \
00310 "subl %%eax,%%edx \n\t" \
00311 "movl %%ecx,%[inv] \n\t" \
00312 "movl %%edx,%%ecx \n\t" \
00313 "popl %%eax \n\t" \
00314 "cmpl $1,%%ebx \n\t" \
00315 "ja 2b # ext gcd loop (small ops) \n\t" \
00316 "jb 9f \n\t" \
00317 "movl %%ecx,%[inv] \n\t" \
00318 "movl %%ebx,%%eax \n\t" \
00319 "9: # normalize result \n\t" \
00320 "cmpl $0,%[inv] \n\t" \
00321 "jg 2f \n\t" \
00322 "addl %[M],%[inv] \n\t" \
00323 "2: \n" \
00324 : "=&a" (a), [inv] "=&D" (inv) : [M] "g" (m), [X] "g" (x) : "ebx", "edx", "ecx", "cc");
00325 #endif
00326 #endif
00327
00328 if (a!=1)
00329 {
00330 std::cerr << "WARNING! invmod: 1/" << x << " (mod " << m << ") does not exist" << std::endl;
00331 exit(1);
00332 }
00333
00334 #if 0
00335 if (mulmod(x%m,inv,m)!=1)
00336 {
00337 std::cerr << "error in invmod: buggy?" << std::endl;
00338
00339
00340 exit(1);
00341 }
00342 #endif
00343
00344 return inv;
00345 }
00346
00347
00358 unsigned int bininvmod(register unsigned int x, register const unsigned int m)
00359 {
00360
00361
00362
00363
00364
00365
00366
00367 #if 1 && defined(ASM_CMOV)
00368 register unsigned int r=0;
00369 asm( \
00370 "mov %[m],%%edx \n\t" \
00371 "1: test $1,%[v] \n\t" \
00372 "jnz 2f \n\t" \
00373 "mov %[m],%%eax \n\t" \
00374 "shr $1,%[v] \n\t" \
00375 "add %[s],%%eax \n\t" \
00376 "shr $1,%%eax \n\t" \
00377 "shr $1,%[s] \n\t" \
00378 "cmovc %%eax,%[s] \n\t" \
00379 "jmp 1b \n\t" \
00380 "2: sub %[v],%%edx \n\t" \
00381 "xor %%eax,%%eax \n\t" \
00382 "sub %[s],%[r] \n\t" \
00383 "cmovb %[m],%%eax \n\t" \
00384 "add %%eax,%[r] \n\t" \
00385 "3: mov %[m],%%eax \n\t" \
00386 "shr $1,%%edx \n\t" \
00387 "add %[r],%%eax \n\t" \
00388 "shr $1,%%eax \n\t" \
00389 "shr $1,%[r] \n\t" \
00390 "cmovc %%eax,%[r] \n\t" \
00391 "test $1,%%edx \n\t" \
00392 "jz 3b \n\t" \
00393 "cmp %[v],%%edx \n\t" \
00394 "ja 2b \n\t" \
00395 "cmp $1,%%edx \n\t" \
00396 "je 9f \n\t" \
00397 "sub %%edx,%[v] \n\t" \
00398 "#jz 9f \n\t" \
00399 "4: xor %%eax,%%eax \n\t" \
00400 "sub %[r],%[s] \n\t" \
00401 "cmovb %[m],%%eax \n\t" \
00402 "add %%eax,%[s] \n\t" \
00403 "5: mov %[m],%%eax \n\t" \
00404 "shr $1,%[v] \n\t" \
00405 "add %[s],%%eax \n\t" \
00406 "shr $1,%%eax \n\t" \
00407 "shr $1,%[s] \n\t" \
00408 "cmovc %%eax,%[s] \n\t" \
00409 "test $1,%[v] \n\t" \
00410 "jz 5b \n\t" \
00411 "sub %%edx,%[v] \n\t" \
00412 "ja 4b \n\t" \
00413 "#je 9f \n\t" \
00414 "add %%edx,%[v] \n\t" \
00415 "cmp $1,%[v] \n\t" \
00416 "jne 2b \n\t" \
00417 "mov %[s],%[r] \n\t" \
00418 "9: # %[r] is the inverse \n" \
00419 : [v] "+r" (x), [r] "+r" (r)
00420 : [m] "r" (m), [s] "r" (1)
00421 : "cc", "eax", "edx" );
00422 return r;
00423 #else
00424
00425 register unsigned int u=m, v=x;
00426 register unsigned int r=0, s=1;
00427
00428 while ((v&1)==0)
00429 {
00430 v>>=1;
00431 s+= (s&1) ? m : 0;
00432 s>>=1;
00433 }
00434
00435 while (true)
00436 {
00437
00438
00439
00440 if (__builtin_expect (v==1, 0)) return s;
00441
00442 do
00443 {
00444 u-=v;
00445 r-= (r>=s) ? s : s-m;
00446 do
00447 {
00448 u>>=1;
00449 r+= (r&1) ? m : 0;
00450 r>>=1;
00451 } while ((u&1)==0);
00452 } while (u>v);
00453
00454
00455
00456
00457 if (__builtin_expect (u==1, 0)) return r;
00458
00459 do
00460 {
00461 v-=u;
00462 s-= (s>=r) ? r : r-m;
00463 do
00464 {
00465 v>>=1;
00466 s+= (s&1) ? m : 0;
00467 s>>=1;
00468 } while ((v&1)==0);
00469 } while (v>=u);
00470
00471 }
00472 #endif
00473 }
00474
00475
00476
00477 #if defined(ASM_386) || defined(ASM_CMOV)
00478
00479
00480
00481
00482 static const char shifts[64] __attribute__ ((aligned (64)))
00483 = { 6,0,1,0, 2,0,1,0, 3,0,1,0, 2,0,1,0, 4,0,1,0, 2,0,1,0, 3,0,1,0, 2,0,1,0,
00484 5,0,1,0, 2,0,1,0, 3,0,1,0, 2,0,1,0, 4,0,1,0, 2,0,1,0, 3,0,1,0, 2,0,1,0 };
00485
00486 #define nightshift(VV,RR,KK) \
00487 asm( \
00488 "mov %[v],%%ecx \n\t" \
00489 "and $0x3f,%%ecx \n\t" \
00490 "test $0x7f,%[v] \n\t" \
00491 "movzx %[shifts](%%ecx),%%ecx \n\t" \
00492 "jnz 1f \n\t" \
00493 "bsf %[v],%%ecx \n\t" \
00494 "1: add %%ecx,%[k] \n\t" \
00495 "shr %%cl,%[v] \n\t" \
00496 "shl %%cl,%[r] \n" \
00497 : [v] "+r" (VV), [r] "+r" (RR), [k] "+rm" (KK) : [shifts] "o" (shifts[0]) : "cc", "ecx");
00498 #elif defined(ASM_X86_64)
00499 #define nightshift(VV,RR,KK) \
00500 asm( \
00501 "bsf %[v],%%ecx \n\t" \
00502 "add %%ecx,%[k] \n\t" \
00503 "shr %%cl,%[v] \n\t" \
00504 "shl %%cl,%[r] \n" \
00505 : [v] "+r" (VV), [r] "+r" (RR), [k] "+rm" (KK) : : "cc", "ecx");
00506 #else
00507
00508 #define nightshift(VV,RR,KK) \
00509 while ((VV&1)==0) { VV>>=1; RR<<=1; ++KK; }
00510 #endif
00511
00512
00513
00523 unsigned int montgominvmod(register unsigned int x, unsigned int m)
00524 {
00525
00526
00527
00528
00529
00530
00531
00532 using std::cout;
00533 using std::endl;
00534
00535 #if 1 && defined(ASM_X86_64)
00536
00537 register unsigned int r=0;
00538 register unsigned int s=1;
00539 asm( \
00540 "xor %%eax,%%eax \n\t" \
00541 "#mov %%edx,%%eax \n\t" \
00542 "#shr $32-26,%%edx \n\t" \
00543 "#shl $26,%%eax \n\t" \
00544 "divl %[m] \n\t" \
00545 "mov %[v],%%ecx \n\t" \
00546 "mov $-32,%%edi # now r*2^32 mod m \n\t" \
00547 "mov %[m],%%eax \n\t" \
00548 "bsf %[v],%%ecx \n\t" \
00549 "add %%ecx,%%edi # without div transform: mov ecx->edi \n\t" \
00550 "shr %%cl,%[v] \n\t" \
00551 "shl %%cl,%[r] \n" \
00552 ".balign 16,,8 \n\t" \
00553 "1: # u part\n\t" \
00554 "sub %[v],%%eax \n\t" \
00555 "mov %%eax,%%ecx \n\t" \
00556 "bsf %%eax,%%ecx \n\t" \
00557 "add %[s],%[r] \n\t" \
00558 "add %%ecx,%%edi \n\t" \
00559 "shr %%cl,%%eax \n\t" \
00560 "shl %%cl,%[s] \n\t" \
00561 "cmp %[v],%%eax \n\t" \
00562 "ja 1b \n\t" \
00563 "cmp $1,%%eax \n\t" \
00564 "je 9f \n\t" \
00565 "# v part \n\t" \
00566 "sub %%eax,%[v] \n\t" \
00567 "2: bsf %[v],%%ecx \n\t" \
00568 "add %[r],%[s] \n\t" \
00569 "shr %%cl,%[v] \n\t" \
00570 "shl %%cl,%[r] \n\t" \
00571 "add %%ecx,%%edi \n\t" \
00572 "cmp %%eax,%[v] \n\t" \
00573 "jb 1b \n\t" \
00574 "sub %%eax,%[v] \n\t" \
00575 "jmp 2b \n\t" \
00576 "9: #adjust shift \n\t" \
00577 "mov %[r],%%ecx \n\t" \
00578 "mov %[m],%%eax \n\t" \
00579 "bsf %[r],%%ecx \n\t" \
00580 "shl %%cl,%[r] \n\t" \
00581 "add %%ecx,%%edi \n\t" \
00582 "js 4f # negative-k transform \n\t" \
00583 "jz 5f # zero! \n\t" \
00584 "neg %[r] \n\t" \
00585 "add %%eax,%[r] \n\t" \
00586 "shr %%eax \n\t" \
00587 "inc %%eax \n\t" \
00588 "shr $1,%[r] \n\t" \
00589 "dec %%edi \n\t" \
00590 "jz 8f \n\t" \
00591 "# now the correction part \n\t" \
00592 "# cmov-version \n\t" \
00593 "3: xorl %%ecx,%%ecx \n\t" \
00594 "shr $1,%[r] \n\t" \
00595 "cmovc %%eax,%%ecx \n\t" \
00596 "add %%ecx,%[r] \n\t" \
00597 "dec %%edi \n\t" \
00598 "jnz 3b \n\t" \
00599 "jmp 8f \n\t" \
00600 "5: # case k=0 \n\t" \
00601 "neg %[r] \n\t" \
00602 "add %%eax,%[r] \n\t" \
00603 "jmp 8f \n\t" \
00604 "4: # negative-k transform \n\t" \
00605 "neg %[r] \n\t" \
00606 "add %%eax,%[r] \n\t" \
00607 "0: add %[r],%[r] \n\t" \
00608 "mov %[r],%%ecx \n\t" \
00609 "sub %%eax,%[r] \n\t" \
00610 "cmovb %%ecx,%[r] \n\t" \
00611 "inc %%edi \n\t" \
00612 "jnz 0b \n\t" \
00613 "#jmp 8f \n\t" \
00614 "# or: i386 version without cmov \n\t" \
00615 "#3: shr $1,%[r] \n\t" \
00616 "#sbb %%ecx,%%ecx \n\t" \
00617 "#and %%eax,%%ecx \n\t" \
00618 "#add %%ecx,%[r] \n\t" \
00619 "#dec %%edi \n\t" \
00620 "#jnz 3b \n\t" \
00621 "#jmp 8f \n\t" \
00622 "#5: # case k=0 \n\t" \
00623 "#neg %[r] \n\t" \
00624 "#add %%eax,%[r] \n\t" \
00625 "#jmp 8f \n\t" \
00626 "#4: # negative-k transform (i386 version)\n\t" \
00627 "#sub %[r],%%eax \n\t" \
00628 "#mov %%edi,%%ecx \n\t" \
00629 "#neg %%cl \n\t" \
00630 "#mov %%eax,%%edx \n\t" \
00631 "#add $32,%%edi \n\t" \
00632 "#shl %%cl,%%eax \n\t" \
00633 "#mov %%edi,%%ecx \n\t" \
00634 "#shr %%cl,%%edx \n\t" \
00635 "#divl %[m] \n\t " \
00636 "#mov %%edx,%[r] \n\t" \
00637 "8: \n" \
00638 : [v] "+d" (x), [r] "+r" (r), [s] "+r" (s)
00639 : [m] "rm" (m)
00640 : "cc", "eax", "ecx", "edi" );
00641
00642 #elif 1 && defined(ASM_CMOV)
00643
00644 register unsigned int r=0;
00645 register unsigned int s=1;
00646 asm( \
00647 "xor %%eax,%%eax \n\t" \
00648 "#mov %%edx,%%eax \n\t" \
00649 "#shr $32-26,%%edx \n\t" \
00650 "#shl $26,%%eax \n\t" \
00651 "divl %[m] \n\t" \
00652 "mov %[v],%%ecx \n\t" \
00653 "mov $-32,%%edi # now r*2^32 mod m \n\t" \
00654 "mov %[m],%%eax \n\t" \
00655 "and $0x3f,%%ecx \n\t" \
00656 "test $0x7f,%[v] \n\t" \
00657 "movzx %[shifts](%%ecx),%%ecx \n\t" \
00658 "jnz 0f \n\t" \
00659 "bsf %[v],%%ecx \n\t" \
00660 "0: add %%ecx,%%edi # without div transform: mov ecx->edi \n\t" \
00661 "shr %%cl,%[v] \n\t" \
00662 "shl %%cl,%[r] \n" \
00663 ".balign 16,,8 \n\t" \
00664 "1: # u part\n\t" \
00665 "sub %[v],%%eax \n\t" \
00666 "mov %%eax,%%ecx \n\t" \
00667 "and $0x3f,%%ecx \n\t" \
00668 "test $0x7f,%%eax \n\t" \
00669 "movzx %[shifts](%%ecx),%%ecx \n\t" \
00670 "jnz 0f \n\t" \
00671 "bsf %%eax,%%ecx \n\t" \
00672 "0: add %[s],%[r] \n\t" \
00673 "add %%ecx,%%edi \n\t" \
00674 "shr %%cl,%%eax \n\t" \
00675 "shl %%cl,%[s] \n\t" \
00676 "cmp %[v],%%eax \n\t" \
00677 "ja 1b \n\t" \
00678 "cmp $1,%%eax \n\t" \
00679 "je 9f \n\t" \
00680 "# v part \n\t" \
00681 "sub %%eax,%[v] \n\t" \
00682 "2: mov %[v],%%ecx \n\t" \
00683 "and $0x3f,%%ecx \n\t" \
00684 "test $0x7f,%[v] \n\t" \
00685 "movzx %[shifts](%%ecx),%%ecx \n\t" \
00686 "jnz 0f \n\t" \
00687 "bsf %[v],%%ecx \n\t" \
00688 "0: add %[r],%[s] \n\t" \
00689 "shr %%cl,%[v] \n\t" \
00690 "shl %%cl,%[r] \n\t" \
00691 "add %%ecx,%%edi \n\t" \
00692 "cmp %%eax,%[v] \n\t" \
00693 "jb 1b \n\t" \
00694 "sub %%eax,%[v] \n\t" \
00695 "jmp 2b \n\t" \
00696 "9: #adjust shift \n\t" \
00697 "mov %[r],%%ecx \n\t" \
00698 "mov %[m],%%eax \n\t" \
00699 "and $0x3f,%%ecx \n\t" \
00700 "test $0x7f,%[r] \n\t" \
00701 "movzx %[shifts](%%ecx),%%ecx \n\t" \
00702 "jnz 0f \n\t" \
00703 "bsf %[r],%%ecx \n\t" \
00704 "shl %%cl,%[r] \n\t" \
00705 "0: add %%ecx,%%edi \n\t" \
00706 "js 4f # negative-k transform \n\t" \
00707 "jz 5f # zero! \n\t" \
00708 "neg %[r] \n\t" \
00709 "add %%eax,%[r] \n\t" \
00710 "shr %%eax \n\t" \
00711 "inc %%eax \n\t" \
00712 "shr $1,%[r] \n\t" \
00713 "dec %%edi \n\t" \
00714 "jz 8f \n\t" \
00715 "# now the correction part \n\t" \
00716 "# cmov-version \n\t" \
00717 "3: xorl %%ecx,%%ecx \n\t" \
00718 "shr $1,%[r] \n\t" \
00719 "cmovc %%eax,%%ecx \n\t" \
00720 "add %%ecx,%[r] \n\t" \
00721 "dec %%edi \n\t" \
00722 "jnz 3b \n\t" \
00723 "jmp 8f \n\t" \
00724 "5: # case k=0 \n\t" \
00725 "neg %[r] \n\t" \
00726 "add %%eax,%[r] \n\t" \
00727 "jmp 8f \n\t" \
00728 "4: # negative-k transform \n\t" \
00729 "neg %[r] \n\t" \
00730 "add %%eax,%[r] \n\t" \
00731 "0: add %[r],%[r] \n\t" \
00732 "mov %[r],%%ecx \n\t" \
00733 "sub %%eax,%[r] \n\t" \
00734 "cmovb %%ecx,%[r] \n\t" \
00735 "inc %%edi \n\t" \
00736 "jnz 0b \n\t" \
00737 "#jmp 8f \n\t" \
00738 "# or: i386 version without cmov \n\t" \
00739 "#3: shr $1,%[r] \n\t" \
00740 "#sbb %%ecx,%%ecx \n\t" \
00741 "#and %%eax,%%ecx \n\t" \
00742 "#add %%ecx,%[r] \n\t" \
00743 "#dec %%edi \n\t" \
00744 "#jnz 3b \n\t" \
00745 "#jmp 8f \n\t" \
00746 "#5: # case k=0 \n\t" \
00747 "#neg %[r] \n\t" \
00748 "#add %%eax,%[r] \n\t" \
00749 "#jmp 8f \n\t" \
00750 "#4: # negative-k transform (i386 version)\n\t" \
00751 "#sub %[r],%%eax \n\t" \
00752 "#mov %%edi,%%ecx \n\t" \
00753 "#neg %%cl \n\t" \
00754 "#mov %%eax,%%edx \n\t" \
00755 "#add $32,%%edi \n\t" \
00756 "#shl %%cl,%%eax \n\t" \
00757 "#mov %%edi,%%ecx \n\t" \
00758 "#shr %%cl,%%edx \n\t" \
00759 "#divl %[m] \n\t " \
00760 "#mov %%edx,%[r] \n\t" \
00761 "8: \n" \
00762 : [v] "+d" (x), [r] "+r" (r), [s] "+r" (s)
00763 : [m] "rm" (m), [shifts] "o" (shifts[0])
00764 : "cc", "eax", "ecx", "edi" );
00765
00766 #elif 1 && defined(ASM_386)
00767
00768
00769 register unsigned int r=0;
00770 register unsigned int s=1;
00771 asm( \
00772 "xor %%eax,%%eax \n\t" \
00773 "#mov %%edx,%%eax \n\t" \
00774 "#shr $32-26,%%edx \n\t" \
00775 "#shl $26,%%eax \n\t" \
00776 "divl %[m] \n\t" \
00777 "mov %[v],%%ecx \n\t" \
00778 "mov $-32,%%edi # now r*2^32 mod m \n\t" \
00779 "mov %[m],%%eax \n\t" \
00780 "and $0x3f,%%ecx \n\t" \
00781 "test $0x7f,%[v] \n\t" \
00782 "movzx %[shifts](%%ecx),%%ecx \n\t" \
00783 "jnz 0f \n\t" \
00784 "bsf %[v],%%ecx \n\t" \
00785 "0: add %%ecx,%%edi # without div transform: mov ecx->edi \n\t" \
00786 "shr %%cl,%[v] \n\t" \
00787 "shl %%cl,%[r] \n" \
00788 ".balign 32,,12 \n\t"
00789 "1: # u part\n\t" \
00790 "sub %[v],%%eax \n\t" \
00791 "mov %%eax,%%ecx \n\t" \
00792 "and $0x3f,%%ecx \n\t" \
00793 "test $0x7f,%%eax \n\t" \
00794 "movzx %[shifts](%%ecx),%%ecx \n\t" \
00795 "jnz 0f \n\t" \
00796 "bsf %%eax,%%ecx \n\t" \
00797 "0: add %[s],%[r] \n\t" \
00798 "add %%ecx,%%edi \n\t" \
00799 "shr %%cl,%%eax \n\t" \
00800 "shl %%cl,%[s] \n\t" \
00801 "cmp %[v],%%eax \n\t" \
00802 "ja 1b \n\t" \
00803 "cmp $1,%%eax \n\t" \
00804 "je 9f \n\t" \
00805 "# v part \n\t" \
00806 "sub %%eax,%[v] \n\t" \
00807 "2: mov %[v],%%ecx \n\t" \
00808 "and $0x3f,%%ecx \n\t" \
00809 "test $0x7f,%[v] \n\t" \
00810 "movzx %[shifts](%%ecx),%%ecx \n\t" \
00811 "jnz 0f \n\t" \
00812 "bsf %[v],%%ecx \n\t" \
00813 "0: add %[r],%[s] \n\t" \
00814 "shr %%cl,%[v] \n\t" \
00815 "shl %%cl,%[r] \n\t" \
00816 "add %%ecx,%%edi \n\t" \
00817 "cmp %%eax,%[v] \n\t" \
00818 "jb 1b \n\t" \
00819 "sub %%eax,%[v] \n\t" \
00820 "jmp 2b \n\t" \
00821 "9: #adjust shift \n\t" \
00822 "mov %[r],%%ecx \n\t" \
00823 "mov %[m],%%eax \n\t" \
00824 "and $0x3f,%%ecx \n\t" \
00825 "test $0x7f,%[r] \n\t" \
00826 "movzx %[shifts](%%ecx),%%ecx \n\t" \
00827 "jnz 0f \n\t" \
00828 "bsf %[r],%%ecx \n\t" \
00829 "shl %%cl,%[r] \n\t" \
00830 "0: add %%ecx,%%edi \n\t" \
00831 "js 4f # negative-k transform \n\t" \
00832 "jz 5f # zero! \n\t" \
00833 "neg %[r] \n\t" \
00834 "add %%eax,%[r] \n\t" \
00835 "shr %%eax \n\t" \
00836 "inc %%eax \n\t" \
00837 "shr $1,%[r] \n\t" \
00838 "dec %%edi \n\t" \
00839 "jz 8f \n\t" \
00840 "# now the correction part \n\t" \
00841 "# cmov-version \n\t" \
00842 "#3: xorl %%ecx,%%ecx \n\t" \
00843 "#shr $1,%[r] \n\t" \
00844 "#cmovc %%eax,%%ecx \n\t" \
00845 "#add %%ecx,%[r] \n\t" \
00846 "#dec %%edi \n\t" \
00847 "#jnz 3b \n\t" \
00848 "#jmp 8f \n\t" \
00849 "#5: # case k=0 \n\t" \
00850 "#neg %[r] \n\t" \
00851 "#add %%eax,%[r] \n\t" \
00852 "#jmp 8f \n\t" \
00853 "#4: # negative-k transform \n\t" \
00854 "#neg %[r] \n\t" \
00855 "#add %%eax,%[r] \n\t" \
00856 "#0: add %[r],%[r] \n\t" \
00857 "#mov %[r],%%ecx \n\t" \
00858 "#sub %%eax,%[r] \n\t" \
00859 "#cmovb %%ecx,%[r] \n\t" \
00860 "#inc %%edi \n\t" \
00861 "#jnz 0b \n\t" \
00862 "#jmp 8f \n\t" \
00863 "# or: i386 version without cmov \n\t" \
00864 "3: shr $1,%[r] \n\t" \
00865 "sbb %%ecx,%%ecx \n\t" \
00866 "and %%eax,%%ecx \n\t" \
00867 "add %%ecx,%[r] \n\t" \
00868 "dec %%edi \n\t" \
00869 "jnz 3b \n\t" \
00870 "jmp 8f \n\t" \
00871 "5: # case k=0 \n\t" \
00872 "neg %[r] \n\t" \
00873 "add %%eax,%[r] \n\t" \
00874 "jmp 8f \n\t" \
00875 "4: # negative-k transform (i386 version)\n\t" \
00876 "sub %[r],%%eax \n\t" \
00877 "mov %%edi,%%ecx \n\t" \
00878 "neg %%cl \n\t" \
00879 "mov %%eax,%%edx \n\t" \
00880 "add $32,%%edi \n\t" \
00881 "shl %%cl,%%eax \n\t" \
00882 "mov %%edi,%%ecx \n\t" \
00883 "shr %%cl,%%edx \n\t" \
00884 "divl %[m] \n\t " \
00885 "mov %%edx,%[r] \n\t" \
00886 "8: \n" \
00887 : [v] "+d" (x), [r] "+r" (r), [s] "+r" (s)
00888 : [m] "rm" (m), [shifts] "o" (shifts[0])
00889 : "cc", "eax", "ecx", "edi" );
00890
00891 #else
00892
00893 register unsigned int u=m, v=x;
00894 register unsigned int r=0, s=1;
00895 register signed int k=0;
00896
00897 nightshift(v,r,k);
00898
00899 while (true)
00900 {
00901
00902 do
00903 {
00904 u-=v; r+=s;
00905 nightshift(u,s,k);
00906 } while (u>v);
00907
00908
00909 if (__builtin_expect (u==1, 0)) goto loopdone;
00910
00911 do
00912 {
00913 v-=u; s+=r;
00914 nightshift(v,r,k);
00915 } while (v>=u);
00916 }
00917 loopdone:
00918
00919 while ((r&1)==0) { r>>=1; ++k; }
00920 r=m-r;
00921
00922 do { if (r&1) r+=m; r>>=1; } while(--k);
00923
00924 #endif
00925
00926 #if 0
00927 if ((mulmod(r,x,m))!=1)
00928 {
00929 std::cerr << "invmod-error!" << endl;
00930 std::cerr << "check should produce 1, but got " << mulmod(r,x,m) << endl;
00931 std::cerr << "r=" << r << " x=" << x << " m=" << m << endl;
00932 std::cerr << "real invmod should be : " << invmod(x,m) << endl;
00933 exit(1);
00934 }
00935 #endif
00936 return r;
00937 }
00938
00939
00940
00941 #ifdef USE_JASONS_INVMOD
00942 namespace japinvmod
00943 {
00944 #include "invmod.c"
00945 class init { public: init() { init_fast_invmod(); } };
00946 static init at_startup;
00947 }
00948 #endif
00949
00950
00951 }
00952
00953 #endif