00001
00002
00003
00004
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072
00073
00074
00075
00076
00077
00078 #include "modulo.H"
00079 #include "mpz_wrapper.H"
00080
00081 namespace polynomial
00082 {
00083
00084 using namespace my_mpz_wrapper;
00085
00086 class CDFT_base0 : private ForbidAssignment
00087 {
00088 public:
00089 const unsigned int max_size;
00090
00091
00092 static unsigned int calc_max_size(const unsigned int x_size)
00093 {
00094 unsigned int i=2;
00095 while (i<x_size) i<<=1;
00096 return i;
00097 }
00098
00099 inline unsigned int use_size(const unsigned int input_size) const
00100 {
00101 unsigned int i = 2;
00102 while (i<input_size) i<<=1;
00103 if (i>max_size)
00104 {
00105 MARK;
00106 cerr << "input_size is invalid!" << endl;
00107 exit(1);
00108 }
00109 return i;
00110 }
00111
00112 explicit CDFT_base0(const unsigned int x_size)
00113 : max_size(calc_max_size(x_size))
00114 {
00115 }
00116 };
00117
00118 class CDFT_base : public CDFT_base0
00119 {
00120 public:
00121
00122
00123
00124 static void get_valid_primes_for(TPolynom &primes, const unsigned int count,
00125 const mpz_t Start, const unsigned Depth);
00126
00127 private:
00128 mpz_t h;
00129 mpz_t inverse[32];
00130 TPolynom w;
00131
00132 protected:
00133 int size;
00134 mpz_t M;
00135
00136 inline const mpz_t& invpow2(const unsigned int i) const { return inverse[i]; }
00137
00138 void calc_roots_and_inverse();
00139 void convolute(const TPolynom p, const unsigned int n);
00140
00141 public:
00142 explicit CDFT_base(const unsigned int x_size)
00143 : CDFT_base0(x_size), w(new mpz_t[max_size]), size(0)
00144 {
00145 #if defined(VERBOSE_INFO)
00146 MARK; cout << "CDFT_base-constructor for maximum size=" << max_size << endl;
00147 #endif
00148 mpz_init(M);
00149 mpz_init(h);
00150 for (unsigned int i=0; i<max_size; ++i) mpz_init(w[i]);
00151 for (unsigned int i=0; i<32; ++i) mpz_init(inverse[i]);
00152 }
00153
00154 CDFT_base(const unsigned int x_size, const mpz_t x_M)
00155 : CDFT_base0(x_size), w(new mpz_t[max_size]), size(0)
00156 {
00157 #if defined(VERBOSE_INFO)
00158 MARK; cout << "CDFT_base-constructor for maximum size=" << max_size << endl;
00159 #endif
00160 mpz_init(M);
00161 mpz_init(h);
00162 for (unsigned int i=0; i<max_size; ++i) mpz_init(w[i]);
00163 for (unsigned int i=0; i<32; ++i) mpz_init(inverse[i]);
00164 mpz_set(M,x_M);
00165 calc_roots_and_inverse();
00166 }
00167
00168 virtual ~CDFT_base()
00169 {
00170 for (int i=31; i>=0; --i) mpz_clear(inverse[i]);
00171
00172 for (int i=max_size-1; i>=0; --i) mpz_clear(w[i]);
00173 delete [] w;
00174
00175 mpz_clear(h);
00176 mpz_clear(M);
00177 }
00178
00179 int dftmul(const TPolynom R, const int kR,
00180 const TconstPolynom P1, const int k1,
00181 const TconstPolynom P2, const int k2);
00182
00183 friend class CDFT_chinrem;
00184 };
00185
00186
00187
00188 class CDFT : public CDFT_base
00189 {
00190 private:
00191 mpz_t N;
00192
00193
00194 void calc_field_and_roots_and_inverse();
00195
00196 protected:
00197 int internal_mul(const TPolynom R, const int kR,
00198 const TconstPolynom P1, const int k1,
00199 const TconstPolynom P2, const int k2,
00200 const bool reduce_result_modN);
00201
00202 public:
00203
00204
00205
00206
00207
00208 CDFT(const unsigned int x_size, const mpz_t x_N)
00209 : CDFT_base(x_size)
00210 {
00211 #if defined(VERBOSE_INFO)
00212 MARK; cout << "CDFT-constructor for maximum size=" << max_size << endl;
00213 #endif
00214 mpz_init_set(N,x_N);
00215 calc_field_and_roots_and_inverse();
00216 }
00217
00218 virtual ~CDFT()
00219 {
00220 mpz_clear(N);
00221 }
00222
00223 inline const mpz_t& get_N(void) const { return N; }
00224
00225 inline int mul(const TPolynom R, const int kR,
00226 const TconstPolynom P1, const int k1,
00227 const TconstPolynom P2, const int k2)
00228 {
00229 return internal_mul(R,kR,P1,k1,P2,k2,false);
00230 }
00231
00232 inline int mulmod(const TPolynom R, const int kR,
00233 const TconstPolynom P1, const int k1,
00234 const TconstPolynom P2, const int k2)
00235 {
00236 return internal_mul(R,kR,P1,k1,P2,k2,true);
00237 }
00238
00239 inline int square(const TPolynom R, const int kR,
00240 const TconstPolynom P, const int k)
00241 {
00242 return internal_mul(R,kR,P,k,P,k,false);
00243 }
00244
00245 inline int squaremod(const TPolynom R, const int kR,
00246 const TconstPolynom P, const int k)
00247 {
00248 return internal_mul(R,kR,P,k,P,k,true);
00249 }
00250
00251 };
00252
00253
00254 class CDFT_chinrem : public CDFT_base0
00255 {
00256 private:
00257 mpz_t N;
00258
00259
00260 void calc_field_and_roots_and_inverse();
00261
00262 protected:
00263
00264 struct tnode
00265 {
00266 mpz_t M;
00267 mpz_t inv_first_M_mod_second_M;
00268 tnode *left, *right;
00269 CDFT_base *first_dft, *second_dft;
00270 };
00271 void create_nodes(tnode &node, int &count, int depth = 1);
00272 void delete_nodes(tnode &node);
00273 void recurse_dftmul(tnode &node,
00274 const TPolynom R, const int kR,
00275 const TconstPolynom P1, const int k1,
00276 const TconstPolynom P2, const int k2);
00277 tnode root_node;
00278
00279 int anz_dft;
00280 TPolynom Ms;
00281 CDFT_base** dft;
00282 int internal_mul(const TPolynom R, const int kR,
00283 const TconstPolynom P1, const int k1,
00284 const TconstPolynom P2, const int k2,
00285 const bool reduce_result_modN);
00286
00287 public:
00288
00289
00290
00291
00292
00293 CDFT_chinrem(const unsigned int x_size, const mpz_t x_N)
00294 : CDFT_base0(x_size), anz_dft(0), Ms(NULL), dft(NULL)
00295 {
00296 #if defined(VERBOSE_INFO)
00297 MARK; cout << "CDFT-constructor for maximum size=" << max_size << endl;
00298 #endif
00299 mpz_init_set(N,x_N);
00300 calc_field_and_roots_and_inverse();
00301 }
00302
00303 virtual ~CDFT_chinrem()
00304 {
00305 delete_nodes(root_node);
00306 for (int i=0; i<anz_dft; ++i) delete dft[i];
00307 delete [] dft;
00308 for (int i=0; i<anz_dft; ++i) mpz_clear(Ms[i]);
00309 delete [] Ms;
00310 mpz_clear(N);
00311 }
00312
00313 inline const mpz_t& get_N(void) const { return N; }
00314
00315 inline int mul(const TPolynom R, const int kR,
00316 const TconstPolynom P1, const int k1,
00317 const TconstPolynom P2, const int k2)
00318 {
00319 return internal_mul(R,kR,P1,k1,P2,k2,false);
00320 }
00321
00322 inline int mulmod(const TPolynom R, const int kR,
00323 const TconstPolynom P1, const int k1,
00324 const TconstPolynom P2, const int k2)
00325 {
00326 return internal_mul(R,kR,P1,k1,P2,k2,true);
00327 }
00328
00329 inline int square(const TPolynom R, const int kR,
00330 const TconstPolynom P, const int k)
00331 {
00332 return internal_mul(R,kR,P,k,P,k,false);
00333 }
00334
00335 inline int squaremod(const TPolynom R, const int kR,
00336 const TconstPolynom P, const int k)
00337 {
00338 return internal_mul(R,kR,P,k,P,k,true);
00339 }
00340
00341 };
00342
00343
00344
00345
00346
00347
00348 void CDFT_base::get_valid_primes_for(TPolynom &primes, const unsigned int count,
00349 const mpz_t Start, const unsigned Depth)
00350 {
00351
00352
00353
00354
00355 if (primes!=NULL)
00356 {
00357 cout << __FILE__ << ", " << __FUNCTION__ << ": line " << __LINE__ << endl;
00358 cerr << "First parameter is a call by reference," << endl;
00359 cerr << "it should initially point to NULL (to avoid memory-leaks)," << endl;
00360 cerr << "because a new pointer to new data will be generated and" << endl;
00361 cerr << "there is no need for initially data pointed by \"primes!\"" << endl;
00362 exit(1);
00363 }
00364
00365 primes = new mpz_t[count];
00366 for (unsigned int i=0; i<count; ++i) mpz_init(primes[i]);
00367
00368 mpz_t x,M;
00369 mpz_init(x); mpz_init(M);
00370
00371 const size_t bits = mpz_sizeinbase(Start,2)+1;
00372
00373 mpz_set_ui(M,1); mpz_mul_2exp(M,M,bits);
00374 #if 1
00375
00376 if (mpz_cmp_ui(M,Depth)<0) mpz_set_ui(M,Depth);
00377 #endif
00378 mpz_add_ui(M,M,1);
00379
00380 const unsigned int interval = 10000;
00381
00382 #ifdef VERBOSE
00383 MARK;
00384 #endif
00385
00386 for (unsigned int bisher=0; bisher<count; ++bisher)
00387 {
00388 #ifdef VERBOSE
00389 cerr << bisher+1 << "/" << count << ": ";
00390 #endif
00391 do
00392 {
00393
00394
00395 bool sieve[interval] = { false };
00396 for (unsigned int p=3; p<1000; p+=2) if (numtheory::is_prime(p))
00397 {
00398 unsigned int i = 0;
00399 unsigned int r = mpz_fdiv_ui(M,p);
00400 while (r) { r=(r+Depth)%p; ++i; }
00401 while (i<interval) { sieve[i]=true; i+=p; }
00402 }
00403 sieve[interval-1]=false;
00404 unsigned int i=0;
00405 while(i<interval)
00406 {
00407 while(sieve[i]) ++i;
00408 mpz_set_ui(x,Depth); mpz_mul_ui(x,x,i); mpz_add(x,x,M);
00409 #ifdef VERBOSE
00410 cerr << i << " ";
00411 #endif
00412 if (mpz_probab_prime_p(x,10)) break;
00413 ++i;
00414 }
00415 mpz_set_ui(x,Depth); mpz_mul_ui(x,x,i); mpz_add(M,M,x);
00416 #ifdef VERBOSE
00417 cerr << " # +" << i << endl;
00418 #endif
00419 } while (mpz_probab_prime_p(M,10)==0);
00420 mpz_set(primes[bisher],M); mpz_add_ui(M,M,Depth);
00421 }
00422
00423 mpz_clear(M); mpz_clear(x);
00424 }
00425
00426
00427 void CDFT::calc_field_and_roots_and_inverse()
00428 {
00429 mpz_mul(M,N,N); mpz_mul_ui(M,M,max_size);
00430 mpz_mul_ui(M,M,4);
00431
00432 TPolynom MyField = NULL;
00433 get_valid_primes_for(MyField,1,M,max_size);
00434 mpz_set(M,MyField[0]);
00435 mpz_clear(MyField[0]); delete [] MyField;
00436 calc_roots_and_inverse();
00437 }
00438
00439
00440 void CDFT_chinrem::delete_nodes(tnode &node)
00441 {
00442 if (node.left) delete_nodes(*node.left);
00443 if (node.right) delete_nodes(*node.right);
00444 mpz_clear(node.inv_first_M_mod_second_M); mpz_clear(node.M);
00445 delete node.right; delete node.left;
00446 }
00447
00448 void CDFT_chinrem::create_nodes(tnode &node, int &count, int depth)
00449 {
00450 if ( 1<<depth >= anz_dft )
00451 {
00452
00453 node.first_dft=dft[count++];
00454 #if defined(VERBOSE_INFO)
00455 cout << "created dft " << count << ": " << node.first_dft->M << endl;
00456 #endif
00457 node.second_dft=dft[count++];
00458 #if defined(VERBOSE_INFO)
00459 cout << "created dft " << count << ": " << node.second_dft->M << endl;
00460 #endif
00461 node.left=node.right=NULL;
00462 mpz_init(node.M); mpz_mul(node.M,node.first_dft->M,node.second_dft->M);
00463 mpz_init(node.inv_first_M_mod_second_M);
00464 if (!mpz_invert(node.inv_first_M_mod_second_M,node.first_dft->M,node.second_dft->M))
00465 {
00466 MARK; cerr << "BUG!! Inverse MUST exist!" << endl;
00467 exit(1);
00468 }
00469 }
00470 else
00471 {
00472 node.first_dft=node.second_dft=NULL;
00473 node.left = new tnode;
00474 create_nodes(*node.left,count,depth+1);
00475 node.right = new tnode;
00476 create_nodes(*node.right,count,depth+1);
00477 mpz_init(node.M); mpz_mul(node.M,node.left->M,node.right->M);
00478 mpz_init(node.inv_first_M_mod_second_M);
00479 if (!mpz_invert(node.inv_first_M_mod_second_M,node.left->M,node.right->M))
00480 {
00481 MARK; cerr << "BUG!! Inverse MUST exist!" << endl;
00482 exit(1);
00483 }
00484 }
00485 #if defined(VERBOSE_INFO)
00486 cout << "node value: " << node.M << endl;
00487 #endif
00488 }
00489
00490 void CDFT_chinrem::calc_field_and_roots_and_inverse()
00491 {
00492 typedef CDFT_base* Pdft;
00493 mpz_t M;
00494 mpz_init(M);
00495 mpz_mul(M,N,N); mpz_mul_ui(M,M,max_size);
00496 mpz_sqrt(M,M);
00497 anz_dft=2;
00498 while (mpz_sizeinbase(M,10)>125)
00499 {
00500
00501
00502 mpz_sqrt(M,M); anz_dft*=2;
00503 }
00504 Ms = NULL; CDFT_base::get_valid_primes_for(Ms,anz_dft,M,max_size);
00505 dft = new Pdft[anz_dft];
00506 for (int i=0; i<anz_dft; ++i) dft[i] = new CDFT_base(max_size,Ms[i]);
00507 #if defined(VERBOSE_INFO)
00508 cout << "N = " << N << endl;
00509 cout << anz_dft << " nodes have been prepared for chinese remaindering." << endl;
00510 #endif
00511 int count=0; create_nodes(root_node,count);
00512 mpz_clear(M);
00513 }
00514
00515
00516 void CDFT_base::calc_roots_and_inverse()
00517 {
00518 mpz_t x,e;
00519 mpz_init(x); mpz_init(e);
00520
00521 if (!mpz_probab_prime_p(M,10))
00522 {
00523 MARK;
00524 cerr << "invalid M for dft!" << endl;
00525 exit(1);
00526 }
00527
00528 mpz_sub_ui(e,M,1);
00529 if (mpz_div_ui(e,e,max_size)!=0)
00530 {
00531 MARK;
00532 cerr << "invalid M for dft!" << endl;
00533 exit(1);
00534 }
00535
00536 unsigned int r=911;
00537 try_r:
00538 mpz_set_ui(x,r); mpz_powm(w[1],x,e,M); mpz_powm_ui(x,w[1],max_size/2,M);
00539 mpz_add_ui(x,x,1); mpz_mod(x,x,M);
00540
00541
00542 if (mpz_cmp_ui(x,0)!=0)
00543 {
00544 r+=2; if (r<2000) goto try_r;
00545 cerr << "unable to find valid roots..." << endl;
00546 exit(1);
00547 }
00548
00549
00550 mpz_set_ui(w[0],1);
00551 for (unsigned int i=2; i<max_size; ++i)
00552 {
00553 mpz_mul(x,w[i-1],w[1]); mpz_mod(w[i],x,M);
00554 if (mpz_cmp_ui(w[i],1)==0)
00555 {
00556 MARK;
00557 cerr << "invalid roots..." << endl;
00558 exit(1);
00559 }
00560 }
00561
00562 mpz_clear(x); mpz_clear(e);
00563
00564
00565 mpz_set_ui(inverse[0],1);
00566 mpz_set_ui(h,2);
00567 if (!mpz_invert(h,h,M)) { MARK; cerr << "inverse of 2 does not exist!" << endl; exit(1); }
00568 mpz_mod(h,h,M); mpz_set(inverse[1],h);
00569 for (int i=2; i<32; ++i)
00570 {
00571 mpz_mul(h,inverse[i-1],inverse[1]); mpz_mod(h,h,M);
00572 mpz_set(inverse[i],h);
00573 }
00574 }
00575
00576 void CDFT_base::convolute(const TPolynom p, const unsigned int n)
00577 {
00578
00579 if (n==4)
00580 {
00581 mpz_add(p[0],p[0],p[2]); mpz_mul_2exp(p[2],p[2],1); mpz_sub(p[2],p[0],p[2]);
00582 mpz_add(p[1],p[1],p[3]); mpz_add(p[0],p[0],p[1]); mpz_mul_2exp(p[3],p[3],1); mpz_sub(p[3],p[1],p[3]);
00583 mpz_mul(h,p[3],w[max_size>>2]); mpz_mod(h,h,M); mpz_sub(p[3],p[2],h); mpz_add(p[2],p[2],h);
00584 mpz_mul_2exp(p[1],p[1],1); mpz_sub(p[1],p[0],p[1]);
00585 mpz_swap(p[1],p[2]);
00586 }
00587 else
00588 {
00589 const unsigned int nh = n>>1;
00590
00591 #if 1
00592 {
00593 mpz_t* const temp = new mpz_t[nh];
00594 for (unsigned int i=0, j=0; i<nh; ++i)
00595 {
00596 mpz_swap(p[i],p[j++]);
00597 mpz_swap(temp[i],p[j++]);
00598 }
00599 for (unsigned int i=0; i<nh; ++i) mpz_swap(p[i+nh],temp[i]);
00600 delete [] temp;
00601 }
00602 #else
00603
00604 {
00605 mpz_t* const temp = new mpz_t[n];
00606 memcpy(temp,p,n*sizeof(mpz_t));
00607 for (unsigned int i=0, j=0; i<nh; ++i)
00608 {
00609 memcpy(&p[i],&temp[j++],sizeof(mpz_t));
00610 memcpy(&p[i+nh],&temp[j++],sizeof(mpz_t));
00611 }
00612 delete [] temp;
00613 }
00614 #endif
00615
00616 convolute(p,nh);
00617 convolute(&p[nh],nh);
00618
00619 const unsigned int dj = max_size/n;
00620 for (unsigned int i=0,j=0; i<nh; ++i,j+=dj)
00621 {
00622 mpz_mul(h,p[i+nh],w[j]); mpz_mod(h,h,M);
00623 mpz_sub(p[i+nh],p[i],h);
00624 mpz_add(p[i],p[i],h);
00625 }
00626 }
00627 }
00628
00629
00630 int CDFT_base::dftmul(const TPolynom R, const int kR,
00631 const TconstPolynom P1, const int k1,
00632 const TconstPolynom P2, const int k2)
00633 {
00634 const unsigned int estimated_memusage_in_bits = mpz_sizeinbase(M,2)+32;
00635 const int result_size = k1+k2-1;
00636 #if defined(VERBOSE)
00637 cout << "dftmul: " << k1 << ", " << k2 << ", " << result_size << endl;
00638 #endif
00639 size = use_size(result_size);
00640
00641
00642 if (result_size>size)
00643 {
00644 MARK; cerr << "(result_size>size)" << endl;
00645 exit(1);
00646 }
00647 if (kR<result_size)
00648 {
00649 MARK; cerr << "destination polynomial is too small!" << endl;
00650 exit(1);
00651 }
00652
00653
00654
00655 const TPolynom p = (kR>=size) ? R : new mpz_t[size];
00656
00657 if (p!=R) for (int i=0; i<size; ++i) mpz_init2(p[i],estimated_memusage_in_bits);
00658 else for (int i=k1; i<size; ++i) mpz_set_ui(p[i],0);
00659
00660 for (int i=0; i<k1; ++i) mpz_mod(p[i],P1[i],M);
00661 convolute(p,size);
00662
00663 const TPolynom q = (P1==P2 && k1==k2) ? p : new mpz_t[size];
00664 if (p!=q)
00665 {
00666 for (int i=0; i<size; ++i) mpz_init2(q[i],estimated_memusage_in_bits);
00667
00668
00669 for (int i=0; i<k2; ++i) mpz_mod(q[i],P2[i],M);
00670 convolute(q,size);
00671 }
00672
00673
00674 for (int i=0; i<size; ++i)
00675 {
00676 mpz_mul(h,p[i],q[i]);
00677 mpz_mod(p[i],h,M);
00678 }
00679
00680
00681 if (q!=p)
00682 {
00683
00684 for (int i=0; i<size; ++i) mpz_clear(q[i]);
00685 delete [] q;
00686 }
00687
00688 convolute(p,size);
00689 for (int i=1; i<size/2; ++i) mpz_swap(p[i],p[size-i]);
00690
00691 int inv_index=0;
00692 for (int i=1; i<size; i<<=1) ++inv_index;
00693
00694 for (int i=0; i<result_size; ++i)
00695 {
00696 mpz_mul(h,p[i],invpow2(inv_index));
00697 mpz_mod(p[i],h,M);
00698 }
00699
00700 if (p!=R)
00701 {
00702
00703 for (int i=result_size-1; i>=0; --i) mpz_set(R[i],p[i]);
00704 if (size-result_size>10) cout << size-result_size << " computations saved..." << endl;
00705
00706 for (int i=0; i<size; ++i) mpz_clear(p[i]);
00707 delete [] p;
00708 }
00709 else
00710 for (int i=result_size; i<kR; ++i) mpz_set_ui(R[i],0);
00711 return result_size;
00712 }
00713
00714
00715 int CDFT::internal_mul(const TPolynom R, const int kR,
00716 const TconstPolynom P1, const int k1,
00717 const TconstPolynom P2, const int k2,
00718 const bool reduce_result_modN)
00719 {
00720 const size_t ld_N = mpz_sizeinbase(N,2);
00721 const unsigned int estimated_memusage_in_bits = mpz_sizeinbase(M,2)*2+5;
00722 const int result_size = k1+k2-1;
00723 #if defined(VERBOSE_INFO)
00724 cout << "_mul (dft): ";
00725 if (P1==P2) cout << "(SQUARE) ";
00726 cout << k1 << ", " << k2 << ", " << result_size << endl;
00727 #endif
00728 size = use_size(result_size);
00729
00730
00731 if (result_size>size)
00732 {
00733 MARK; cerr << "(result_size>size)" << endl;
00734 exit(1);
00735 }
00736 if (kR<result_size)
00737 {
00738 MARK; cerr << "destination polynomial is too small!" << endl;
00739 exit(1);
00740 }
00741
00742
00743
00744 const TPolynom p = (kR>=size && mpz_sizeinbase(R[0],2)>=estimated_memusage_in_bits) ? R : new mpz_t[size];
00745
00746
00747
00748
00749
00750 if (p!=R) for (int i=0; i<size; ++i) mpz_init2(p[i],estimated_memusage_in_bits);
00751 for (int i=0; i<k1; ++i) mpz_set(p[i],P1[i]);
00752
00753
00754
00755
00756
00757 #if 0
00758 for (int i=0; i<k1; ++i) mpz_mod(p[i],p[i],N);
00759 #else
00760 {
00761 int j=0;
00762 for (int i=0; i<k1; ++i)
00763 {
00764 if ( mpz_sgn(p[i])<0 || mpz_sizeinbase(p[i],2)>ld_N )
00765 {
00766 ++j; mpz_mod(p[i],p[i],N);
00767 }
00768 }
00769 #if defined(VERBOSE)
00770 if (j) cout << "P1: " << j << " out of " << k1 << " coefficients corrected." << endl;
00771 #endif
00772 }
00773 #endif
00774
00775 convolute(p,size);
00776
00777 const TPolynom q = (P1==P2 && k1==k2) ? p : new mpz_t[size];
00778 if (p!=q)
00779 {
00780
00781
00782
00783
00784 for (int i=0; i<size; ++i) mpz_init2(q[i],estimated_memusage_in_bits);
00785 for (int i=0; i<k2; ++i) mpz_set(q[i],P2[i]);
00786
00787
00788 #if 0
00789 for (int i=0; i<k2; ++i) mpz_mod(q[i],q[i],N);
00790 #else
00791 {
00792 int j=0;
00793 for (int i=0; i<k2; ++i)
00794 {
00795 if (mpz_sgn(q[i])<0 || mpz_sizeinbase(q[i],2)>ld_N )
00796 {
00797 ++j; mpz_mod(q[i],q[i],N);
00798 }
00799 }
00800 #if defined(VERBOSE)
00801 if (j) cout << "P2: " << j << " out of " << k2 << " coefficients corrected." << endl;
00802 #endif
00803 }
00804 #endif
00805
00806 convolute(q,size);
00807 }
00808
00809
00810 for (int i=0; i<size; ++i)
00811 {
00812 mpz_mul(p[i],p[i],q[i]);
00813 mpz_mod(p[i],p[i],M);
00814 }
00815
00816
00817 if (q!=p)
00818 {
00819
00820 for (int i=0; i<size; ++i) mpz_clear(q[i]);
00821 delete [] q;
00822 }
00823
00824 convolute(p,size);
00825 for (int i=1; i<size/2; ++i) mpz_swap(p[i],p[size-i]);
00826
00827 int inv_index=0;
00828 for (int i=1; i<size; i<<=1) ++inv_index;
00829
00830 for (int i=0; i<result_size; ++i)
00831 {
00832 mpz_mul(p[i],p[i],invpow2(inv_index));
00833 mpz_mod(p[i],p[i],M);
00834 if (reduce_result_modN) mpz_mod(p[i],p[i],N);
00835 }
00836
00837 if (p!=R)
00838 {
00839
00840 for (int i=result_size-1; i>=0; --i) mpz_set(R[i],p[i]);
00841 if (size-result_size>10) cout << size-result_size << " computations saved..." << endl;
00842 #ifdef DEBUG
00843
00844 for (int i=result_size; i<size; ++i)
00845 {
00846 mpz_mul(p[i],p[i],invpow2(inv_index));
00847 mpz_mod(p[i],p[i],M);
00848 if (mpz_cmp_ui(p[i],0)!=0)
00849 {
00850 MARK;
00851 cerr << "These values should be ZERO!" << endl;
00852 }
00853 }
00854 #endif
00855
00856 for (int i=0; i<size; ++i) mpz_clear(p[i]);
00857 delete [] p;
00858 }
00859
00860 return result_size;
00861 }
00862
00863
00864
00865 void CDFT_chinrem::recurse_dftmul(tnode &node,
00866 const TPolynom R, const int kR,
00867 const TconstPolynom P1, const int k1,
00868 const TconstPolynom P2, const int k2)
00869 {
00870 if (node.left || node.right)
00871 {
00872
00873
00874 const TPolynom p1 = new mpz_t[k1];
00875 for (int i=0; i<k1; ++i) { mpz_init(p1[i]); mpz_mod(p1[i],P1[i],node.M); }
00876 const TPolynom p2 = (P1==P2) ? p1 : new mpz_t[k2];
00877 if (p1!=p2)
00878 for (int i=0; i<k2; ++i) { mpz_init(p2[i]); mpz_mod(p2[i],P2[i],node.M); }
00879
00880 recurse_dftmul(*node.left,R,kR,p1,k1,p2,k2);
00881 TTempPolynom R2(kR);
00882
00883 recurse_dftmul(*node.right,R2,kR,p1,k1,p2,k2);
00884
00885 if (p1!=p2)
00886 {
00887 for (int i=0; i<k2; ++i) mpz_clear(p2[i]);
00888 delete [] p2;
00889 }
00890 for (int i=0; i<k1; ++i) mpz_clear(p1[i]);
00891 delete [] p1;
00892
00893
00894
00895 mpz_t h; mpz_init(h);
00896 for (int i=0; i<k1+k2-1; ++i)
00897 {
00898 mpz_sub(h,R2[i],R[i]);
00899 mpz_mul(h,h,node.inv_first_M_mod_second_M);
00900 mpz_mod(h,h,node.right->M); mpz_addmul(R[i],h,node.left->M);
00901
00902 }
00903 mpz_clear(h);
00904 }
00905 else
00906 {
00907
00908
00909
00910 const TPolynom p1 = new mpz_t[k1];
00911 for (int i=0; i<k1; ++i) { mpz_init(p1[i]); mpz_mod(p1[i],P1[i],node.M); }
00912 const TPolynom p2 = (P1==P2) ? p1 : new mpz_t[k2];
00913 if (p1!=p2)
00914 for (int i=0; i<k2; ++i) { mpz_init(p2[i]); mpz_mod(p2[i],P2[i],node.M); }
00915
00916 node.first_dft->dftmul(R,kR,p1,k1,p2,k2);
00917 TTempPolynom R2(kR);
00918 node.second_dft->dftmul(R2,kR,p1,k1,p2,k2);
00919
00920 if (p1!=p2)
00921 {
00922 for (int i=0; i<k2; ++i) mpz_clear(p2[i]);
00923 delete [] p2;
00924 }
00925 for (int i=0; i<k1; ++i) mpz_clear(p1[i]);
00926 delete [] p1;
00927
00928
00929 mpz_t h; mpz_init(h);
00930 for (int i=0; i<k1+k2-1; ++i)
00931 {
00932 mpz_sub(h,R2[i],R[i]);
00933 mpz_mul(h,h,node.inv_first_M_mod_second_M);
00934 mpz_mod(h,h,node.second_dft->M); mpz_addmul(R[i],h,node.first_dft->M);
00935
00936 }
00937 mpz_clear(h);
00938 }
00939 }
00940
00941
00942 int CDFT_chinrem::internal_mul(const TPolynom R, const int kR,
00943 const TconstPolynom P1, const int k1,
00944 const TconstPolynom P2, const int k2,
00945 const bool reduce_result_modN)
00946 {
00947 const int result_size = k1+k2-1;
00948 #if defined(VERBOSE_INFO)
00949 cout << "_mul (dft,chinrem): ";
00950 if (P1==P2) cout << "(SQUARE) ";
00951 cout << k1 << ", " << k2 << ", " << result_size << endl;
00952 #endif
00953
00954 if (kR<result_size)
00955 {
00956 MARK; cerr << "destination polynomial is too small!" << endl;
00957 exit(1);
00958 }
00959
00960
00961
00962 const TPolynom p1 = new mpz_t[k1];
00963
00964
00965
00966
00967
00968 for (int i=0; i<k1; ++i) mpz_init(p1[i]);
00969 for (int i=0; i<k1; ++i) mpz_mod(p1[i],P1[i],N);
00970
00971 const TPolynom p2 = (P1==P2 && k1==k2) ? p1 : new mpz_t[k2];
00972 if (p1!=p2)
00973 {
00974
00975
00976
00977
00978 for (int i=0; i<k2; ++i) mpz_init(p2[i]);
00979 for (int i=0; i<k2; ++i) mpz_mod(p2[i],P2[i],N);
00980 }
00981
00982 const int size = use_size(result_size);
00983 if (kR>=size)
00984 {
00985 recurse_dftmul(root_node,R,kR,p1,k1,p2,k2);
00986 if (reduce_result_modN)
00987 for (int i=0; i<result_size; ++i) mpz_mod(R[i],R[i],N);
00988 }
00989 else
00990 {
00991 TTempPolynom myR(size);
00992 recurse_dftmul(root_node,myR,size,p1,k1,p2,k2);
00993 if (reduce_result_modN)
00994 for (int i=0; i<result_size; ++i) mpz_mod(R[i],myR[i],N);
00995 else
00996 for (int i=0; i<result_size; ++i) mpz_set(R[i],myR[i]);
00997 }
00998
00999 if (p1!=p2)
01000 {
01001 for (int i=0; i<k2; ++i) mpz_clear(p2[i]);
01002 delete [] p2;
01003 }
01004 for (int i=0; i<k1; ++i) mpz_clear(p1[i]);
01005 delete [] p1;
01006
01007
01008 return result_size;
01009 }
01010
01011
01012
01013
01014
01015
01016
01017
01018
01019
01020 typedef CDFT_chinrem TDFT;
01021 typedef TDFT* PDFT;
01022
01023
01024 #if 1
01025 inline bool dft_mul_is_recommended(const int k1, const int k2)
01026 {
01027
01028 if (k1<14000 || k2<14000) return false;
01029 if (k1<=16384) return true;
01030 if (k1>25000 && k2>25000) return true;
01031 return false;
01032 }
01033
01034 inline bool dft_square_is_recommended(const int k)
01035 {
01036
01037 return k>=8192;
01038 }
01039
01040 #else
01041
01042 inline bool dft_mul_is_recommended(const int k1, const int k2)
01043 {
01044 return k1+k2>=8;
01045
01046 }
01047
01048 inline bool dft_square_is_recommended(const int k)
01049 {
01050 return k>=4;
01051
01052 }
01053
01054 #endif
01055
01056
01057
01058 const PDFT get_dft(const unsigned int n, const mpz_t m)
01059 {
01060 static PDFT pdft = NULL;
01061 if (!pdft)
01062 {
01063 if (n<=0) return NULL;
01064 pdft = new TDFT(n>32768 ? n : 32768,m);
01065 }
01066
01067 if ( n > pdft->max_size
01068 ||
01069 mpz_cmp(pdft->get_N(),m)!=0
01070 ||
01071 n==0
01072 )
01073 {
01074 delete pdft; pdft=NULL;
01075 if (n>0)
01076 {
01077 cout << "renewing dft-object..." << endl;
01078 pdft = new TDFT(n,m);
01079 }
01080 else
01081 {
01082
01083 cout << "dft-object is released..." << endl;
01084 }
01085 return pdft;
01086 }
01087 return pdft;
01088 }
01089
01090 void clear_dft_tempmemory()
01091 {
01092 mpz_t x;
01093 mpz_init(x);
01094 get_dft(0,x);
01095 mpz_clear(x);
01096 }
01097
01098 }
01099