Qsieve: invmod.c Source File

00001 
00007 /*--------------------------------------------------------------------
00008 This file is placed in the public domain by its author,
00009 Jason Papadopoulos. You may use it for any purpose, free of charge,
00010 without having to notify anyone. I disclaim any responsibility for any
00011 errors.
00012 
00013 Optionally, please be nice and tell me if you find this source to be
00014 useful. Again optionally, if you add to the functionality present here
00015 please consider making those additions public too, so that others may 
00016 benefit from your work. 
00017                                    --jasonp@boo.net
00018 --------------------------------------------------------------------*/
00019 
00020 
00021 /*-------------------------------------------------------------------------
00022 This file was modified by Thorsten Reinecke to be compatible with Qsieve.
00023 It can be used as replacement for the other invmod functions in modulo.cc.
00024 
00025 For generic support of 23bit invmod, this version is a very fast
00026 alternative. (For Athlon computers, the crafted cmov assembler version of
00027 montgom_invmod is faster than this one.) Many thanks to the author for
00028 providing me this file, so that I was able to do performance tests.
00029 
00030 Except for this file (and except otherwise noted), all the other files in
00031 the Qsieve distribution are still copyrighted under the GPL!
00032                                    thre@thorstenreinecke.de
00033 -------------------------------------------------------------------------*/
00034 
00035 
00036 /*--------------------------------------------------------------------*/
00037 
00038         /* When the factor base is really big, the MPQS
00039            initialization stage takes a big fraction of the
00040            runtime; timings show that ~90% of the time in
00041            the initialization stage is spent computing modular
00042            inverses. To that end, the following implements a
00043            custom modular inverse that avoids integer division.
00044            It's somewhat obscure, but far less obscure than
00045            implementing the self-initializing variant of MPQS.
00046 
00047            This is *much* faster than using mp_expo_1, because
00048            it uses floating point and several tricks for fast
00049            rounding. */
00050 
00051 const double ieee_round = 6755399441055744.0;
00052 const double intel_round = 6755399441055744.0 * 2048.0;
00053 const double round_test = 2.7;
00054 const double round_correct = 3.0;
00055 volatile double round_constant[2];
00056 
00057 
00058 void init_fast_invmod(void) {
00059  
00060         /* Unfortunately, the fast rounding requires figuring
00061            out the size of a floating point register. The choices
00062            are the 53-bit IEEE size and the 64-bit Intel size;
00063            however, the OS may force the Intel size down to the
00064            IEEE size for compatibility. The following performs
00065            a runtime check to see which register size is in use,
00066            and selects the magic rounding constant accordingly. */
00067 
00068         double round_me;
00069  
00070         round_constant[0] = intel_round;
00071         round_constant[1] = intel_round;
00072         round_me = round_test;
00073         round_me += round_constant[0];
00074         round_me -= round_constant[1];
00075         
00076         
00077         printf("initializing jason's fast_invmod\n");
00078         if (round_me != round_correct) {
00079                 round_constant[0] = ieee_round;
00080                 round_constant[1] = ieee_round;
00081                 round_me = round_test;
00082                 round_me += round_constant[0];
00083                 round_me -= round_constant[1];
00084                 if( round_me != round_correct ) {
00085                         printf("error: can't initialize fast rounding\n");
00086                         exit(-1);
00087                 }
00088         }
00089 
00090         /* Check that the compiler did not incorrectly 
00091            optimize our custom modular inverse routine */
00092 
00093         if (fast_invmod(6, 17) != 3) {
00094                 printf("error: modular inverse does not work\n");
00095                 exit(-1);
00096         }
00097 }
00098 
00099 unsigned int fast_invmod(unsigned int a, unsigned int p) {
00100 
00101         /* Compute mp_expo_1(a, p-2, p) using floating point.
00102            'a' and 'p' must be less than ~25 bits in size if
00103            using IEEE fast rounding, but can go up to ~31 bits
00104            if Intel fast rounding is in use. Both of these
00105            are far larger than MPQS will ever need.
00106 
00107            The code avoids an integer remainder by multiplying
00108            by the floating point reciprocal and (fast-)rounding to
00109            the nearest integral floating point value. Within
00110            the while() loop, the accumulated result is always
00111            bounded by +-p, and is normalized afterwards. 
00112            
00113            The exponent is scanned right to left; at each step the
00114            modular squaring and multiply are both performed, even
00115            if the multiply result will be thrown away */
00116            
00117         unsigned int exponent = p - 2;
00118         int result;
00119         double dp = (double)(p);
00120         double dsquare = (double)(a);
00121         double recip = 1.0 / dp;
00122         double round0 = round_constant[0];
00123         double round1 = round_constant[1];
00124         double dresult = 1.0;
00125 
00126         while (exponent) {
00127                 double dsquare2 = dsquare * dsquare;
00128                 double dresult2 = dresult * dsquare;
00129 
00130                 double drsquare = dsquare2 * recip + round0 - round1;
00131                 double drresult = dresult2 * recip + round0 - round1;
00132 
00133                 dsquare2 -= dp * drsquare;
00134                 dresult2 -= dp * drresult;
00135 
00136                 dsquare = dsquare2;
00137                 if (exponent & 1)
00138                         dresult = dresult2;
00139 
00140                 exponent >>= 1;
00141         }
00142 
00143         result = (int)dresult;
00144 
00145         if (result < 0)
00146                 return (unsigned int)(result + p);
00147         else
00148                 return (unsigned int)result;
00149 }