00001 #ifndef MYBITSTRING_HEADER_
00002 #define MYBITSTRING_HEADER_
00003
00010 #include <new>
00011 using std::nothrow;
00012
00013 #include "utils.H"
00014
00015 class myBitString
00016 {
00017 protected:
00018 int size;
00019 unsigned int *data;
00020 inline void resize(int newsize)
00021 {
00022 if (newsize < size) return;
00023
00024 unsigned int *olddata = data;
00025 data = new(nothrow) unsigned int[newsize];
00026 if (!data)
00027 {
00028 cerr << "myBitString.resize(" << newsize << ") out of memory" << endl;
00029 exit(1);
00030 }
00031 for (int i=0; i<size; i++) data[i] = olddata[i];
00032 for (int i=size; i< newsize; i++) data[i]=0;
00033 size=newsize;
00034 if (olddata) delete [] olddata;
00035 }
00036 public:
00037 inline void optisize(void)
00038 {
00039 int i=size-1;
00040 while (i>=0 && data[i]==0) i--;
00041 if (i==size-1) return;
00042
00043 if (size<128)
00044 {
00045
00046
00047 #ifdef VERBOSE
00048 cout << "MyBitString::Optisize, no reallocate " << size << " to " << i+1 << endl;
00049 #endif
00050 size=i+1; return;
00051 }
00052
00053 #ifdef VERBOSE
00054 cout << "MyBitString::Optisize, reallocate" << size << " to " << i+1 << endl;
00055 #endif
00056 if (i<0) { delete [] data; data=NULL; size=0; return; }
00057
00058 size=i+1;
00059 unsigned int *olddata = data;
00060 data = new(nothrow) unsigned int[size];
00061 if (!data)
00062 {
00063 cerr << "myBitString.optisize(" << size << ") out of memory" << endl;
00064 exit(1);
00065 }
00066 for (; i>=0; --i) data[i] = olddata[i];
00067 if (olddata) delete [] olddata;
00068 }
00069
00070 inline myBitString(void) : size(0), data(NULL) { }
00071 inline ~myBitString(void)
00072 {
00073 if (data) delete [] data;
00074 data=NULL;
00075 size=0;
00076 }
00077 inline myBitString(const myBitString &rhs) : size(rhs.size), data(NULL)
00078 {
00079 data = new(nothrow) unsigned int[size];
00080 if (!data)
00081 {
00082 cerr << "myBitString.operator=(" << size << ") out of memory" << endl;
00083 exit(1);
00084 }
00085 for (int i=size-1; i>=0; --i) data[i] = rhs.data[i];
00086 }
00087 inline myBitString& operator= (const myBitString &rhs)
00088 {
00089 if (data) delete [] data;
00090 size=rhs.size;
00091 data = new(nothrow) unsigned int[size];
00092 if (!data)
00093 {
00094 cerr << "myBitString.operator=(" << size << ") out of memory" << endl;
00095 exit(1);
00096 }
00097 for (int i=size-1; i>=0; --i) data[i] = rhs.data[i];
00098 return *this;
00099 }
00100 inline int count(const bool b = true) const
00101 {
00102 int z=0;
00103 if (!b) return -1;
00104 for (int i=0; i<size; i++)
00105 {
00106 register unsigned int h = data[i];
00107 while (h)
00108 {
00109 if (h&1) z++;
00110 h>>=1;
00111 }
00112 }
00113 return z;
00114 }
00115 inline int first(const bool b = true) const
00116 {
00117 if (b)
00118 {
00119 for (int i=0; i<size; i++)
00120 {
00121 register unsigned int h = data[i];
00122 if (h)
00123 {
00124 int j;
00125 asm("bsfl %0,%1" : "+g" (h), "=c" (j) );
00126 return 32*i+j;
00127 }
00128 }
00129
00130 return -1;
00131 }
00132 else
00133 {
00134 cerr << "myBitString.first(0) not implemented" << endl;
00135 exit(1);
00136 }
00137 }
00138 inline int next(int pos, const bool b = true) const
00139 {
00140 int i,j;
00141 if (pos<0) return first(b);
00142 pos++;
00143 if (b)
00144 {
00145 i=pos>>5;
00146 j=pos&31;
00147 if (i>=size) return -1;
00148 register unsigned int h = data[i];
00149 h>>=j;
00150 if (h)
00151 {
00152 int j;
00153 asm("bsfl %0,%1" : "+g" (h), "=c" (j) );
00154 return pos+j;
00155 }
00156 else
00157 {
00158 j = 0;
00159 for (i++;i<size; i++)
00160 {
00161 h = data[i];
00162 if (h)
00163 {
00164 int j;
00165 asm("bsfl %0,%1" : "+g" (h), "=c" (j) );
00166 return 32*i+j;
00167 }
00168 }
00169 }
00170 return -1;
00171 }
00172 else
00173 {
00174 cerr << "myBitString.next(0) not implemented" << endl;
00175 exit(1);
00176 }
00177 }
00178 inline int last(const bool b = true) const
00179 {
00180 if (b)
00181 {
00182 for (register int i=size-1; i>=0; i--)
00183 {
00184
00185 register unsigned int h = data[i];
00186 if (h)
00187 {
00188 int j;
00189 asm("bsrl %0,%1" : "+g" (h), "=c" (j) );
00190 return 32*i+j;
00191 }
00192 }
00193
00194 return -1;
00195 }
00196 else
00197 {
00198 cerr << "myBitString.last(0) not implemented" << endl;
00199 exit(1);
00200 }
00201 }
00202 inline int prev(int pos, const bool b = true) const
00203 {
00204 if (pos>32*size) return last(b);
00205 if (pos<=0) return -1;
00206 pos--;
00207 if (b)
00208 {
00209 int i=pos>>5;
00210 int j=pos&31;
00211 register unsigned int h = data[i];
00212 h<<=31-j;
00213 if (h)
00214 {
00215 int j;
00216 asm("bsrl %0,%1" : "+g" (h), "=c" (j) );
00217 return pos-(31-j);
00218 }
00219 else
00220 {
00221 for (i--;i>=0; i--)
00222 {
00223 h = data[i];
00224 if (h)
00225 {
00226 int j;
00227 asm("bsrl %0,%1" : "+g" (h), "=c" (j) );
00228 return 32*i+j;
00229 }
00230 }
00231 }
00232 return -1;
00233 }
00234 else
00235 {
00236 cerr << "myBitString.prev(0) not implemented" << endl;
00237 exit(1);
00238 }
00239 }
00240 inline void invert(const int pos)
00241 {
00242
00243 int i=pos>>5;
00244 if (i>=size) resize(i+1);
00245 asm volatile ("btcl %1,(%0)" : : "q" (data), "r" (pos) : "cc");
00246
00247 }
00248 inline bool test_and_invert(const int pos)
00249 {
00250
00251 int i=pos>>5;
00252 if (i>=size) resize(i+1);
00253 bool ret;
00254 asm ("btcl %[pos],(%[data])\n\tsetc %[flag]" : [flag] "=g" (ret) : [data] "q" (data), [pos] "r" (pos): "cc");
00255
00256 return ret;
00257 }
00258 inline bool test(const int pos) const
00259 {
00260 if ((pos>>5)>=size) return false;
00261 bool r;
00262 asm ("btl %2,(%1)\n\tsetc %0" : "=r" (r) : "q" (data), "r" (pos) : "cc");
00263 return r;
00264 }
00265 inline void set(const int pos, const bool b)
00266 {
00267
00268 register int i=pos>>5;
00269 if (i>=size)
00270 {
00271 if (b)
00272 resize(i+1);
00273 else
00274 return;
00275 }
00276 if (b)
00277 asm volatile ("btsl %1,(%0)" : : "q" (data), "r" (pos) : "cc");
00278 else
00279 asm volatile ("btrl %1,(%0)" : : "q" (data), "r" (pos) : "cc");
00280
00281 }
00282 inline void set(const int pos)
00283 {
00284 register int i=pos>>5;
00285 if (i>=size) resize(i+1);
00286 asm ("btsl %1,(%0)" : : "q" (data), "r" (pos) : "cc");
00287
00288 }
00289 inline void _xor(const myBitString &s)
00290 {
00291 int i=s.size-1;
00292 while (i>=0 && s.data[i]==0) i--;
00293 if (i>=size)
00294 {
00295 cerr << "myBitString.xor calls resize" << endl;
00296 resize(i+1);
00297 }
00298 for (; i>=0; --i)
00299 data[i] ^= s.data[i];
00300 }
00301 inline const myBitString& operator ^= (const myBitString &s)
00302 {
00303 _xor(s); return *this;
00304 }
00305
00306 inline void _and(const myBitString &s1, const myBitString &s2)
00307
00308 {
00309 int i = MIN(s1.size,s2.size)-1;
00310 if (i>=size)
00311 {
00312
00313
00314
00315
00316 if (data) delete [] data;
00317 size=i+1;
00318 data = new unsigned int[size];
00319
00320 }
00321 else
00322 {
00323 register int j=size-1;
00324 while (j>i) data[j--]=0;
00325 }
00326 for (; i>=0; --i)
00327 data[i] = s1.data[i] & s2.data[i];
00328 }
00329
00330 template<typename T> void test_and_add_carry(const myBitString &s2, T &CarryVec) const
00331
00332
00333
00334
00335
00336 {
00337 const int s = MIN(size,s2.size);
00338 const int Bits = 8*sizeof(unsigned int);
00339 for (int i=0; i<s; ++i)
00340 {
00341 register unsigned int h = data[i] & s2.data[i];
00342 register int j=0;
00343 while (h)
00344 {
00345 if (h&0xff)
00346 {
00347 CarryVec[Bits*i+j ]+=h&1;
00348 CarryVec[Bits*i+j+1]+=(h>>1)&1;
00349 CarryVec[Bits*i+j+2]+=(h>>2)&1;
00350 CarryVec[Bits*i+j+3]+=(h>>3)&1;
00351 CarryVec[Bits*i+j+4]+=(h>>4)&1;
00352 CarryVec[Bits*i+j+5]+=(h>>5)&1;
00353 CarryVec[Bits*i+j+6]+=(h>>6)&1;
00354 CarryVec[Bits*i+j+7]+=(h>>7)&1;
00355 }
00356 h>>=8;
00357 j+=8;
00358 }
00359 }
00360 }
00361
00362 #if 1 && defined(ASM_SSE2)
00363 #if defined(VERBOSE)
00364 #warning "support for test_and_add_carry(...), (uint16, using SSE2)"
00365 #endif
00366 void test_and_add_carry(const myBitString &s2, unsigned short int CarryVec[]) const
00367
00368
00369
00370 {
00371
00372
00373 static const unsigned short int PackedMultipl[8] __attribute__ ((aligned (16))) = { 128,64,32,16,8,4,2,1 };
00374
00375
00376 #ifdef DEBUG
00377 if (reinterpret_cast<unsigned long int>(&PackedMultipl[0]) & 0x0fUL)
00378 {
00379 cerr << "Address of PackedMultipl is " << &PackedMultipl[0]
00380 << " which is not 16 byte aligned as requested!" << endl;
00381 }
00382 #endif
00383
00384 int s = MIN(size,s2.size)-1;
00385
00386
00387
00388
00389
00390
00391
00392 asm volatile ( \
00393 "test %[i],%[i] \n\t" \
00394 "js 9f \n\t" \
00395 "movl $0x00010001,%%eax \n\t" \
00396 "movl %[i],%%edx \n\t" \
00397 "movd %%eax,%%xmm7 \n\t" \
00398 "movdqu %[PM],%%xmm6 # movdqa would be better, but cygwin alignment seems to be broken \n\t" \
00399 "movl (%[data1],%[i],4),%%eax \n\t" \
00400 "pshufd $0x00,%%xmm7,%%xmm7 \n\t" \
00401 "shll $6,%%edx \n\t" \
00402 "andl (%[data2],%[i],4),%%eax \n\t" \
00403 "addl %[V],%%edx \n\t" \
00404 "movd %%eax,%%xmm4 \n\t" \
00405 "mov %[i],%%eax \n\t" \
00406 "1: \n\t" \
00407 "dec %%eax \n\t" \
00408 "cmovs %[i],%%eax \n\t" \
00409 "prefetchnta -72(%[data2],%[i],4) \n\t" \
00410 "pshuflw $0x00,%%xmm4,%%xmm1 \n\t" \
00411 "pshuflw $0x55,%%xmm4,%%xmm3 \n\t" \
00412 "movd (%[data1],%%eax,4),%%xmm5 \n\t" \
00413 "pshufd $0x00,%%xmm1,%%xmm1 \n\t" \
00414 "pshufd $0x00,%%xmm3,%%xmm2 \n\t" \
00415 "pmullw %%xmm6,%%xmm1 \n\t" \
00416 "movd (%[data2],%%eax,4),%%xmm4 \n\t" \
00417 "movdqa %%xmm1,%%xmm0 \n\t" \
00418 "psrlw $15,%%xmm1 \n\t" \
00419 "pmullw %%xmm6,%%xmm2 \n\t" \
00420 "psrlw $7,%%xmm0 \n\t" \
00421 "movdqa %%xmm2,%%xmm3 \n\t" \
00422 "psrlw $7,%%xmm2 \n\t" \
00423 "pand %%xmm7,%%xmm0 \n\t" \
00424 "psrlw $15,%%xmm3 \n\t" \
00425 "pand %%xmm7,%%xmm2 \n\t" \
00426 "paddw (%%edx),%%xmm0 \n\t" \
00427 "paddw 16(%%edx),%%xmm1 \n\t" \
00428 "pand %%xmm5,%%xmm4 \n\t" \
00429 "paddw 32(%%edx),%%xmm2 \n\t" \
00430 "paddw 48(%%edx),%%xmm3 \n\t" \
00431 "movdqa %%xmm0,(%%edx) \n\t" \
00432 "movdqa %%xmm1,16(%%edx) \n\t" \
00433 "movdqa %%xmm2,32(%%edx) \n\t" \
00434 "movdqa %%xmm3,48(%%edx) \n\t" \
00435 "3: subl $64,%%edx \n\t" \
00436 "decl %[i] \n\t" \
00437 "jns 1b \n\t" \
00438 "9: " \
00439 : [i] "+r" (s)
00440 : [PM] "m" (PackedMultipl[0]), [data1] "r" (data), [data2] "r" (s2.data), [V] "D" (CarryVec)
00441 : "memory", "cc", "eax", "edx", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7");
00442 }
00443 #elif defined(ASM_ATHLON) || (defined (ASM_MMX) && defined(ASM_SSE))
00444 #ifdef DEBUG
00445 #warning "support for test_and_add_carry(...), (uint16, using MMX/MMX-extensions)"
00446 #endif
00447 void test_and_add_carry(const myBitString &s2, unsigned short int CarryVec[]) const
00448
00449
00450
00451 {
00452
00453 static const unsigned short int PackedMultipl[4] __attribute__ ((aligned (8))) = { 8,4,2,1 };
00454 int s = MIN(size,s2.size)-1;
00455 asm volatile ( \
00456 "test %[i],%[i] \n\t" \
00457 "js 9f \n\t" \
00458 "movl $0x00010001,%%eax \n\t" \
00459 "movd %%eax,%%mm7 \n\t" \
00460 "movq %[PM],%%mm6 \n\t" \
00461 "punpckldq %%mm7,%%mm7 \n\t" \
00462 "1: movl (%[data1],%[i],4),%%eax \n\t" \
00463 "movl %[i],%%edx \n\t" \
00464 "andl (%[data2],%[i],4),%%eax \n\t" \
00465 "shll $5,%%edx \n\t" \
00466 "prefetchnta -64(%[data2],%[i],4) \n\t" \
00467 "2: \n\t" \
00468 "movd %%eax,%%mm0 \n\t" \
00469 "pshufw $0x00,%%mm0,%%mm0 \n\t" \
00470 "pmullw %%mm6,%%mm0 \n\t" \
00471 "movq %%mm0,%%mm1 \n\t" \
00472 "movq %%mm0,%%mm2 \n\t" \
00473 "movq %%mm0,%%mm3 \n\t" \
00474 "psrlw $3,%%mm0 \n\t" \
00475 "psrlw $7,%%mm1 \n\t" \
00476 "psrlw $11,%%mm2 \n\t" \
00477 "psrlw $15,%%mm3 \n\t" \
00478 "pand %%mm7,%%mm0 \n\t" \
00479 "pand %%mm7,%%mm1 \n\t" \
00480 "pand %%mm7,%%mm2 \n\t" \
00481 "paddw (%[V],%%edx,2),%%mm0 \n\t" \
00482 "paddw 8(%[V],%%edx,2),%%mm1 \n\t" \
00483 "paddw 16(%[V],%%edx,2),%%mm2 \n\t" \
00484 "paddw 24(%[V],%%edx,2),%%mm3 \n\t" \
00485 "movq %%mm0,(%[V],%%edx,2) \n\t" \
00486 "movq %%mm1,8(%[V],%%edx,2) \n\t" \
00487 "movq %%mm2,16(%[V],%%edx,2) \n\t" \
00488 "movq %%mm3,24(%[V],%%edx,2) \n\t" \
00489 "addl $16,%%edx \n\t" \
00490 "shrl $16,%%eax \n\t" \
00491 "jnz 2b \n\t" \
00492 "decl %[i] \n\t" \
00493 "jns 1b \n\t" \
00494 "emms \n\t" \
00495 "9: " \
00496 : [i] "+r" (s)
00497 : [PM] "m" (PackedMultipl[0]), [data1] "r" (data), [data2] "r" (s2.data), [V] "D" (CarryVec)
00498 : "memory", "cc", "eax", "edx", "mm0", "mm1", "mm2", "mm3", "mm6", "mm7");
00499 }
00500 #else
00501 void test_and_add_carry(const myBitString &s2, unsigned short int CarryVec[]) const
00502
00503
00504
00505 {
00506
00507 int s = MIN(size,s2.size)-1;
00508 asm volatile ( \
00509 "test %[i],%[i] \n\t" \
00510 "js 9f \n\t" \
00511 "1: movl (%[data1],%[i],4),%%eax \n\t" \
00512 "movl %[i],%%edx \n\t" \
00513 "andl (%[data2],%[i],4),%%eax \n\t" \
00514 "shll $5,%%edx \n\t" \
00515 "2: shrl %%eax \n\t" \
00516 "adcw $0,(%[V],%%edx,2) \n\t" \
00517 "shrl %%eax \n\t" \
00518 "adcw $0,2(%[V],%%edx,2) \n\t" \
00519 "shrl %%eax \n\t" \
00520 "adcw $0,4(%[V],%%edx,2) \n\t" \
00521 "shrl %%eax \n\t" \
00522 "adcw $0,6(%[V],%%edx,2) \n\t" \
00523 "shrl %%eax \n\t" \
00524 "adcw $0,8(%[V],%%edx,2) \n\t" \
00525 "shrl %%eax \n\t" \
00526 "adcw $0,10(%[V],%%edx,2) \n\t" \
00527 "shrl %%eax \n\t" \
00528 "adcw $0,12(%[V],%%edx,2) \n\t" \
00529 "shrl %%eax \n\t" \
00530 "adcw $0,14(%[V],%%edx,2) \n\t" \
00531 "addl $8,%%edx \n\t" \
00532 "test %%eax,%%eax \n\t" \
00533 "jnz 2b \n\t" \
00534 "decl %[i] \n\t" \
00535 "jns 1b \n\t" \
00536 "9: " \
00537 : [i] "+r" (s)
00538 : [data1] "r" (data), [data2] "r" (s2.data), [V] "D" (CarryVec)
00539 : "memory", "cc", "eax", "edx");
00540 }
00541 #endif
00542
00543
00544 #if 1
00545 void test_and_add_carry(const myBitString &s2, unsigned int CarryVec[]) const
00546
00547
00548
00549 {
00550
00551 int s = MIN(size,s2.size)-1;
00552 asm volatile ( \
00553 "test %[i],%[i] \n\t" \
00554 "js 9f \n\t" \
00555 "1: movl (%[data1],%[i],4),%%eax \n\t" \
00556 "movl %[i],%%edx \n\t" \
00557 "andl (%[data2],%[i],4),%%eax \n\t" \
00558 "shll $5,%%edx \n\t" \
00559 "2: btl $0,%%eax \n\t" \
00560 "adcl $0,(%[V],%%edx,4) \n\t" \
00561 "btl $1,%%eax \n\t" \
00562 "adcl $0,4(%[V],%%edx,4) \n\t" \
00563 "btl $2,%%eax \n\t" \
00564 "adcl $0,8(%[V],%%edx,4) \n\t" \
00565 "btl $3,%%eax \n\t" \
00566 "adcl $0,12(%[V],%%edx,4) \n\t" \
00567 "btl $4,%%eax \n\t" \
00568 "adcl $0,16(%[V],%%edx,4) \n\t" \
00569 "btl $5,%%eax \n\t" \
00570 "adcl $0,20(%[V],%%edx,4) \n\t" \
00571 "btl $6,%%eax \n\t" \
00572 "adcl $0,24(%[V],%%edx,4) \n\t" \
00573 "btl $7,%%eax \n\t" \
00574 "adcl $0,28(%[V],%%edx,4) \n\t" \
00575 "addl $8,%%edx \n\t" \
00576 "shrl $8,%%eax \n\t" \
00577 "jnz 2b \n\t" \
00578 "decl %[i] \n\t" \
00579 "jns 1b \n\t" \
00580 "9: " \
00581 : [i] "+r" (s)
00582 : [data1] "r" (data), [data2] "r" (s2.data), [V] "D" (CarryVec)
00583 : "memory", "cc", "eax", "edx");
00584 }
00585 #endif
00586
00587 };
00588
00589 #endif