00001 #ifndef MYBITSTRING_HEADER_
00002 #define MYBITSTRING_HEADER_
00003
00010 #include <new>
00011 using std::nothrow;
00012
00013 #include "utils.H"
00014
00015 #if 0
00016
00017 typedef unsigned int BS_unsigned_int;
00018 typedef signed int BS_signed_int;
00019 #define DW_per_BS_unsigned_int 1
00020 #define BS_shifts 5
00021 #define BS_bits 32
00022 #define BS_mask 31
00023 #else
00024
00025 typedef unsigned long int BS_unsigned_int;
00026 typedef signed long int BS_signed_int;
00027 #define DW_per_BS_unsigned_int 2
00028 #define BS_shifts 6
00029 #define BS_bits 64
00030 #define BS_mask 63
00031 #endif
00032
00033 class myBitString
00034 {
00035 protected:
00036 int size;
00037 BS_unsigned_int *data;
00038 inline void resize(int newsize)
00039 {
00040 if (newsize < size) return;
00041
00042 BS_unsigned_int *olddata = data;
00043 data = new(nothrow) BS_unsigned_int[newsize];
00044 if (!data)
00045 {
00046 cerr << "myBitString.resize(" << newsize << ") out of memory" << endl;
00047 exit(1);
00048 }
00049 for (int i=0; i<size; i++) data[i] = olddata[i];
00050 for (int i=size; i< newsize; i++) data[i]=0;
00051 size=newsize;
00052 if (olddata) delete [] olddata;
00053 }
00054 public:
00055 inline void optisize(void)
00056 {
00057 int i=size-1;
00058 while (i>=0 && data[i]==0) i--;
00059 if (i==size-1) return;
00060
00061 if (size<128/DW_per_BS_unsigned_int)
00062 {
00063
00064
00065 #ifdef VERBOSE
00066 cout << "MyBitString::Optisize, no reallocate " << size << " to " << i+1 << endl;
00067 #endif
00068 size=i+1; return;
00069 }
00070
00071 #ifdef VERBOSE
00072 cout << "MyBitString::Optisize, reallocate" << size << " to " << i+1 << endl;
00073 #endif
00074 if (i<0) { delete [] data; data=NULL; size=0; return; }
00075
00076 size=i+1;
00077 BS_unsigned_int *olddata = data;
00078 data = new(nothrow) BS_unsigned_int[size];
00079 if (!data)
00080 {
00081 cerr << "myBitString.optisize(" << size << ") out of memory" << endl;
00082 exit(1);
00083 }
00084 for (; i>=0; --i) data[i] = olddata[i];
00085 if (olddata) delete [] olddata;
00086 }
00087
00088 inline myBitString(void) : size(0), data(NULL) { }
00089 inline ~myBitString(void)
00090 {
00091 if (data) delete [] data;
00092 data=NULL;
00093 size=0;
00094 }
00095 inline myBitString(const myBitString &rhs) : size(rhs.size), data(NULL)
00096 {
00097 data = new(nothrow) BS_unsigned_int[size];
00098 if (!data)
00099 {
00100 cerr << "myBitString.operator=(" << size << ") out of memory" << endl;
00101 exit(1);
00102 }
00103 for (int i=size-1; i>=0; --i) data[i] = rhs.data[i];
00104 }
00105 inline myBitString& operator= (const myBitString &rhs)
00106 {
00107 if (data) delete [] data;
00108 size=rhs.size;
00109 data = new(nothrow) BS_unsigned_int[size];
00110 if (!data)
00111 {
00112 cerr << "myBitString.operator=(" << size << ") out of memory" << endl;
00113 exit(1);
00114 }
00115 for (int i=size-1; i>=0; --i) data[i] = rhs.data[i];
00116 return *this;
00117 }
00118 inline int count(const bool b = true) const
00119 {
00120 int z=0;
00121 if (!b) return -1;
00122 for (int i=0; i<size; i++)
00123 {
00124 register BS_unsigned_int h = data[i];
00125 while (h)
00126 {
00127 if (h&1) z++;
00128 h>>=1;
00129 }
00130 }
00131 return z;
00132 }
00133 inline int first(const bool b = true) const
00134 {
00135 if (b)
00136 {
00137 for (int i=0; i<size; i++)
00138 {
00139 register BS_unsigned_int h = data[i];
00140 if (h)
00141 {
00142 BS_signed_int j;
00143 asm("bsf %0,%1" : "+g" (h), "=c" (j) );
00144 return BS_bits*i+j;
00145 }
00146 }
00147
00148 return -1;
00149 }
00150 else
00151 {
00152 cerr << "myBitString.first(0) not implemented" << endl;
00153 exit(1);
00154 }
00155 }
00156 inline int next(int pos, const bool b = true) const
00157 {
00158 int i,j;
00159 if (pos<0) return first(b);
00160 pos++;
00161 if (b)
00162 {
00163 i=pos>>BS_shifts;
00164 j=pos&BS_mask;
00165 if (i>=size) return -1;
00166 register BS_unsigned_int h = data[i];
00167 h>>=j;
00168 if (h)
00169 {
00170 BS_signed_int j;
00171 asm("bsf %0,%1" : "+g" (h), "=c" (j) );
00172 return pos+j;
00173 }
00174 else
00175 {
00176 j = 0;
00177 for (i++;i<size; i++)
00178 {
00179 h = data[i];
00180 if (h)
00181 {
00182 BS_signed_int j;
00183 asm("bsf %0,%1" : "+g" (h), "=c" (j) );
00184 return BS_bits*i+j;
00185 }
00186 }
00187 }
00188 return -1;
00189 }
00190 else
00191 {
00192 cerr << "myBitString.next(0) not implemented" << endl;
00193 exit(1);
00194 }
00195 }
00196 inline int last(const bool b = true) const
00197 {
00198 if (b)
00199 {
00200 for (register int i=size-1; i>=0; i--)
00201 {
00202
00203 register BS_unsigned_int h = data[i];
00204 if (h)
00205 {
00206 BS_signed_int j;
00207 asm("bsr %0,%1" : "+g" (h), "=c" (j) );
00208 return BS_bits*i+j;
00209 }
00210 }
00211
00212 return -1;
00213 }
00214 else
00215 {
00216 cerr << "myBitString.last(0) not implemented" << endl;
00217 exit(1);
00218 }
00219 }
00220 inline int prev(int pos, const bool b = true) const
00221 {
00222 if (pos>BS_bits*size) return last(b);
00223 if (pos<=0) return -1;
00224 pos--;
00225 if (b)
00226 {
00227 int i=pos>>BS_shifts;
00228 int j=pos&BS_mask;
00229 register BS_unsigned_int h = data[i];
00230 h<<=BS_mask-j;
00231 if (h)
00232 {
00233 BS_signed_int j;
00234 asm("bsr %0,%1" : "+g" (h), "=c" (j) );
00235 return pos-(BS_mask-j);
00236 }
00237 else
00238 {
00239 for (i--;i>=0; i--)
00240 {
00241 h = data[i];
00242 if (h)
00243 {
00244 BS_signed_int j;
00245 asm("bsr %0,%1" : "+g" (h), "=c" (j) );
00246 return BS_bits*i+j;
00247 }
00248 }
00249 }
00250 return -1;
00251 }
00252 else
00253 {
00254 cerr << "myBitString.prev(0) not implemented" << endl;
00255 exit(1);
00256 }
00257 }
00258 inline void invert(const int pos)
00259 {
00260
00261 int i=pos>>BS_shifts;
00262 if (i>=size) resize(i+1);
00263
00264
00265 asm volatile ("btcl %1,(%0)" : : "q" (data), "r" (pos) : "cc");
00266
00267 }
00268 inline bool test_and_invert(const int pos)
00269 {
00270
00271 int i=pos>>BS_shifts;
00272 if (i>=size) resize(i+1);
00273 bool ret;
00274 asm ("btcl %[pos],(%[data])\n\tsetc %[flag]" : [flag] "=g" (ret) : [data] "q" (data), [pos] "r" (pos): "cc");
00275
00276 return ret;
00277 }
00278 inline bool test(const int pos) const
00279 {
00280 if ((pos>>BS_shifts)>=size) return false;
00281 bool r;
00282 asm ("btl %2,(%1)\n\tsetc %0" : "=r" (r) : "q" (data), "r" (pos) : "cc");
00283 return r;
00284 }
00285 inline void set(const int pos, const bool b)
00286 {
00287
00288 register int i=pos>>BS_shifts;
00289 if (i>=size)
00290 {
00291 if (b)
00292 resize(i+1);
00293 else
00294 return;
00295 }
00296 if (b)
00297 asm volatile ("btsl %1,(%0)" : : "q" (data), "r" (pos) : "cc");
00298 else
00299 asm volatile ("btrl %1,(%0)" : : "q" (data), "r" (pos) : "cc");
00300
00301 }
00302 inline void set(const int pos)
00303 {
00304 register int i=pos>>BS_shifts;
00305 if (i>=size) resize(i+1);
00306 asm ("btsl %1,(%0)" : : "q" (data), "r" (pos) : "cc");
00307
00308 }
00309 inline void _xor(const myBitString &s)
00310 {
00311 int i=s.size-1;
00312 while (i>=0 && s.data[i]==0) i--;
00313 if (i>=size)
00314 {
00315 cerr << "myBitString.xor calls resize" << endl;
00316 resize(i+1);
00317 }
00318 for (; i>=0; --i)
00319 data[i] ^= s.data[i];
00320 }
00321 inline const myBitString& operator ^= (const myBitString &s)
00322 {
00323 _xor(s); return *this;
00324 }
00325
00326 inline void _and(const myBitString &s1, const myBitString &s2)
00327
00328 {
00329 int i = MIN(s1.size,s2.size)-1;
00330 if (i>=size)
00331 {
00332
00333
00334
00335
00336 if (data) delete [] data;
00337 size=i+1;
00338 data = new BS_unsigned_int[size];
00339
00340 }
00341 else
00342 {
00343 register int j=size-1;
00344 while (j>i) data[j--]=0;
00345 }
00346 for (; i>=0; --i)
00347 data[i] = s1.data[i] & s2.data[i];
00348 }
00349
00350 template<typename T> void test_and_add_carry(const myBitString &s2, T &CarryVec) const
00351
00352
00353
00354
00355
00356 {
00357 const int s = MIN(size,s2.size);
00358 const int Bits = 8*sizeof(BS_unsigned_int);
00359 for (int i=0; i<s; ++i)
00360 {
00361 register BS_unsigned_int h = data[i] & s2.data[i];
00362 register int j=0;
00363 while (h)
00364 {
00365 if (h&0xff)
00366 {
00367 CarryVec[Bits*i+j ]+=h&1;
00368 CarryVec[Bits*i+j+1]+=(h>>1)&1;
00369 CarryVec[Bits*i+j+2]+=(h>>2)&1;
00370 CarryVec[Bits*i+j+3]+=(h>>3)&1;
00371 CarryVec[Bits*i+j+4]+=(h>>4)&1;
00372 CarryVec[Bits*i+j+5]+=(h>>5)&1;
00373 CarryVec[Bits*i+j+6]+=(h>>6)&1;
00374 CarryVec[Bits*i+j+7]+=(h>>7)&1;
00375 }
00376 h>>=8;
00377 j+=8;
00378 }
00379 }
00380 }
00381
00382 #if 1 && defined(ASM_X86_64)
00383 #if defined(VERBOSE)
00384 #warning "support for test_and_add_carry(...), (uint16, using SSE2)"
00385 #endif
00386 void test_and_add_carry(const myBitString &s2, unsigned short int CarryVec[]) const
00387
00388
00389
00390 {
00391
00392
00393 static const unsigned short int PackedMultipl[8] __attribute__ ((aligned (16))) = { 128,64,32,16,8,4,2,1 };
00394
00395
00396 #ifdef DEBUG
00397 if (reinterpret_cast<unsigned long int>(&PackedMultipl[0]) & 0x0fUL)
00398 {
00399 cerr << "Address of PackedMultipl is " << &PackedMultipl[0]
00400 << " which is not 16 byte aligned as requested!" << endl;
00401 }
00402 #endif
00403
00404 long int s = MIN(size,s2.size)*DW_per_BS_unsigned_int -1;
00405
00406
00407
00408
00409
00410
00411
00412 asm volatile ( \
00413 "test %[i],%[i] \n\t" \
00414 "js 9f \n\t" \
00415 "movl $0x00010001,%%eax \n\t" \
00416 "mov %[i],%%rdx \n\t" \
00417 "movd %%eax,%%xmm7 \n\t" \
00418 "movdqa %[PM],%%xmm6 \n\t" \
00419 "movl (%[data1],%[i],4),%%eax \n\t" \
00420 "pshufd $0x00,%%xmm7,%%xmm7 \n\t" \
00421 "shl $6,%%rdx \n\t" \
00422 "andl (%[data2],%[i],4),%%eax \n\t" \
00423 "add %[V],%%rdx \n\t" \
00424 "movd %%eax,%%xmm4 \n\t" \
00425 "mov %[i],%%rax \n\t" \
00426 "1: \n\t" \
00427 "dec %%rax \n\t" \
00428 "cmovs %[i],%%rax \n\t" \
00429 "prefetchnta -72(%[data2],%[i],4) \n\t" \
00430 "pshuflw $0x00,%%xmm4,%%xmm1 \n\t" \
00431 "pshuflw $0x55,%%xmm4,%%xmm3 \n\t" \
00432 "movd (%[data1],%%rax,4),%%xmm5 \n\t" \
00433 "pshufd $0x00,%%xmm1,%%xmm1 \n\t" \
00434 "pshufd $0x00,%%xmm3,%%xmm2 \n\t" \
00435 "pmullw %%xmm6,%%xmm1 \n\t" \
00436 "movd (%[data2],%%rax,4),%%xmm4 \n\t" \
00437 "movdqa %%xmm1,%%xmm0 \n\t" \
00438 "psrlw $15,%%xmm1 \n\t" \
00439 "pmullw %%xmm6,%%xmm2 \n\t" \
00440 "psrlw $7,%%xmm0 \n\t" \
00441 "movdqa %%xmm2,%%xmm3 \n\t" \
00442 "psrlw $7,%%xmm2 \n\t" \
00443 "pand %%xmm7,%%xmm0 \n\t" \
00444 "psrlw $15,%%xmm3 \n\t" \
00445 "pand %%xmm7,%%xmm2 \n\t" \
00446 "paddw (%%rdx),%%xmm0 \n\t" \
00447 "paddw 16(%%rdx),%%xmm1 \n\t" \
00448 "pand %%xmm5,%%xmm4 \n\t" \
00449 "paddw 32(%%rdx),%%xmm2 \n\t" \
00450 "paddw 48(%%rdx),%%xmm3 \n\t" \
00451 "movdqa %%xmm0,(%%rdx) \n\t" \
00452 "movdqa %%xmm1,16(%%rdx) \n\t" \
00453 "movdqa %%xmm2,32(%%rdx) \n\t" \
00454 "movdqa %%xmm3,48(%%rdx) \n\t" \
00455 "3: sub $64,%%rdx \n\t" \
00456 "dec %[i] \n\t" \
00457 "jns 1b \n\t" \
00458 "9: " \
00459 : [i] "+r" (s)
00460 : [PM] "m" (PackedMultipl[0]), [data1] "r" (data), [data2] "r" (s2.data), [V] "D" (CarryVec)
00461 : "memory", "cc", "rax", "rdx", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7");
00462 }
00463 #else
00464 void test_and_add_carry(const myBitString &s2, unsigned short int CarryVec[]) const
00465
00466
00467
00468 {
00469
00470 long int s = MIN(size,s2.size)-1;
00471 asm volatile ( \
00472 "test %[i],%[i] \n\t" \
00473 "js 9f \n\t" \
00474 "1: movl (%[data1],%[i],4),%%eax \n\t" \
00475 "mov %[i],%%rdx \n\t" \
00476 "andl (%[data2],%[i],4),%%eax \n\t" \
00477 "shl $5,%%rdx \n\t" \
00478 "2: shrl %%eax \n\t" \
00479 "adcw $0,(%[V],%%rdx,2) \n\t" \
00480 "shrl %%eax \n\t" \
00481 "adcw $0,2(%[V],%%rdx,2) \n\t" \
00482 "shrl %%eax \n\t" \
00483 "adcw $0,4(%[V],%%rdx,2) \n\t" \
00484 "shrl %%eax \n\t" \
00485 "adcw $0,6(%[V],%%rdx,2) \n\t" \
00486 "shrl %%eax \n\t" \
00487 "adcw $0,8(%[V],%%rdx,2) \n\t" \
00488 "shrl %%eax \n\t" \
00489 "adcw $0,10(%[V],%%rdx,2) \n\t" \
00490 "shrl %%eax \n\t" \
00491 "adcw $0,12(%[V],%%rdx,2) \n\t" \
00492 "shrl %%eax \n\t" \
00493 "adcw $0,14(%[V],%%rdx,2) \n\t" \
00494 "add $8,%%rdx \n\t" \
00495 "test %%eax,%%eax \n\t" \
00496 "jnz 2b \n\t" \
00497 "dec %[i] \n\t" \
00498 "jns 1b \n\t" \
00499 "9: " \
00500 : [i] "+r" (s)
00501 : [data1] "r" (data), [data2] "r" (s2.data), [V] "D" (CarryVec)
00502 : "memory", "cc", "eax", "rdx");
00503 }
00504 #endif
00505
00506
00507 #if 0
00508 void test_and_add_carry(const myBitString &s2, unsigned int CarryVec[]) const
00509
00510
00511
00512 {
00513
00514 int s = MIN(size,s2.size)-1;
00515 asm volatile ( \
00516 "test %[i],%[i] \n\t" \
00517 "js 9f \n\t" \
00518 "1: movl (%[data1],%[i],4),%%eax \n\t" \
00519 "movl %[i],%%edx \n\t" \
00520 "andl (%[data2],%[i],4),%%eax \n\t" \
00521 "shll $5,%%edx \n\t" \
00522 "2: btl $0,%%eax \n\t" \
00523 "adcl $0,(%[V],%%edx,4) \n\t" \
00524 "btl $1,%%eax \n\t" \
00525 "adcl $0,4(%[V],%%edx,4) \n\t" \
00526 "btl $2,%%eax \n\t" \
00527 "adcl $0,8(%[V],%%edx,4) \n\t" \
00528 "btl $3,%%eax \n\t" \
00529 "adcl $0,12(%[V],%%edx,4) \n\t" \
00530 "btl $4,%%eax \n\t" \
00531 "adcl $0,16(%[V],%%edx,4) \n\t" \
00532 "btl $5,%%eax \n\t" \
00533 "adcl $0,20(%[V],%%edx,4) \n\t" \
00534 "btl $6,%%eax \n\t" \
00535 "adcl $0,24(%[V],%%edx,4) \n\t" \
00536 "btl $7,%%eax \n\t" \
00537 "adcl $0,28(%[V],%%edx,4) \n\t" \
00538 "addl $8,%%edx \n\t" \
00539 "shrl $8,%%eax \n\t" \
00540 "jnz 2b \n\t" \
00541 "decl %[i] \n\t" \
00542 "jns 1b \n\t" \
00543 "9: " \
00544 : [i] "+r" (s)
00545 : [data1] "r" (data), [data2] "r" (s2.data), [V] "D" (CarryVec)
00546 : "memory", "cc", "eax", "edx");
00547 }
00548 #endif
00549
00550 };
00551
00552 #endif