/**
 * The Atomic module provides basic support for so-called lock-free
 * concurrent programming.
 * The current design replaces the previous Atomic module by Sean and is inspired
 * partly by the LLVM atomic operations and by Sean's version.
 *
 * If no atomic ops are available an (inefficient) fallback solution is provided.
 * For classes atomic access means atomic access to their *address*, not their content.
 *
 * If you want unique counters or flags for communication in multithreaded settings,
 * look at tango.core.sync.Counter, which provides them in a better way and copes
 * better with the absence of atomic ops.
 *
 * Copyright: Copyright (C) 2008-2010 the blip developer group
 * License:   BSD style: $(LICENSE)
 * Author:    Fawzi Mohamed
 */

module tango.core.sync.Atomic;

version( LDC )
{
    import ldc.intrinsics;
}

private {
    // from tango.core.traits:
    /**
     * Evaluates to true if T is a signed or unsigned integer type.
     */
    template isIntegerType( T )
    {
        const bool isIntegerType = isSignedIntegerType!(T) ||
                                   isUnsignedIntegerType!(T);
    }
    /**
     * Evaluates to true if T is a pointer or class type.
     */
    template isPointerOrClass(T)
    {
        const isPointerOrClass = is(T==class);
    }

    template isPointerOrClass(T : T*)
    {
        const isPointerOrClass = true;
    }
    /**
     * Evaluates to true if T is a signed integer type.
     */
    template isSignedIntegerType( T )
    {
        const bool isSignedIntegerType = is( T == byte )  ||
                                         is( T == short ) ||
                                         is( T == int )   ||
                                         is( T == long )/+||
                                         is( T == cent )+/;
    }
    /**
     * Evaluates to true if T is an unsigned integer type.
     */
    template isUnsignedIntegerType( T )
    {
        const bool isUnsignedIntegerType = is( T == ubyte )  ||
                                           is( T == ushort ) ||
                                           is( T == uint )   ||
                                           is( T == ulong )/+||
                                           is( T == ucent )+/;
    }

    /// substitutes classes with void*
    template ClassPtr(T){
        static if (is(T==class)){
            alias void* ClassPtr;
        } else {
            alias T ClassPtr;
        }
    }
}

extern(C) void thread_yield();
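// Illustrative sketch (not part of the module interface, and never compiled since
// it is guarded by version(none)): what the private traits above evaluate to.
version(none) {
    static assert(  isIntegerType!(int) && isIntegerType!(ulong) );
    static assert( !isIntegerType!(float) );
    static assert(  isPointerOrClass!(Object) && isPointerOrClass!(int*) );
    static assert( !isPointerOrClass!(int) );
    static assert( is(ClassPtr!(Object) == void*) && is(ClassPtr!(int) == int) );
}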
// NOTE: Strictly speaking, the x86 supports atomic operations on
//       unaligned values. However, this is far slower than the
//       common case, so such behavior should be prohibited.
template atomicValueIsProperlyAligned( T )
{
    bool atomicValueIsProperlyAligned( size_t addr )
    {
        return addr % ClassPtr!(T).sizeof == 0;
    }
}

/*
 * A barrier disallows some kinds of intermixing and out-of-order execution
 * and ensures that all operations of one kind are executed before the operations of the other kind.
 * Which kinds of mixing are disallowed depends on the template arguments.
 * These are global barriers: the whole memory is synchronized (devices excluded if device is false).
 *
 * The actual barrier enforced might be stronger than the requested one.
 *
 * if ll is true loads before the barrier are not allowed to mix with loads after the barrier
 * if ls is true loads before the barrier are not allowed to mix with stores after the barrier
 * if sl is true stores before the barrier are not allowed to mix with loads after the barrier
 * if ss is true stores before the barrier are not allowed to mix with stores after the barrier
 * if device is true also uncached and device memory is synchronized
 *
 * Barriers are typically paired.
 *
 * For example if you want to ensure that all writes
 * are done before setting a flag that communicates that an object is initialized you would
 * need memoryBarrier(false,false,false,true) before setting the flag.
 * To read that flag before reading the rest of the object you would need a
 * memoryBarrier(true,false,false,false) after having read the flag.
 *
 * I believe that these two barriers are called release and acquire, but you find several
 * incompatible definitions around (some obviously wrong), so some care might be in order.
 * To be safer memoryBarrier(false,true,false,true) might be used for release, and
 * memoryBarrier(true,false,true,false) for acquire, which are slightly stronger.
 *
 * These barriers are also called write barrier and read barrier respectively.
 *
 * A full memory fence is (true,true,true,true) and ensures that stores and loads before the
 * barrier are done before stores and loads after it.
 * Keep in mind that even with a full barrier you still normally need two of them, to avoid that the
 * other process reorders loads (for example) and still sees things in the wrong order.
 */
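// Illustrative sketch of the pairing described above (hypothetical names, never
// compiled): a writer initializes an object and then publishes it through a flag,
// a reader checks the flag before touching the object. With strictFences==false
// the helpers writeBarrier/readBarrier defined below issue exactly these barriers.
version(none) {
    Object sharedObj;           // the data being published
    bool   sharedReady = false; // the flag communicating "sharedObj is initialized"

    void publisher(Object newObj){
        sharedObj = newObj;                           // initialize / store the data
        memoryBarrier!(false,false,false,true)();     // store-store: data stores complete first
        sharedReady = true;                           // then set the flag
    }

    Object consumer(){
        if (sharedReady){
            memoryBarrier!(true,false,false,false)(); // load-load: flag load completes first
            return sharedObj;                         // now it is safe to read the data
        }
        return null;
    }
}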
/* llvm_memory_barrier gone?
version( LDC )
{
    void memoryBarrier(bool ll, bool ls, bool sl,bool ss,bool device=false)(){
        // XXX: this is overly conservative
        llvm_memory_fence();
    }
} else */ version(D_InlineAsm_X86){
    void memoryBarrier(bool ll, bool ls, bool sl,bool ss,bool device=false)(){
        static if (device) {
            if (ls || sl || ll || ss){
                // cpuid should sequence even more than mfence
                asm {
                    push EBX;
                    mov EAX, 0; // model, stepping
                    cpuid;
                    pop EBX;
                }
            }
        } else static if (ls || sl || (ll && ss)){ // use a sequencing operation like cpuid or simply cmpxchg instead?
            asm {
                mfence;
            }
            // this is supposedly faster and correct, but let's play it safe and use the specific instruction
            // push rax
            // xchg rax
            // pop rax
        } else static if (ll){
            asm {
                lfence;
            }
        } else static if( ss ){
            asm {
                sfence;
            }
        }
    }
} else version(D_InlineAsm_X86_64){
    void memoryBarrier(bool ll, bool ls, bool sl,bool ss,bool device=false)(){
        static if (device) {
            if (ls || sl || ll || ss){
                // cpuid should sequence even more than mfence
                asm {
                    push RBX;
                    mov RAX, 0; // model, stepping
                    cpuid;
                    pop RBX;
                }
            }
        } else static if (ls || sl || (ll && ss)){ // use a sequencing operation like cpuid or simply cmpxchg instead?
            asm {
                mfence;
            }
            // this is supposedly faster and correct, but let's play it safe and use the specific instruction
            // push rax
            // xchg rax
            // pop rax
        } else static if (ll){
            asm {
                lfence;
            }
        } else static if( ss ){
            asm {
                sfence;
            }
        }
    }
} else {
    pragma(msg,"WARNING: no atomic operations on this architecture");
    pragma(msg,"WARNING: this is *slow*, you probably want to change this!");
    int dummy;
    // acquires a lock... probably you will want to skip this
    void memoryBarrier(bool ll, bool ls, bool sl,bool ss,bool device=false)(){
        synchronized { dummy=1; }
    }
    enum{LockVersion=true}
}

static if (!is(typeof(LockVersion))) {
    enum{LockVersion=false}
}

// use stricter fences
enum{strictFences=false}

/// Utility function for a write barrier (disallow store-store reordering).
void writeBarrier(){
    memoryBarrier!(false,false,strictFences,true)();
}
/// Utility function for a read barrier (disallow load-load reordering).
void readBarrier(){
    memoryBarrier!(true,strictFences,false,false)();
}
/// Utility function for a full barrier (disallow reordering).
void fullBarrier(){
    memoryBarrier!(true,true,true,true)();
}
/*
 * Atomic swap.
 * Stores newval into val and returns the old value of val in one atomic operation.
 * Barriers are not implied, just atomicity!
 */
version(LDC){
    T atomicSwap( T )( ref T val, T newval )
    {
        T oldval = void;
        static if (isPointerOrClass!(T))
        {
            oldval = cast(T)llvm_atomic_swap!(size_t)(cast(shared(size_t)*)&val, cast(size_t)newval);
        }
        else static if (is(T == bool))
        {
            oldval = llvm_atomic_swap!(ubyte)(cast(shared(ubyte)*)&val, newval?1:0)?0:1;
        }
        else
        {
            oldval = llvm_atomic_swap!(T)(cast(shared)&val, newval);
        }
        return oldval;
    }
} else version(D_InlineAsm_X86) {
    T atomicSwap( T )( ref T val, T newval )
    in {
        // NOTE: 32 bit x86 systems support 8 byte CAS, which only requires
        //       4 byte alignment, so use size_t as the align type here.
        static if( T.sizeof > size_t.sizeof )
            assert( atomicValueIsProperlyAligned!(size_t)( cast(size_t) &val ) );
        else
            assert( atomicValueIsProperlyAligned!(T)( cast(size_t) &val ) );
    } body {
        T* posVal=&val;
        static if( T.sizeof == byte.sizeof ) {
            asm {
                mov AL, newval;
                mov ECX, posVal;
                lock; // lock always needed to make this op atomic
                xchg [ECX], AL;
            }
        }
        else static if( T.sizeof == short.sizeof ) {
            asm {
                mov AX, newval;
                mov ECX, posVal;
                lock; // lock always needed to make this op atomic
                xchg [ECX], AX;
            }
        }
        else static if( T.sizeof == int.sizeof ) {
            asm {
                mov EAX, newval;
                mov ECX, posVal;
                lock; // lock always needed to make this op atomic
                xchg [ECX], EAX;
            }
        }
        else static if( T.sizeof == long.sizeof ) {
            // 8 byte swap on a 32 bit processor, use CAS?
            static assert( false, "Invalid template type specified, 8 bytes in 32 bit mode: "~T.stringof );
        }
        else
        {
            static assert( false, "Invalid template type specified: "~T.stringof );
        }
    }
} else version (D_InlineAsm_X86_64){
    T atomicSwap( T )( ref T val, T newval )
    in {
        assert( atomicValueIsProperlyAligned!(T)( cast(size_t) &val ) );
    } body {
        T* posVal=&val;
        static if( T.sizeof == byte.sizeof ) {
            asm {
                mov AL, newval;
                mov RCX, posVal;
                lock; // lock always needed to make this op atomic
                xchg [RCX], AL;
            }
        }
        else static if( T.sizeof == short.sizeof ) {
            asm {
                mov AX, newval;
                mov RCX, posVal;
                lock; // lock always needed to make this op atomic
                xchg [RCX], AX;
            }
        }
        else static if( T.sizeof == int.sizeof ) {
            asm {
                mov EAX, newval;
                mov RCX, posVal;
                lock; // lock always needed to make this op atomic
                xchg [RCX], EAX;
            }
        }
        else static if( T.sizeof == long.sizeof ) {
            asm {
                mov RAX, newval;
                mov RCX, posVal;
                lock; // lock always needed to make this op atomic
                xchg [RCX], RAX;
            }
        }
        else
        {
            static assert( false, "Invalid template type specified: "~T.stringof );
        }
    }
} else {
    T atomicSwap( T )( ref T val, T newval )
    in {
        assert( atomicValueIsProperlyAligned!(T)( cast(size_t) &val ) );
    } body {
        T oldVal;
        synchronized(typeid(T)){
            oldVal=val;
            val=newval;
        }
        return oldVal;
    }
}
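// Illustrative sketch (hypothetical helpers, never compiled): a minimal
// test-and-set spin lock built on atomicSwap. atomicSwap guarantees only
// atomicity, so a barrier pair (mirroring the flagGet/flagSet pattern used at
// the end of this module) keeps the protected accesses inside the critical section.
version(none) {
    // pass the same lock word (an int initialized to 0) to both functions
    void spinLockAcquire(ref int lockVar){
        while (atomicSwap(lockVar, 1) == 1){
            thread_yield();     // old value was 1: somebody else holds the lock
        }
        readBarrier();          // loads of the protected data stay after the acquisition (ll barrier)
    }

    void spinLockRelease(ref int lockVar){
        writeBarrier();         // stores to the protected data complete before the release (ss barrier)
        atomicSwap(lockVar, 0); // hand the lock back
    }
}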
//---------------------
// internal conversion template
private T aCasT(T,V)(ref T val, T newval, T equalTo){
    union UVConv{V v; T t;}
    union UVPtrConv{V *v; T *t;}
    UVConv vNew,vOld,vAtt;
    UVPtrConv valPtr;
    vNew.t=newval;
    vOld.t=equalTo;
    valPtr.t=&val;
    vAtt.v=atomicCAS(*valPtr.v,vNew.v,vOld.v);
    return vAtt.t;
}
/// internal reduction to the integer sizes supported by atomicCAS
private T aCas(T)(ref T val, T newval, T equalTo){
    static if (T.sizeof==1){
        return aCasT!(T,ubyte)(val,newval,equalTo);
    } else static if (T.sizeof==2){
        return aCasT!(T,ushort)(val,newval,equalTo);
    } else static if (T.sizeof==4){
        return aCasT!(T,uint)(val,newval,equalTo);
    } else static if (T.sizeof==8){ // unclear if it is always supported...
        return aCasT!(T,ulong)(val,newval,equalTo);
    } else {
        static assert(0,"invalid type "~T.stringof);
    }
}

/*
 * Atomic compare & exchange (can be used to implement everything else).
 * Stores newval into val if val==equalTo in one atomic operation.
 * Barriers are not implied, just atomicity!
 * Returns the value that was checked against equalTo (i.e. an exchange was performed
 * if result==equalTo, otherwise one can use the result as the current value).
 */
version(LDC){
    T atomicCAS( T )( ref T val, T newval, T equalTo )
    {
        T oldval = void;
        static if (isPointerOrClass!(T))
        {
            oldval = cast(T)cast(void*)llvm_atomic_cmp_swap!(size_t)(cast(shared(size_t)*)cast(void*)&val, cast(size_t)cast(void*)equalTo, cast(size_t)cast(void*)newval);
        }
        else static if (is(T == bool)) // correct also if bool has a different size?
        {
            oldval = aCas(val,newval,equalTo); // assuming true is *always* 1 and not any non-zero value...
        }
        else static if (isIntegerType!(T))
        {
            oldval = llvm_atomic_cmp_swap!(T)(cast(shared)&val, equalTo, newval);
        } else {
            oldval = aCas(val,newval,equalTo);
        }
        return oldval;
    }
} else version(D_InlineAsm_X86) {
    version(OSX){
        extern(C) ubyte OSAtomicCompareAndSwap64(long oldValue, long newValue,
                 long *theValue); // assumes that in C sizeof(_Bool)==1 (as given in the OS X IA-32 ABI)
    }
    T atomicCAS( T )( ref T val, T newval, T equalTo )
    in {
        // NOTE: 32 bit x86 systems support 8 byte CAS, which only requires
        //       4 byte alignment, so use size_t as the align type here.
        static if( ClassPtr!(T).sizeof > size_t.sizeof )
            assert( atomicValueIsProperlyAligned!(size_t)( cast(size_t) &val ) );
        else
            assert( atomicValueIsProperlyAligned!(ClassPtr!(T))( cast(size_t) &val ) );
    } body {
        T* posVal=&val;
        static if( T.sizeof == byte.sizeof ) {
            asm {
                mov DL, newval;
                mov AL, equalTo;
                mov ECX, posVal;
                lock; // lock always needed to make this op atomic
                cmpxchg [ECX], DL;
            }
        }
        else static if( T.sizeof == short.sizeof ) {
            asm {
                mov DX, newval;
                mov AX, equalTo;
                mov ECX, posVal;
                lock; // lock always needed to make this op atomic
                cmpxchg [ECX], DX;
            }
        }
        else static if( ClassPtr!(T).sizeof == int.sizeof ) {
            asm {
                mov EDX, newval;
                mov EAX, equalTo;
                mov ECX, posVal;
                lock; // lock always needed to make this op atomic
                cmpxchg [ECX], EDX;
            }
        }
        else static if( T.sizeof == long.sizeof ) {
            // 8 byte store-if on a 32 bit processor
            version(OSX){
                union UVConv{long v; T t;}
                union UVPtrConv{long *v; T *t;}
                UVConv vEqual,vNew;
                UVPtrConv valPtr;
                vEqual.t=equalTo;
                vNew.t=newval;
                valPtr.t=&val;
                while(1){
                    if(OSAtomicCompareAndSwap64(vEqual.v, vNew.v, valPtr.v)!=0)
                    {
                        return equalTo;
                    } else {
                        {
                            T res=val;
                            if (res!is equalTo) return res;
                        }
                    }
                }
            } else {
                T res;
                asm
                {
                    push EDI;
                    push EBX;
                    lea EDI, newval;
                    mov EBX, [EDI];
                    mov ECX, 4[EDI];
                    lea EDI, equalTo;
                    mov EAX, [EDI];
                    mov EDX, 4[EDI];
                    mov EDI, val;
                    lock; // lock always needed to make this op atomic
                    cmpxchg8b [EDI];
                    lea EDI, res;
                    mov [EDI], EAX;
                    mov 4[EDI], EDX;
                    pop EBX;
                    pop EDI;
                }
                return res;
            }
        }
        else
        {
            static assert( false, "Invalid template type specified: "~T.stringof );
        }
    }
} else version (D_InlineAsm_X86_64){
    T atomicCAS( T )( ref T val, T newval, T equalTo )
    in {
        assert( atomicValueIsProperlyAligned!(T)( cast(size_t) &val ) );
    } body {
        T* posVal=&val;
        static if( T.sizeof == byte.sizeof ) {
            asm {
                mov DL, newval;
                mov AL, equalTo;
                mov RCX, posVal;
                lock; // lock always needed to make this op atomic
                cmpxchg [RCX], DL;
            }
        }
        else static if( T.sizeof == short.sizeof ) {
            asm {
                mov DX, newval;
                mov AX, equalTo;
                mov RCX, posVal;
                lock; // lock always needed to make this op atomic
                cmpxchg [RCX], DX;
            }
        }
        else static if( ClassPtr!(T).sizeof == int.sizeof ) {
            asm {
                mov EDX, newval;
                mov EAX, equalTo;
                mov RCX, posVal;
                lock; // lock always needed to make this op atomic
                cmpxchg [RCX], EDX;
            }
        }
        else static if( ClassPtr!(T).sizeof == long.sizeof ) {
            asm {
                mov RDX, newval;
                mov RAX, equalTo;
                mov RCX, posVal;
                lock; // lock always needed to make this op atomic
                cmpxchg [RCX], RDX;
            }
        }
        else
        {
            static assert( false, "Invalid template type specified: "~T.stringof );
        }
    }
} else {
    T atomicCAS( T )( ref T val, T newval, T equalTo )
    in {
        assert( atomicValueIsProperlyAligned!(T)( cast(size_t) &val ) );
    } body {
        T oldval;
        synchronized(typeid(T)){
            oldval=val;
            if(oldval==equalTo) {
                val=newval;
            }
        }
        return oldval;
    }
}

/// compare & exchange returning true if the store was performed (i.e. val was equal to equalTo)
bool atomicCASB(T)( ref T val, T newval, T equalTo ){
    return (equalTo is atomicCAS(val,newval,equalTo));
}
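// Illustrative sketch (hypothetical helper, never compiled): the typical
// compare-and-swap retry loop, here used to atomically keep a running maximum.
// atomicCAS returns the value it found, so the loop retries until either our
// store succeeds or the stored value is already large enough.
version(none) {
    void atomicMax(ref int sharedMax, int candidate){
        int seen = sharedMax;
        while (candidate > seen){
            int found = atomicCAS(sharedMax, candidate, seen);
            if (found is seen) break; // exchange happened, candidate is stored
            seen = found;             // somebody else changed it, retry against the new value
        }
    }
}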
/*
 * Loads a value from memory.
 *
 * At the moment it is assumed that all aligned memory accesses are atomic
 * in the sense that all bits are consistent with some store.
 *
 * Remove this? I know no actual architecture where this would be different.
 */
T atomicLoad(T)(ref T val)
in {
    assert( atomicValueIsProperlyAligned!(T)( cast(size_t) &val ) );
    static assert(ClassPtr!(T).sizeof<=size_t.sizeof,"invalid size for "~T.stringof);
} body {
    T res=val;
    return res;
}

/*
 * Stores a value to memory.
 *
 * At the moment it is assumed that all aligned memory accesses are atomic
 * in the sense that a load either sees the complete store or the previous value.
 *
 * Remove this? I know no actual architecture where this would be different.
 */
void atomicStore(T)(ref T val, T newVal)
in {
    assert( atomicValueIsProperlyAligned!(T)( cast(size_t) &val ), "invalid alignment" );
    static assert(ClassPtr!(T).sizeof<=size_t.sizeof,"invalid size for "~T.stringof);
} body {
    val=newVal;
}

/*
 * Increments the given value and returns the previous value with an atomic operation.
 *
 * Some architectures might allow only increments/decrements by 1.
 * No barriers implied, only atomicity!
 */
version(LDC){
    T atomicAdd(T)(ref T val, T incV){
        static if (isPointerOrClass!(T))
        {
            return cast(T)llvm_atomic_load_add!(size_t)(cast(shared(size_t)*)&val, incV);
        }
        else static if (isIntegerType!(T))
        {
            static assert( isIntegerType!(T), "invalid type "~T.stringof );
            return llvm_atomic_load_add!(T)(cast(shared)&val, cast(T)incV);
        } else {
            return atomicOp(val,delegate T(T a){ return a+incV; });
        }
    }
} else version (D_InlineAsm_X86){
    T atomicAdd(T,U=T)(ref T val, U incV_){
        T incV=cast(T)incV_;
        static if (isIntegerType!(T)||isPointerOrClass!(T)){
            T* posVal=&val;
            T res;
            static if (T.sizeof==1){
                asm {
                    mov DL, incV;
                    mov ECX, posVal;
                    lock;
                    xadd byte ptr [ECX],DL;
                    mov byte ptr res[EBP],DL;
                }
            } else static if (T.sizeof==2){
                asm {
                    mov DX, incV;
                    mov ECX, posVal;
                    lock;
                    xadd short ptr [ECX],DX;
                    mov short ptr res[EBP],DX;
                }
            } else static if (T.sizeof==4){
                asm
                {
                    mov EDX, incV;
                    mov ECX, posVal;
                    lock;
                    xadd int ptr [ECX],EDX;
                    mov int ptr res[EBP],EDX;
                }
            } else static if (T.sizeof==8){
                return atomicOp(val,delegate (T x){ return x+incV; });
            } else {
                static assert(0,"Unsupported type size");
            }
            return res;
        } else {
            return atomicOp(val,delegate T(T a){ return a+incV; });
        }
    }
} else version (D_InlineAsm_X86_64){
    T atomicAdd(T,U=T)(ref T val, U incV_){
        T incV=cast(T)incV_;
        static if (isIntegerType!(T)||isPointerOrClass!(T)){
            T* posVal=&val;
            T res;
            static if (T.sizeof==1){
                asm {
                    mov DL, incV;
                    mov RCX, posVal;
                    lock;
                    xadd byte ptr [RCX],DL;
                    mov byte ptr res[EBP],DL;
                }
            } else static if (T.sizeof==2){
                asm {
                    mov DX, incV;
                    mov RCX, posVal;
                    lock;
                    xadd short ptr [RCX],DX;
                    mov short ptr res[EBP],DX;
                }
            } else static if (T.sizeof==4){
                asm
                {
                    mov EDX, incV;
                    mov RCX, posVal;
                    lock;
                    xadd int ptr [RCX],EDX;
                    mov int ptr res[EBP],EDX;
                }
            } else static if (T.sizeof==8){
                asm
                {
                    mov RAX, val;
                    mov RDX, incV;
                    lock; // lock always needed to make this op atomic
                    xadd qword ptr [RAX],RDX;
                    mov res[EBP],RDX;
                }
            } else {
                static assert(0,"Unsupported type size for type: "~T.stringof);
            }
            return res;
        } else {
            return atomicOp(val,delegate T(T a){ return a+incV; });
        }
    }
} else {
    static if (LockVersion){
        T atomicAdd(T,U=T)(ref T val, U incV_){
            T incV=cast(T)incV_;
            static assert( isIntegerType!(T)||isPointerOrClass!(T),"invalid type: "~T.stringof );
            synchronized(typeid(T)){
                T oldV=val;
                val+=incV;
                return oldV;
            }
        }
    } else {
        T atomicAdd(T,U=T)(ref T val, U incV_){
            T incV=cast(T)incV_;
            static assert( isIntegerType!(T)||isPointerOrClass!(T),"invalid type: "~T.stringof );
            synchronized(typeid(T)){
                // retry with CAS until the add goes through
                T oldV,newV,nextVal;
                nextVal=val;
                do{
                    oldV=nextVal;
                    newV=oldV+incV;
                    nextVal=atomicCAS!(T)(val,newV,oldV);
                } while(nextVal!=oldV);
                return oldV;
            }
        }
    }
}
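// Illustrative sketch (hypothetical names, never compiled): atomicAdd as a shared
// statistics counter. The returned value is the counter as it was *before* the
// addition, which is what nextValue at the end of this module builds on.
version(none) {
    size_t itemsDone = 0;

    // several threads may call this concurrently without losing updates
    size_t countItem(){
        return atomicAdd(itemsDone, cast(size_t)1);
    }
}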
/*
 * Applies a pure function atomically.
 * The function should be pure, as it might be called several times to ensure atomicity.
 * The function should also take a short time to compute, otherwise contention is likely,
 * and there is no "fair" scheduling between a fast function (more likely to succeed) and
 * slower ones (i.e. do not use this in case of high contention).
 */
T atomicOp(T)(ref T val, T delegate(T) f){
    T oldV,newV,nextV;
    int i=0;
    nextV=val;
    do {
        oldV=nextV;
        newV=f(oldV);
        nextV=aCas!(T)(val,newV,oldV);
        if (nextV is oldV || newV is oldV) return oldV;
    } while(++i<200);
    while (true){
        thread_yield();
        oldV=val;
        newV=f(oldV);
        nextV=aCas!(T)(val,newV,oldV);
        if (nextV is oldV || newV is oldV) return oldV;
    }
}
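// Illustrative sketch (hypothetical helper, never compiled): atomicOp applied to a
// read-modify-write operation that has no dedicated instruction, here setting a bit
// in a flags word. The delegate may be evaluated more than once, so it must be pure.
version(none) {
    // returns the flags as they were before the bit was set
    uint setFlagBit(ref uint flags, uint bitMask){
        return atomicOp(flags, delegate uint(uint a){ return a | bitMask; });
    }
}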
/*
 * Reads a flag (ensuring that later accesses cannot be moved before the read).
 */
T flagGet(T)(ref T flag){
    T res;
    res=flag;
    memoryBarrier!(true,false,strictFences,false)();
    return res;
}

/*
 * Sets a flag (ensuring that all pending writes are executed before this).
 * The original value is returned.
 */
T flagSet(T)(ref T flag,T newVal){
    memoryBarrier!(false,strictFences,false,true)();
    return atomicSwap(flag,newVal);
}

/*
 * Applies op to a flag (ensuring that all pending writes are executed before this).
 * The original value is returned.
 */
T flagOp(T)(ref T flag,T delegate(T) op){
    memoryBarrier!(false,strictFences,false,true)();
    return atomicOp(flag,op);
}

/*
 * Adds incV to a flag (ensuring that all pending writes are executed before this).
 * The original value is returned.
 */
T flagAdd(T)(ref T flag,T incV=cast(T)1){
    static if (!LockVersion)
        memoryBarrier!(false,strictFences,false,true)();
    return atomicAdd(flag,incV);
}

/*
 * Returns the value of val and increments it in one atomic operation.
 * Useful for counters and to generate unique values (fast).
 * No barriers are implied.
 */
T nextValue(T)(ref T val){
    return atomicAdd(val,cast(T)1);
}
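// Illustrative sketch (hypothetical names, never compiled): nextValue as a source of
// unique ids, and flagSet/flagGet as a convenient form of the publish/consume pairing
// described in the barrier discussion above.
version(none) {
    ulong  idCounter = 0;
    Object sharedData;
    bool   dataReady = false;

    ulong newUniqueId(){
        return nextValue(idCounter);  // each caller gets a distinct value
    }

    void publish(Object obj){
        sharedData = obj;
        flagSet(dataReady, true);     // write barrier, then atomic swap of the flag
    }

    bool tryConsume(out Object obj){
        if (flagGet(dataReady)){      // flag read followed by a read barrier
            obj = sharedData;
            return true;
        }
        return false;
    }
}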