/**
Identify the characteristics of the host CPU, providing information
about cache sizes and assembly optimisation hints.

Some of this information was extremely difficult to track down. Some of the
documents below were found only in cached versions stored by search engines!
This code relies on information found in:

$(UL
$(LI "Intel(R) 64 and IA-32 Architectures Software Developer's Manual,
    Volume 2A: Instruction Set Reference, A-M" (2007).)
$(LI "AMD CPUID Specification", Advanced Micro Devices, Rev 2.28 (2008).)
$(LI "AMD Processor Recognition Application Note For Processors Prior to AMD
    Family 0Fh Processors", Advanced Micro Devices, Rev 3.13 (2005).)
$(LI "AMD Geode(TM) GX Processors Data Book",
    Advanced Micro Devices, Publication ID 31505E, (2005).)
$(LI "AMD K6 Processor Code Optimisation", Advanced Micro Devices, Rev D (2000).)
$(LI "Application note 106: Software Customization for the 6x86 Family",
    Cyrix Corporation, Rev 1.5 (1998))
$(LI $(LINK http://ftp.intron.ac/pub/document/cpu/cpuid.htm))
$(LI "Geode(TM) GX1 Processor Series Low Power Integrated X86 Solution",
    National Semiconductor, (2002))
$(LI "The VIA Isaiah Architecture", G. Glenn Henry, Centaur Technology, Inc (2008).)
$(LI $(LINK http://www.sandpile.org/ia32/cpuid.htm))
$(LI $(LINK http://grafi.ii.pw.edu.pl/gbm/x86/cpuid.html))
$(LI "What every programmer should know about memory",
    Ulrich Drepper, Red Hat, Inc., (2007).)
$(LI "CPU Identification by the Windows Kernel", G. Chappell (2009).
    $(LINK http://www.geoffchappell.com/viewer.htm?doc=studies/windows/km/cpu/cx8.htm))
$(LI "Intel(R) Processor Identification and the CPUID Instruction, Application
    Note 485" (2009).)
)

AUTHORS:  Don Clugston,
          Tomas Lindquist Olsen $(EMAIL tomas@famolsen.dk)
COPYRIGHT: Public Domain

BUGS:   Currently only works on x86 CPUs.
Many processors have bugs in their microcode for the CPUID instruction,
so sometimes the cache information may be incorrect.
*/

module tango.core.tools.Cpuid;

// When tuning for a specific processor it is usually better to select code
// paths by feature than by model. NOTE: in practice it only pays to optimise
// for the newest Intel and AMD CPUs and to keep one fallback for the rest.
//  Pentium    -- preferPentium1()
//  PMMX       --   + mmx()
//  PPro       -- default
//  PII        --   + mmx()
//  PIII       --   + mmx() + sse()
//  PentiumM   --   + mmx() + sse() + sse2()
//  Pentium4   -- preferPentium4()
//  PentiumD   --   + isX86_64()
//  Core2      -- default + isX86_64()
//  AMD K5     -- preferPentium1()
//  AMD K6     --   + mmx()
//  AMD K6-II  --   + mmx() + 3dnow()
//  AMD K7     -- preferAthlon()
//  AMD K8     --   + sse2()
//  AMD K10    --   + isX86_64()
//  Cyrix 6x86 -- preferPentium1()
//     6x86MX  --   + mmx()

public:

/// Describes the size and behaviour of one level of cache.
struct CacheInfo
{
    /// Cache size in kilobytes, per CPU.
    /// A unified (data + code) L1 cache is reported at half its physical size.
    /// (Larger caches are not halved, because in critical loops the data
    /// normally takes far more room than the code.)
    uint size;
    /// Associativity, expressed as the number of ways, eg:$(BR)
    /// 1 = direct mapped$(BR)
    /// 2 = 2-way set associative$(BR)
    /// 3 = 3-way set associative$(BR)
    /// ubyte.max = fully associative
    ubyte associativity;
    /// Number of bytes brought into the cache on a cache miss.
    uint lineSize;
}

public:
    /// Vendor string. Display purposes only!
    /// Never use it to decide which features are present;
    /// some CPUs let the vendorID be reprogrammed.
    const(char)[] vendor()
    {
        return vendorID[];
    }

    /// Processor name string. Display purposes only.
    const(char)[] processor()
    {
        return processorName;
    }

    /// The data caches. When the machine has fewer than 5 physical cache
    /// levels, the unused entries are set to uint.max (== entire memory space).
    CacheInfo[5] datacache;

    /// Is there an on-chip x87 FPU?
    @property bool x87onChip()     { return cast(bool)(features & FPU_BIT); }
    /// Is MMX available?
    @property bool mmx()           { return cast(bool)(features & MMX_BIT); }
    /// Is SSE available?
    @property bool sse()           { return cast(bool)(features & SSE_BIT); }
    /// Is SSE2 available?
    @property bool sse2()          { return cast(bool)(features & SSE2_BIT); }
    /// Is SSE3 available?
    @property bool sse3()          { return cast(bool)(miscfeatures & SSE3_BIT); }
    /// Is SSSE3 available?
    @property bool ssse3()         { return cast(bool)(miscfeatures & SSSE3_BIT); }
    /// Is SSE4.1 available?
    @property bool sse41()         { return cast(bool)(miscfeatures & SSE41_BIT); }
    /// Is SSE4.2 available?
    @property bool sse42()         { return cast(bool)(miscfeatures & SSE42_BIT); }
    /// Is SSE4a available?
    @property bool sse4a()         { return cast(bool)(amdmiscfeatures & SSE4A_BIT); }
    /// Is AMD 3DNOW available?
    @property bool amd3dnow()      { return cast(bool)(amdfeatures & AMD_3DNOW_BIT); }
    /// Is AMD 3DNOW Ext available?
    @property bool amd3dnowExt()   { return cast(bool)(amdfeatures & AMD_3DNOW_EXT_BIT); }
    /// Are the AMD extensions to MMX available?
    @property bool amdMmx()        { return cast(bool)(amdfeatures & AMD_MMX_BIT); }
    /// Are fxsave/fxrstor available?
    @property bool hasFxsr()       { return cast(bool)(features & FXSR_BIT); }
    /// Is cmov available?
    @property bool hasCmov()       { return cast(bool)(features & CMOV_BIT); }
    /// Is rdtsc available?
    @property bool hasRdtsc()      { return cast(bool)(features & TIMESTAMP_BIT); }
    /// Is cmpxchg8b available?
    @property bool hasCmpxchg8b()  { return cast(bool)(features & CMPXCHG8B_BIT); }
    /// Is cmpxchg16b available?
    @property bool hasCmpxchg16b() { return cast(bool)(miscfeatures & CMPXCHG16B_BIT); }

    /// Are SYSENTER/SYSEXIT available?
    @property bool hasSysEnterSysExit()
    {
        // SYSENTER/SYSEXIT were buggy on the Pentium Pro and on early
        // Pentium II parts, so the feature bit is not trusted there.
        // (REF: www.geoffchappell.com).
        if (probablyIntel)
        {
            bool beforeP6 = family < 6;
            bool earlyP6  = family == 6
                         && (model < 3 || (model == 3 && stepping < 3));
            if (beforeP6 || earlyP6)
                return false;
        }
        return cast(bool)(features & SYSENTERSYSEXIT_BIT);
    }

    /// Is 3DNow prefetch available?
    @property bool has3dnowPrefetch()
    {
        return cast(bool)(amdmiscfeatures & AMD_3DNOW_PREFETCH_BIT);
    }
    /// Are LAHF and SAHF available in 64-bit mode?
    @property bool hasLahfSahf()   { return cast(bool)(amdmiscfeatures & LAHFSAHF_BIT); }
    /// Is POPCNT available?
    @property bool hasPopcnt()     { return cast(bool)(miscfeatures & POPCNT_BIT); }
    /// Is LZCNT available?
    @property bool hasLzcnt()      { return cast(bool)(amdmiscfeatures & LZCNT_BIT); }
    /// Is this an Intel64 or AMD 64 processor?
    @property bool isX86_64()      { return cast(bool)(amdfeatures & AMD64_BIT); }

    /// Is this an IA64 (Itanium) processor?
    @property bool isItanium()     { return cast(bool)(features & IA64_BIT); }

    /// Is hyperthreading supported?
    @property bool hyperThreading() { return maxCores < maxThreads; }
    /// Number of threads per CPU.
    @property uint threadsPerCPU()  { return maxThreads; }
    /// Number of cores in the CPU.
    @property uint coresPerCPU()    { return maxCores; }

    /// Optimisation hints for assembly code.
    /// For forward compatibility, the CPU is compared against different
    /// microarchitectures. For 32-bit X86, comparisons are made against
    /// the Intel PPro/PII/PIII/PM family.
    ///
    /// The major 32-bit x86 microarchitecture 'dynasties' have been:
    /// (1) Intel P6 (PentiumPro, PII, PIII, PM, Core, Core2).
    /// (2) AMD Athlon (K7, K8, K10).
    /// (3) Intel NetBurst (Pentium 4, Pentium D).
    /// (4) In-order Pentium (Pentium1, PMMX, Atom)
    /// Other early CPUs (Nx586, AMD K5, K6, Centaur C3, Transmeta,
    ///   Cyrix, Rise) were mostly in-order.
    /// Some new processors do not fit into the existing categories:
    /// Intel Atom 230/330 (family 6, model 0x1C) is an in-order core.
    /// Centaur Isaiah = VIA Nano (family 6, model F) is an out-of-order core.
    ///
    /// Within each dynasty, the optimisation techniques are largely
    /// identical (eg, use instruction pairing for group 4). Major
    /// instruction set improvements occur within each dynasty.

    /// Does this CPU run AMD K7 code better than PentiumPro..Core2 code?
    @property bool preferAthlon()
    {
        return probablyAMD && family >= 6;
    }
    /// Does this CPU run Pentium4 code better than PentiumPro..Core2 code?
    @property bool preferPentium4()
    {
        return probablyIntel && family == 0xF;
    }
    /// Does this CPU run Pentium I code better than Pentium Pro code?
    @property bool preferPentium1()
    {
        if (family < 6)
            return true;
        return family == 6 && model < 0xF && !probablyIntel;
    }

public:
    /// Processor type (vendor-dependent).
    /// This should be visible ONLY for display purposes.
    uint stepping, model, family;
    uint numCacheLevels = 1;
private:
    bool probablyIntel; // true = _probably_ an Intel processor, might be faking
    bool probablyAMD;   // true = _probably_ an AMD processor
    char[12] vendorID;
    string processorName;
    char[48] processorNameBuffer;
    uint features = 0;        // mmx, sse, sse2, hyperthreading, etc
    uint miscfeatures = 0;    // sse3, etc.
    uint amdfeatures = 0;     // 3DNow!, mmxext, etc
    uint amdmiscfeatures = 0; // sse4a, sse5, svm, etc
    uint maxCores = 1;
    uint maxThreads = 1;
    // Note that this may indicate multi-core rather than hyperthreading.
// Raw HTT flag taken from the CPUID leaf 1 EDX feature word.
bool hyperThreadingBit() { return (features&HTT_BIT)!=0;}

// feature flags CPUID1_EDX
enum : uint
{
    FPU_BIT = 1,
    TIMESTAMP_BIT = 1<<4, // rdtsc
    MDSR_BIT = 1<<5,      // RDMSR/WRMSR
    CMPXCHG8B_BIT = 1<<8,
    SYSENTERSYSEXIT_BIT = 1<<11,
    CMOV_BIT = 1<<15,
    MMX_BIT = 1<<23,
    FXSR_BIT = 1<<24,
    SSE_BIT = 1<<25,
    SSE2_BIT = 1<<26,
    HTT_BIT = 1<<28,
    IA64_BIT = 1<<30
}
// feature flags misc CPUID1_ECX
enum : uint
{
    SSE3_BIT = 1,
    PCLMULQDQ_BIT = 1<<1, // from AVX
    MWAIT_BIT = 1<<3,
    SSSE3_BIT = 1<<9,
    FMA_BIT = 1<<12,     // from AVX
    CMPXCHG16B_BIT = 1<<13,
    SSE41_BIT = 1<<19,
    SSE42_BIT = 1<<20,
    POPCNT_BIT = 1<<23,
    AES_BIT = 1<<25, // AES instructions from AVX
    OSXSAVE_BIT = 1<<27, // Used for AVX
    AVX_BIT = 1<<28
}
// NOTE(review): the disabled code below calls AVX() although the function is
// spelled Avx(), and tests FMA_BIT against `features` although FMA_BIT is
// declared with the CPUID1_ECX (miscfeatures) flags. Fix both before enabling.
/+
version(X86_64) {
    bool hasAVXinHardware() {
        // This only indicates hardware support, not OS support.
        return (miscfeatures&AVX_BIT) && (miscfeatures&OSXSAVE_BIT);
    }
    // Is AVX supported (in both hardware & OS)?
    bool Avx() {
        if (!hasAVXinHardware()) return false;
        // Check for OS support
        uint xfeatures;
        asm {mov ECX, 0; xgetbv; mov xfeatures, EAX; }
        return (xfeatures&0x6)==6;
    }
    bool hasAvxFma() {
        if (!AVX()) return false;
        return (features&FMA_BIT)!=0;
    }
}
+/
// AMD feature flags CPUID80000001_EDX
enum : uint
{
    AMD_MMX_BIT = 1<<22,
//  FXR_OR_CYRIXMMX_BIT = 1<<24, // Cyrix/NS: 6x86MMX instructions.
    FFXSR_BIT = 1<<25,
    PAGE1GB_BIT = 1<<26, // support for 1GB pages
    RDTSCP_BIT = 1<<27,
    AMD64_BIT = 1<<29,
    AMD_3DNOW_EXT_BIT = 1<<30,
    AMD_3DNOW_BIT = 1<<31
}
// AMD misc feature flags CPUID80000001_ECX
enum : uint
{
    LAHFSAHF_BIT = 1,
    LZCNT_BIT = 1<<5,
    SSE4A_BIT = 1<<6,
    AMD_3DNOW_PREFETCH_BIT = 1<<8
}

version(GNU){
    // GDC is a filthy liar. It can't actually do inline asm.
} else version(D_InlineAsm_X86) {
    version = Really_D_InlineAsm_X86;
}

version(Really_D_InlineAsm_X86) {
// Note that this code will also work for Itanium in x86 mode.

// Highest standard / extended CPUID leaf reported by the CPU (set by cpuidX86).
uint max_cpuid, max_extended_cpuid;

// CPUID2: "cache and tlb information"
//
// Reads the one-byte cache descriptors returned by CPUID leaf 2 and stores
// the matching data-cache parameters in datacache[0..3].
void getcacheinfoCPUID2()
{
    // CPUID2 is a dog's breakfast. What was Intel thinking???
    // We are only interested in the data caches

    // Looks a single descriptor byte up in the tables below and, when it
    // describes a data or unified cache, records it in datacache.
    void decipherCpuid2(ubyte x) {
        if (x==0) return;
        // Values from http://www.sandpile.org/ia32/cpuid.htm.
        // Includes Itanium and non-Intel CPUs.
        //
        // The three tables are parallel: entry i of each describes ids[i].
        // NOTE(review): the size listed for descriptor 0x2C (64) looks
        // doubtful and should be checked against Intel's descriptor table;
        // it is left unchanged here.
        static ubyte[63] ids = [
            // level 1 data cache (9 entries)
            0x0A, 0x0C, 0x0D, 0x2C, 0x60, 0x0E, 0x66, 0x67, 0x68,
            // level 2 cache (29 entries)
            0x41, 0x42, 0x43, 0x44, 0x45, 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7F,
            0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x49, 0x4E,
            0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x48, 0x80, 0x81,
            // level 3 cache (25 entries)
            0x22, 0x23, 0x25, 0x29, 0x46, 0x47, 0x4A, 0x4B, 0x4C, 0x4D,

            0xD0, 0xD1, 0xD2, 0xD6, 0xD7, 0xD8, 0xDC, 0xDD, 0xDE,
            0xE2, 0xE3, 0xE4, 0xEA, 0xEB, 0xEC
        ];
        static uint[63] sizes = [
            8, 16, 16, 64, 16, 24, 8, 16, 32,
            128, 256, 512, 1024, 2048, 1024, 128, 256, 512, 1024, 2048, 512,
            256, 512, 1024, 2048, 512, 1024, 4096, 6*1024,
            128, 192, 128, 256, 384, 512, 3072, 512, 128,
            512, 1024, 2048, 4096, 4096, 8192, 6*1024, 8192, 12*1024, 16*1024,

            512, 1024, 2048, 1024, 2048, 4096, 1024+512, 3*1024, 6*1024,
            2*1024, 4*1024, 8*1024, 12*1024, 28*1024, 24*1024
        ];
        // CPUBUG: Pentium M reports 0x2C but tests show it is only 4-way associative
        static ubyte[63] ways = [
            2, 4, 4, 8, 8, 6, 4, 4, 4,
            4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 2,
            8, 8, 8, 8, 4, 8, 16, 24,
            4, 6, 2, 4, 6, 4, 12, 8, 8,
            4, 8, 8, 8, 4, 8, 12, 16, 12, 16,
            4, 4, 4, 8, 8, 8, 12, 12, 12,
            16, 16, 16, 24, 24, 24
        ];
        // Index of the first L2 and the first L3 entry in the tables above.
        // These must match the 9 / 29 / 25 layout; the former values
        // (8 and 28+9) were one short, so 0x68 was filed as an L2 cache and
        // 0x81 as an L3 cache.
        enum { FIRSTDATA2 = 9, FIRSTDATA3 = 9+29 }
        for (int i=0; i< ids.length; ++i) {
            if (x==ids[i]) {
                // 0-based cache level: 0 = L1, 1 = L2, 2 = L3.
                int level = i< FIRSTDATA2 ? 0: i<FIRSTDATA3 ? 1 : 2;
                if (x==0x49 && family==0xF && model==0x6) level=2;
                datacache[level].size=sizes[i];
                datacache[level].associativity=ways[i];
                // Every L3 descriptor uses 64-byte lines. The L3 index is 2;
                // the previous test against 3 could never be true.
                if (level == 2 || x==0x2C || x==0x0D || (x>=0x48 && x<=0x80)
                                   || x==0x86 || x==0x87
                                   || (x>=0x66 && x<=0x68) || (x>=0x39 && x<=0x3E)){
                    datacache[level].lineSize = 64;
                } else datacache[level].lineSize = 32;
                break; // descriptor values are unique in the table
            }
        }
    }

    uint[4] a;
    bool firstTime = true;
    // On a multi-core system, this could theoretically fail, but it's only used
    // for old single-core CPUs.
    uint numinfos = 1;
    do {
        asm {
            mov EAX, 2;
            cpuid;
            mov a, EAX;
            mov a+4, EBX;
            mov a+8, ECX;
            mov a+12, EDX;
        }
        if (firstTime) {
            if (a[0]==0x0000_7001 && a[3]==0x80 && a[1]==0 && a[2]==0) {
                // Cyrix MediaGX MMXEnhanced returns: EAX= 00007001, EDX=00000080.
                // These are NOT standard Intel values
                // (TLB = 32 entry, 4 way associative, 4K pages)
                // (L1 cache = 16K, 4way, linesize16)
                datacache[0].size=8;
                datacache[0].associativity=4;
                datacache[0].lineSize=16;
                return;
            }
            // lsb of a is how many times to loop.
            numinfos = a[0] & 0xFF;
            // and otherwise it should be ignored
            a[0] &= 0xFFFF_FF00;
            firstTime = false;
        }
        for (int c=0; c<4;++c) {
            // high bit set == no info.
            if (a[c] & 0x8000_0000) continue;
            decipherCpuid2(cast(ubyte)(a[c] & 0xFF));
            decipherCpuid2(cast(ubyte)((a[c]>>8) & 0xFF));
            decipherCpuid2(cast(ubyte)((a[c]>>16) & 0xFF));
            decipherCpuid2(cast(ubyte)((a[c]>>24) & 0xFF));
        }
        // Compare before decrementing so that a reported count of 0 ends the
        // loop instead of wrapping around to uint.max iterations.
    } while (numinfos-- > 1);
}

// CPUID4: "Deterministic cache parameters" leaf
//
// Walks the sub-leaves of CPUID leaf 4, updating maxCores and filling in
// datacache for every data or unified cache that is reported.
void getcacheinfoCPUID4()
{
    int cachenum = 0;
    for(;;) {
        uint a, b, number_of_sets;
        asm {
            mov EAX, 4;
            mov ECX, cachenum;
            cpuid;
            mov a, EAX;
            mov b, EBX;
            mov number_of_sets, ECX;
        }
        ++cachenum;
        uint cachetype = a & 0x1F;
        if (cachetype==0) break; // no more caches
        uint numthreads = ((a>>14) & 0xFFF)  + 1;
        uint numcores = ((a>>26) & 0x3F) + 1;
        if (numcores > maxCores) maxCores = numcores;
        if (cachetype!=1 && cachetype!=3) continue; // we only want data & unified caches

        ++number_of_sets;
        // A reported level of 0 wraps to 255 here and is rejected below.
        ubyte level = cast(ubyte)(((a>>5)&7)-1);
        // '>=' rather than '>': level == datacache.length is already out of range.
        if (level >= datacache.length) continue; // ignore deep caches
        bool fullyAssociative = (a & 0x200) != 0;
        uint numways = (b>>22) + 1;
        // ubyte.max is reserved for "fully associative"; clamp larger way
        // counts instead of letting the cast truncate them (256 would become 0).
        datacache[level].associativity = fullyAssociative ? ubyte.max
            : (numways < ubyte.max ? cast(ubyte)numways : cast(ubyte)(ubyte.max - 1));
        datacache[level].lineSize = (b & 0xFFF)+ 1; // system coherency line size
        uint line_partitions = ((b >> 12)& 0x3FF) + 1;
        // Size = number of sets * associativity * cachelinesize * linepartitions
        // and must convert to Kb, also dividing by the number of hyperthreads using this cache.
        // The real way count is used, so that fully associative caches
        // (a single set with many ways) are not reported as one line.
        ulong sz = cast(ulong)number_of_sets * numways;
        datacache[level].size = cast(uint)(
                (sz * datacache[level].lineSize * line_partitions ) / (numthreads *1024));
        if (level == 0 && cachetype==3) {
            // Halve the size for unified L1 caches
            datacache[level].size/=2;
        }
    }
}

// CPUID8000_0005 & 6
//
// Fills datacache[0] from extended leaf 0x8000_0005 and, when available,
// datacache[1] and datacache[2] from leaf 0x8000_0006. Also raises maxCores
// from leaf 0x8000_0008 when that leaf exists.
void getAMDcacheinfo()
{
    uint c5, c6, d6;
    asm {
        mov EAX, 0x8000_0005; // L1 cache
        cpuid;
        // EAX has L1_TLB_4M.
        // EBX has L1_TLB_4K
        // EDX has L1 instruction cache
        mov c5, ECX;
    }

    datacache[0].size = ( (c5>>24) & 0xFF);
    datacache[0].associativity = cast(ubyte)( (c5 >> 16) & 0xFF);
    datacache[0].lineSize = c5 & 0xFF;

    if (max_extended_cpuid >= 0x8000_0006) {
        // AMD K6-III or K6-2+ or later.
        // Kept in a uint: a byte-sized counter would wrap to 0 for a field
        // value of 0xFF and then be used as a divisor below.
        uint numcores = 1;
        if (max_extended_cpuid >=0x8000_0008) {
            uint c8;
            asm {
                mov EAX, 0x8000_0008;
                cpuid;
                mov c8, ECX;
            }
            // The low byte of ECX holds (number of cores - 1).
            numcores = (c8 & 0xFF) + 1;
            if (numcores>maxCores) maxCores = numcores;
        }
        asm {
            mov EAX, 0x8000_0006; // L2/L3 cache
            cpuid;
            mov c6, ECX; // L2 cache info
            mov d6, EDX; // L3 cache info
        }

        // Maps the 4-bit associativity code to a number of ways.
        static immutable ubyte[16] assocmap = [ 0, 1, 2, 0, 4, 0, 8, 0, 16, 0, 32, 48, 64, 96, 128, 0xFF ];
        datacache[1].size = (c6>>16) & 0xFFFF;
        datacache[1].associativity = assocmap[(c6>>12)&0xF];
        datacache[1].lineSize = c6 & 0xFF;

        // The L3 cache value is TOTAL, not per core.
        datacache[2].size = ((d6>>18)*512)/numcores; // could be up to 2 * this, -1.
        datacache[2].associativity = assocmap[(d6>>12)&0xF];
        datacache[2].lineSize = d6 & 0xFF;
    }
}

// For Intel CoreI7 and later, use function 0x0B
// to determine number of processors.
//
// Sub-leaf 0 sets maxThreads and sub-leaf 1 sets maxCores; enumeration stops
// at the first sub-leaf that returns EAX == 0 and EBX == 0.
void getCpuInfo0B()
{
    int level=0;
    uint a, b;
    do {
        asm {
            mov EAX, 0x0B;
            mov ECX, level;
            cpuid;
            mov a, EAX;
            mov b, EBX;
        }
        if (b!=0) {
            // I'm not sure about this. The docs state that there
            // are 2 hyperthreads per core if HT is factory enabled.
            if (level==0) maxThreads = b & 0xFFFF;
            else if (level==1) maxCores = b & 0xFFFF;

        }
        ++level;
    } while (a!=0 || b!=0);
}

// Queries CPUID and fills in every module-level variable: vendor and
// processor strings, feature words, family/model/stepping, the data cache
// table and the core/thread counts. Must only be called when hasCPUID()
// returned true.
void cpuidX86()
{
    char * venptr = vendorID.ptr;
    uint a, c, d, a2;
    asm {
        mov EAX, 0;
        cpuid;
        mov a, EAX;
        mov EAX, venptr;
        mov [EAX], EBX;
        mov [EAX + 4], EDX;
        mov [EAX + 8], ECX;
        mov EAX, 0x8000_0000;
        cpuid;
        mov a2, EAX;
    }

    max_cpuid = a;
    max_extended_cpuid = a2;

    probablyIntel = vendorID == "GenuineIntel";
    probablyAMD = vendorID == "AuthenticAMD";
    uint apic = 0; // brand index, apic id
    asm {
        mov EAX, 1; // model, stepping
        cpuid;
        mov a, EAX;
        mov apic, EBX;
        mov c, ECX;
        mov d, EDX;
    }
    features = d;
    miscfeatures = c;
    amdfeatures = 0;
    amdmiscfeatures = 0;
    if (max_extended_cpuid >= 0x8000_0001) {
        asm {
            mov EAX, 0x8000_0001;
            cpuid;
            mov c, ECX;
            mov d, EDX;
        }
        amdmiscfeatures = c;
        amdfeatures = d;
    }
    // Try to detect fraudulent vendorIDs
    if (amd3dnow) probablyIntel = false;

    stepping = a & 0xF;
    uint fbase = (a >> 8) & 0xF;
    uint mbase = (a >> 4) & 0xF;
    // '+' binds tighter than '&', so the extended-family field has to be
    // masked inside its own parentheses before it is added to the base family.
    family = ((fbase == 0xF) || (fbase == 0)) ? fbase + ((a >> 20) & 0xFF) : fbase;
    model = ((fbase == 0xF) || (fbase == 6 && probablyIntel) ) ?
         mbase + ((a >> 12) & 0xF0) : mbase;

    if (!probablyIntel && max_extended_cpuid >= 0x8000_0008) {
        // determine max number of cores for AMD
        asm {
            mov EAX, 0x8000_0008;
            cpuid;
            mov c, ECX;
        }
        uint apicsize = (c>>12) & 0xF;
        if (apicsize == 0) {
            // use legacy method
            // The low byte holds (number of cores - 1); add one, as
            // getAMDcacheinfo does for the same field.
            if (hyperThreadingBit()) maxCores = (c & 0xFF) + 1;
            else maxCores = 1;
        } else {
            // maxcores = 2^ apicsize
            maxCores = 1;
            while (apicsize) { maxCores<<=1; --apicsize; }
        }
    }

    if (max_extended_cpuid >= 0x8000_0004) {
        char *procptr = processorNameBuffer.ptr;
        asm {
            push ESI;
            mov ESI, procptr;
            mov EAX, 0x8000_0002;
            cpuid;
            mov [ESI], EAX;
            mov [ESI+4], EBX;
            mov [ESI+8], ECX;
            mov [ESI+12], EDX;
            mov EAX, 0x8000_0003;
            cpuid;
            mov [ESI+16], EAX;
            mov [ESI+20], EBX;
            mov [ESI+24], ECX;
            mov [ESI+28], EDX;
            mov EAX, 0x8000_0004;
            cpuid;
            mov [ESI+32], EAX;
            mov [ESI+36], EBX;
            mov [ESI+40], ECX;
            mov [ESI+44], EDX;
            pop ESI;
        }
        // Intel P4 and PM pad at front with spaces.
        // Other CPUs pad at end with nulls.
        // Both scans are bounded so that a buffer consisting only of
        // spaces or only of nulls cannot index outside the array.
        size_t start = 0, end = 0;
        while (start < processorNameBuffer.length
               && processorNameBuffer[start] == ' ') { ++start; }
        while (end < processorNameBuffer.length - start
               && processorNameBuffer[$-end-1] == 0) { ++end; }
        processorName = processorNameBuffer[start..$-end].idup;
        if (processorName.length == 0) processorName = "Unknown CPU";
    } else {
        processorName = "Unknown CPU";
    }
    // Determine cache sizes

    // Intel docs specify that they return 0 for 0x8000_0005.
    // AMD docs do not specify the behaviour for 0004 and 0002.
    // Centaur/VIA and most other manufacturers use the AMD method,
    // except Cyrix MediaGX MMX Enhanced uses their OWN form of CPUID2!
    // NS Geode GX1 provides CyrixCPUID2 _and_ does the same wrong behaviour
    // for CPUID80000005. But Geode GX uses the AMD method

    // Deal with idiotic Geode GX1 - make it same as MediaGX MMX.
    if (max_extended_cpuid==0x8000_0005 && max_cpuid==2) {
        max_extended_cpuid = 0x8000_0004;
    }
    // Therefore, we try the AMD method unless it's an Intel chip.
    // If we still have no info, try the Intel methods.
    datacache[0].size = 0;
    if (max_cpuid<2 || !probablyIntel) {
        if (max_extended_cpuid >= 0x8000_0005) {
            getAMDcacheinfo();
        } else if (probablyAMD) {
            // According to AMDProcRecognitionAppNote, this means CPU
            // K5 model 0, or Am5x86 (model 4), or Am4x86DX4 (model 4)
            // Am5x86 has 16Kb 4-way unified data & code cache.
            datacache[0].size = 8;
            datacache[0].associativity = 4;
            datacache[0].lineSize = 32;
        } else {
            // Some obscure CPU.
            // Values for Cyrix 6x86MX (family 6, model 0)
            datacache[0].size = 64;
            datacache[0].associativity = 4;
            datacache[0].lineSize = 32;
        }
    }
    if ((datacache[0].size == 0) && max_cpuid>=4) {
        getcacheinfoCPUID4();
    }
    if ((datacache[0].size == 0) && max_cpuid>=2) {
        getcacheinfoCPUID2();
    }
    if (datacache[0].size == 0) {
        // Pentium, PMMX, late model 486, or an obscure CPU
        if (mmx) { // Pentium MMX. Also has 8kB code cache.
            datacache[0].size = 16;
            datacache[0].associativity = 4;
            datacache[0].lineSize = 32;
        } else { // Pentium 1 (which also has 8kB code cache)
                 // or 486.
            // Cyrix 6x86: 16, 4way, 32 linesize
            datacache[0].size = 8;
            datacache[0].associativity = 2;
            datacache[0].lineSize = 32;
        }
    }
    if (max_cpuid >=0x0B) {
        // For Intel i7 and later, use function 0x0B to determine
        // cores and hyperthreads.
        getCpuInfo0B();
    } else {
        if (hyperThreadingBit()) maxThreads = (apic>>>16) & 0xFF;
        else maxThreads = maxCores;
    }
}

// Return true if the cpuid instruction is supported.
// BUG(WONTFIX): Returns false for Cyrix 6x86 and 6x86L. They will be treated as 486 machines.
bool hasCPUID()
{
    // EFLAGS bit 21; the result is true when the bit's value changed after
    // an attempt to flip it.
    enum uint ID_FLAG = 0x0020_0000;
    uint changedBits;
    asm {
        pushfd;
        pop EAX;
        mov changedBits, EAX;   // original EFLAGS
        xor EAX, 0x0020_0000;   // flip bit 21
        push EAX;
        popfd;
        pushfd;
        pop EAX;                // EFLAGS as actually stored
        xor changedBits, EAX;   // bits that differ from the original
    }
    return (changedBits & ID_FLAG) != 0;
}

} else { // inline asm X86

// No usable inline assembler: CPUID can never be queried.
bool hasCPUID() { return false; }

// Fallback used when CPUID cannot be queried: 8 kB, 2-way, 32-byte-line L1.
void cpuidX86()
{
    datacache[0] = CacheInfo(8, 2, 32);
}
}

// TODO: Implement this function with OS support
void cpuidPPC()
{
    enum :int  { PPC601, PPC603, PPC603E, PPC604,
                 PPC604E, PPC620, PPCG3, PPCG4, PPCG5 }

    // TODO:
    // asm { mfpvr; } returns the CPU version but unfortunately it can
    // only be used in kernel mode. So OS support is required.
    int cputype = PPC603;

    // Per-model tables, indexed by the enum above.
    // 601 has a 8KB combined data & code L1 cache.
    uint[] l1SizeOf  = [4, 8, 16, 16, 32, 32, 32, 32, 64];
    ubyte[] l1WaysOf = [8, 2,  4,  4,  4,  8,  8,  8,  8];
    uint[] l2SizeOf  = [0, 0,  0,  0,  0,  0,  0,  256,  512];
    uint[] l3SizeOf  = [0, 0,  0,  0,  0,  0,  0,  2048,  0];

    uint bytesPerLine;
    if (cputype == PPCG5)
        bytesPerLine = 128;
    else if (cputype == PPC620 || cputype == PPCG3)
        bytesPerLine = 64;
    else
        bytesPerLine = 32;

    datacache[0].size = l1SizeOf[cputype];
    datacache[0].associativity = l1WaysOf[cputype];
    datacache[0].lineSize = bytesPerLine;
    datacache[1].size = l2SizeOf[cputype];
    datacache[2].size = l3SizeOf[cputype];
    // L2 and L3 take the L1 line size.
    datacache[1].lineSize = datacache[0].lineSize;
    datacache[2].lineSize = datacache[0].lineSize;
}

// TODO: Implement this function with OS support
void cpuidSparc()
{
    // UltaSparcIIi  : L1 = 16,  2way. L2 = 512, 4 way.
    // UltraSparcIII : L1 = 64,  4way. L2= 4096 or 8192.
    // UltraSparcIIIi: L1 = 64,  4way. L2= 1024, 4 way
    // UltraSparcIV  : L1 = 64,  4way. L2 = 16*1024.
    // UltraSparcIV+ : L1 = 64,  4way. L2 = 2048, L3=32*1024.
    // Sparc64V      : L1 = 128, 2way. L2 = 4096 4way.
}


// Module constructor: detects the CPU when CPUID is available, then makes
// sure all five datacache entries hold usable values.
shared static this()
{
    // Without CPUID it's a 386 or 486, or a Cyrix 6x86.
    // Probably still has an external cache.
    if (hasCPUID())
        cpuidX86();

    if (datacache[0].size == 0)
    {
        // Guess same as Pentium 1.
        datacache[0] = CacheInfo(8, 2, 32);
    }

    numCacheLevels = 1;
    // And now fill up all the unused levels with full memory space.
    foreach (i; 1 .. datacache.length)
    {
        if (datacache[i].size != 0)
        {
            numCacheLevels = cast(uint)(i + 1);
            continue;
        }
        // Set all remaining levels of cache equal to full address space,
        // inheriting the line size of the level below.
        datacache[i] = CacheInfo(uint.max/1024, 1, datacache[i-1].lineSize);
    }
}




debug (Cpuid)
{
    private import tango.io.Stdout;

    // Prints the detected processor name and its thread / core counts.
    void main()
    {
        Stdout.formatln ("{}, {} threads, {} cores", processor, threadsPerCPU, coresPerCPU);
    }
}