1 
2 /* Trace dynamic profiler.
3  * For use with the Digital Mars DMD compiler.
4  * Copyright (C) 1995-2006 by Digital Mars
5  * All Rights Reserved
6  * Written by Walter Bright
7  * www.digitalmars.com
8  */
9 
10 /*
11  *  Modified by Sean Kelly <sean@f4.ca> for use with Tango.
12  */
13 
14 module rt.compiler.dmd.rt.trace;
15 
16 private
17 {
18     import rt.compiler.util.string;
19     import tango.stdc.string : memset, memcpy, strlen;
20     import tango.stdc.stdlib : malloc, free, exit, strtoul, strtoull, qsort,
21                                 EXIT_FAILURE;
22     import tango.stdc.ctype : isspace, isalpha, isgraph;
23     import tango.stdc.stdio : fopen, fclose, fprintf, fgetc, FILE, EOF;
24 }
25 
// Everything declared below uses C linkage unless a nested extern block
// says otherwise.
extern (C):

// Demangles an identifier. trace_times() releases the returned string with
// free() whenever it is not the symbol's own name.
char* unmangle_ident(char*);    // from DMC++ runtime library

// Signed 64-bit tick count used for every timing value in this module.
alias long timer_t;
31 
32 /////////////////////////////////////
33 //
34 
// One edge of the call graph: a singly linked list node that pairs a
// function with a call count. The same node type is used for both the
// fan-in and the fan-out list of a Symbol.
struct SymPair
{
    SymPair* next;      // next node in the list, null at the end
    Symbol* sym;        // other end of the edge: the caller in a fan-in
                        // list, the callee in a fan-out list
    uint count;         // number of calls recorded along this edge
}
41 
42 /////////////////////////////////////
43 // A Symbol for each function name.
44 
// Node of the binary search tree of profiled functions, keyed by Sident.
struct Symbol
{
        Symbol* Sl, Sr;         // left, right children
        SymPair* Sfanin;        // list of calling functions
        SymPair* Sfanout;       // list of called functions
        timer_t totaltime;      // aggregate time
        timer_t functime;       // time excluding subfunction calls
        ubyte Sflags;           // SFxxx flag bits
        char[] Sident;          // name of symbol
}
55 
// Sflags bit set by trace_place() once a symbol has been written to the
// link order file, so that it is emitted only once.
const ubyte SFvisited = 1;      // visited

// Root of the binary search tree built by trace_addsym().
static Symbol* root;            // root of symbol table
59 
60 //////////////////////////////////
61 // Build a linked list of these.
62 
// One entry per function invocation that is currently active. Entries are
// chained through prev, both on the live stack (trace_tos) and on the free
// list (stack_freelist).
struct Stack
{
    Stack* prev;                // entry below this one (or next free entry)
    Symbol* sym;                // function this invocation belongs to
    timer_t starttime;          // time when function was entered
    timer_t ohd;                // overhead of all the bookkeeping code
    timer_t subtime;            // time used by all subfunctions
}
71 
static Stack* stack_freelist;           // recycled Stack entries, linked via prev
static Stack* trace_tos;                // top of stack
static int trace_inited;                // !=0 if initialized (2 once trace_term() ran)
static timer_t trace_ohd;               // per-call overhead measured by trace_init()

static Symbol** psymbols;       // flat array of all symbols, built by trace_array()
static uint nsymbols;           // number of symbols

// Log file: merged from by trace_merge(), rewritten by trace_term().
static char[] trace_logfilename = "trace.log";
static FILE* fplog;

// Link order file written by trace_term() via trace_order().
static char[] trace_deffilename = "trace.def";
static FILE* fpdef;
85 
86 
87 ////////////////////////////////////////
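// Set file name for the profile log output (the default is "trace.log").
// NOTE(review): a file name of "" is documented as meaning "write results to
// stdout", but trace_term() hands the name straight to fopen(); confirm the
// intended behavior.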
90 // Returns:
91 //      0       success
92 //      !=0     failure
93 
// Remember `name` as the log file to merge from and write to.
//
// The name is later handed to fopen() through `.ptr`, which needs a
// NUL-terminated string that outlives the caller's buffer. A D slice
// guarantees neither, so a private terminated copy is stored instead of the
// slice itself. The copy is deliberately never released: the previous value
// may be the built-in string literal, which must not be passed to free().
//
// Returns: always 0 (an allocation failure terminates inside trace_malloc).
int trace_setlogfilename(char[] name)
{
    char* copy = cast(char*)trace_malloc(name.length + 1);

    if (name.length)
        memcpy(copy, name.ptr, name.length);
    copy[name.length] = 0;

    trace_logfilename = copy[0 .. name.length];
    return 0;
}
99 
100 ////////////////////////////////////////
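// Set file name for the function link order output (the default is
// "trace.def").
// NOTE(review): a file name of "" is documented as meaning "write results to
// stdout", but trace_term() hands the name straight to fopen(); confirm the
// intended behavior.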
103 // Returns:
104 //      0       success
105 //      !=0     failure
106 
// Remember `name` as the file that receives the function link order.
//
// The name is later handed to fopen() through `.ptr`, which needs a
// NUL-terminated string that outlives the caller's buffer. A D slice
// guarantees neither, so a private terminated copy is stored instead of the
// slice itself. The copy is deliberately never released: the previous value
// may be the built-in string literal, which must not be passed to free().
//
// Returns: always 0 (an allocation failure terminates inside trace_malloc).
int trace_setdeffilename(char[] name)
{
    char* copy = cast(char*)trace_malloc(name.length + 1);

    if (name.length)
        memcpy(copy, name.ptr, name.length);
    copy[name.length] = 0;

    trace_deffilename = copy[0 .. name.length];
    return 0;
}
112 
113 ////////////////////////////////////////
114 // Output optimal function link order.
115 
// Emit the link order for the symbol tree rooted at s: every node is handed
// to trace_place() with a threshold of 0, its left subtree is handled
// recursively and its right subtree by continuing the loop.
static void trace_order(Symbol *s)
{
    for (Symbol* node = s; node !is null; node = node.Sr)
    {
        trace_place(node, 0);
        if (node.Sl !is null)
            trace_order(node.Sl);
    }
}
126 
127 //////////////////////////////////////////////
128 //
129 
// Obtain a Stack entry, recycling one from the free list when possible and
// falling back to trace_malloc() otherwise. The fields of the returned
// entry are not initialized here.
static Stack* stack_malloc()
{
    Stack* entry = stack_freelist;

    if (entry is null)
        return cast(Stack*)trace_malloc(Stack.sizeof);

    stack_freelist = entry.prev;
    return entry;
}
141 
142 //////////////////////////////////////////////
143 //
144 
// Return a Stack entry to the free list for reuse by stack_malloc().
// The memory itself is not released.
static void stack_free(Stack *s)
{
    s.prev = stack_freelist;    // push onto the head of the free list
    stack_freelist = s;
}
150 
151 //////////////////////////////////////
152 // Qsort() comparison routine for array of pointers to SymPair's.
153 
// qsort() comparison routine for an array of pointers to SymPair's.
// Orders by descending call count.
//
// The counts are compared explicitly instead of being subtracted: the
// difference of two uints converted to int has the wrong sign once the
// counts differ by more than int.max.
static int sympair_cmp(in void* e1, in void* e2)
{
    uint c1 = (*cast(SymPair**)e1).count;
    uint c2 = (*cast(SymPair**)e2).count;

    if (c2 > c1)
        return 1;
    if (c2 < c1)
        return -1;
    return 0;
}
163 
164 //////////////////////////////////////
165 // Place symbol s, and then place any fan ins or fan outs with
166 // counts greater than count.
167 
// Write symbol s to the link order file, then recursively place the
// functions it is connected to (callers and callees alike), most frequently
// used edge first, as long as the edge count is at least `count`.
//
// A symbol is written only once; SFvisited marks the ones already emitted.
static void trace_place(Symbol *s, uint count)
{   SymPair* sp;
    SymPair** base;
    size_t num;
    size_t u;

    if (s.Sflags & SFvisited)
        return;

    fprintf(fpdef,"\t%.*s\n", s.Sident);
    s.Sflags |= SFvisited;

    // Compute number of items in array
    num = 0;
    for (sp = s.Sfanin; sp; sp = sp.next)
        num++;
    for (sp = s.Sfanout; sp; sp = sp.next)
        num++;
    if (!num)
        return;

    // Allocate and fill an array of pointers to the edges. The element type
    // is SymPair*, so that is the size to multiply by.
    base = cast(SymPair**)trace_malloc((SymPair *).sizeof * num);
    u = 0;
    for (sp = s.Sfanin; sp; sp = sp.next)
        base[u++] = sp;
    for (sp = s.Sfanout; sp; sp = sp.next)
        base[u++] = sp;

    // Sort array, highest count first
    qsort(base, num, (SymPair *).sizeof, &sympair_cmp);

    // Place symbols. The array is sorted, so the first edge below the
    // threshold ends the walk.
    for (u = 0; u < num && base[u].count >= count; u++)
    {
        // The threshold for the recursive call is the count of the next
        // edge in line (or of this edge if it is the last one), but never
        // less than our own threshold.
        size_t u2 = (u + 1 < num) ? u + 1 : u;
        uint c2 = base[u2].count;

        if (c2 < count)
            c2 = count;
        trace_place(base[u].sym,c2);
    }

    // Clean up
    trace_free(base);
}
224 
225 /////////////////////////////////////
226 // Initialize and terminate.
227 
// Module constructor: initialize the profiler (see trace_init()).
static this()
{
    trace_init();
}

// Module destructor: write out the collected results (see trace_term()).
static ~this()
{
    trace_term();
}
237 
238 ///////////////////////////////////
239 // Report results.
240 // Also compute nsymbols.
241 
// Write the call graph section of the log for the tree rooted at s and
// count the symbols visited in nsymbols.
//
// For every symbol the output is a separator line, one line per caller
// (fan in), the symbol's own line with its total call count and its two
// timers, then one line per callee (fan out).
//
// The edge counts are uint and are therefore printed with %u; %d showed
// counts above int.max as negative numbers.
static void trace_report(Symbol* s)
{   SymPair* sp;
    uint count;

    while (s)
    {   nsymbols++;
        if (s.Sl)
            trace_report(s.Sl);
        fprintf(fplog,"------------------\n");

        // Callers; their counts add up to the number of calls of s
        count = 0;
        for (sp = s.Sfanin; sp; sp = sp.next)
        {
            fprintf(fplog,"\t%5u\t%.*s\n", sp.count, sp.sym.Sident);
            count += sp.count;
        }

        fprintf(fplog,"%.*s\t%u\t%lld\t%lld\n",s.Sident,count,s.totaltime,s.functime);

        // Callees
        for (sp = s.Sfanout; sp; sp = sp.next)
        {
            fprintf(fplog,"\t%5u\t%.*s\n",sp.count,sp.sym.Sident);
        }
        s = s.Sr;
    }
}
266 
267 ////////////////////////////////////
268 // Allocate and fill array of symbols.
269 
// Flatten the symbol tree rooted at s into psymbols.
//
// The first call (psymbols still null) allocates room for nsymbols
// pointers and resets the fill position; the recursive calls then append
// each node followed by its left subtree, and move on to the right child.
static void trace_array(Symbol *s)
{
    static uint next;           // next free slot in psymbols

    if (psymbols is null)
    {
        next = 0;
        psymbols = cast(Symbol**)trace_malloc((Symbol*).sizeof * nsymbols);
    }

    for (Symbol* node = s; node !is null; node = node.Sr)
    {
        psymbols[next] = node;
        next++;
        trace_array(node.Sl);
    }
}
284 
285 
286 //////////////////////////////////////
287 // Qsort() comparison routine for array of pointers to Symbol's.
288 
// qsort() comparison routine for an array of pointers to Symbol's.
// Orders by descending functime.
static int symbol_cmp(in void* e1, in void* e2)
{
    Symbol* first  = *cast(Symbol**)e1;
    Symbol* second = *cast(Symbol**)e2;
    timer_t delta  = second.functime - first.functime;

    if (delta > 0)
        return 1;
    if (delta < 0)
        return -1;
    return 0;
}
300 
301 
302 ///////////////////////////////////
303 // Report function timings
304 
// Convert a tick count to microseconds for a timer running at freq ticks
// per second.
//
// Mathematically this is (ticks * 1000000) / freq, but the quotient and the
// remainder are scaled separately so that the intermediate product cannot
// overflow a long for large tick counts. The result is identical whenever
// the direct formula does not overflow.
static timer_t trace_usecs(timer_t ticks, timer_t freq)
{
    return (ticks / freq) * 1000000 + ((ticks % freq) * 1000000) / freq;
}

// Report function timings: sort psymbols by descending functime and write
// one line per function with its call count, tree time, function time and
// time per call, all in microseconds.
//
// The parameter is unused; the function works on psymbols/nsymbols.
static void trace_times(Symbol* root)
{   uint u;
    timer_t freq;

    // Sort array
    qsort(psymbols, nsymbols, (Symbol *).sizeof, &symbol_cmp);

    // Print array
    QueryPerformanceFrequency(&freq);
    fprintf(fplog,"\n======== Timer Is %lld Ticks/Sec, Times are in Microsecs ========\n\n",freq);
    fprintf(fplog,"  Num          Tree        Func        Per\n");
    fprintf(fplog,"  Calls        Time        Time        Call\n\n");
    for (u = 0; u < nsymbols; u++)
    {   Symbol* s = psymbols[u];
        timer_t tl,tr;
        timer_t fl,fr;
        timer_t pl,pr;
        timer_t percall;
        SymPair* sp;
        uint calls;
        char[] id;

        version (Win32)
        {
            // Prefer the demangled name when the runtime can provide one
            char* p = (s.Sident ~ '\0').ptr;
            p = unmangle_ident(p);
            if (p)
                id = p[0 .. strlen(p)];
        }
        if (!id)
            id = s.Sident;

        // Number of calls is the sum over all callers; a function without
        // recorded callers is counted as called once to avoid dividing by 0.
        calls = 0;
        for (sp = s.Sfanin; sp; sp = sp.next)
            calls += sp.count;
        if (calls == 0)
            calls = 1;

version (all)
{
        tl = trace_usecs(s.totaltime, freq);
        fl = trace_usecs(s.functime, freq);
        // Dividing the microsecond figure by calls gives the same result as
        // (functime * 1000000) / calls / freq without the overflow risk.
        pl = fl / calls;

        fprintf(fplog,"%7u%12lld%12lld%12lld     %.*s\n",
            calls,tl,fl,pl,id);
}
else
{
        tl = s.totaltime / freq;
        tr = ((s.totaltime - tl * freq) * 10000000) / freq;

        fl = s.functime  / freq;
        fr = ((s.functime  - fl * freq) * 10000000) / freq;

        percall = s.functime / calls;
        pl = percall  / freq;
        pr = ((percall  - pl * freq) * 10000000) / freq;

        fprintf(fplog,"%7u\t%3lld.%07lld\t%3lld.%07lld\t%3lld.%07lld\t%.*s\n",
            calls,tl,tr,fl,fr,pl,pr,id);
}
        // A demangled name is released with free(); the symbol's own name
        // is not ours to release here.
        if (id !is s.Sident)
            free(id.ptr);
    }
}
371 
372 
373 ///////////////////////////////////
374 // Initialize.
375 
// Initialize the profiler. Only the first call has any effect.
//
// The one piece of work is to estimate the cost of one prolog/epilog hook
// pair, which trace_pro() later adds to the overhead of every invocation.
static void trace_init()
{
    if (!trace_inited)
    {
        trace_inited = 1;

        {   // See if we can determine the overhead.
            uint u;
            timer_t starttime;
            timer_t endtime;
            Stack *st;

            // Hide the live stack while measuring: with trace_tos null,
            // trace_epi() does nothing.
            st = trace_tos;
            trace_tos = null;
            QueryPerformanceCounter(&starttime);
            for (u = 0; u < 100; u++)
            {
                // The byte after the call is the name length read by
                // _trace_pro_n; a length of 0 makes trace_pro() return
                // right after reading the timer.
                asm
                {
                    call _trace_pro_n   ;
                    db   0              ;
                    call _trace_epi_n   ;
                }
            }
            QueryPerformanceCounter(&endtime);
            // u is 100 here, so this is the average cost of one pair
            trace_ohd = (endtime - starttime) / u;
            //printf("trace_ohd = %lld\n",trace_ohd);
            if (trace_ohd > 0)
                trace_ohd--;            // round down
            trace_tos = st;
        }
    }
}
409 
410 /////////////////////////////////
411 // Terminate.
412 
// Terminate the profiler and write the results. Does its work only once,
// and only if trace_init() ran: trace_inited goes from 1 to 2.
//
// Steps: release all Stack entries, merge the data of an existing log file
// into the symbol table, rewrite the log file (call graph followed by the
// timing table) and write the function link order file. A file that cannot
// be opened is skipped silently.
void trace_term()
{
    if (trace_inited == 1)
    {   Stack *n;

        trace_inited = 2;

        // Free remainder of the stack. These are invocations that have not
        // returned yet; their time is not accounted for.
        while (trace_tos)
        {
            n = trace_tos.prev;
            trace_free(trace_tos);
            trace_tos = n;
        }

        // Free the recycled entries. stack_free() would only put each entry
        // back on the free list, so the memory has to be released with
        // trace_free().
        while (stack_freelist)
        {
            n = stack_freelist.prev;
            trace_free(stack_freelist);
            stack_freelist = n;
        }

        // Merge in data from any existing file
        trace_merge();

        // Report results
        fplog = fopen(trace_logfilename.ptr, "w");
        if (fplog)
        {   nsymbols = 0;
            trace_report(root);         // also counts nsymbols
            trace_array(root);          // fills psymbols
            trace_times(root);
            fclose(fplog);
        }

        // Output function link order
        fpdef = fopen(trace_deffilename.ptr,"w");
        if (fpdef)
        {   fprintf(fpdef,"\nFUNCTIONS\n");
            trace_order(root);
            fclose(fpdef);
        }

        trace_free(psymbols);
        psymbols = null;
    }
}
462 
463 /////////////////////////////////
464 // Our storage allocator.
465 
// Allocate nbytes with malloc(); never returns null.
// Running out of memory terminates the program with EXIT_FAILURE.
//
// A request for 0 bytes is rounded up to 1: malloc(0) is allowed to return
// null, which must not be mistaken for an allocation failure.
static void *trace_malloc(size_t nbytes)
{
    void* p = malloc(nbytes ? nbytes : 1);

    if (p is null)
        exit(EXIT_FAILURE);
    return p;
}
474 
// Release memory obtained from trace_malloc(). Forwards p to free().
static void trace_free(void *p)
{
    free(p);
}
479 
480 //////////////////////////////////////////////
481 //
482 
// Look up the symbol named id in the tree at root, creating it if needed.
//
// Returns: the existing or newly created Symbol; never null.
//
// A new symbol owns a copy of its name, stored directly behind the Symbol
// in the same allocation. The slice passed in must not be kept: callers
// such as trace_merge() pass slices of a line buffer that is freed right
// afterwards.
static Symbol* trace_addsym(char[] id)
{
    Symbol** parent = &root;
    Symbol* rover;
    Symbol* s;
    char* name;
    int cmp;

    // Descend until the name is found or we run out of tree; parent tracks
    // the link that has to be updated on insertion.
    for (rover = *parent; rover !is null; rover = *parent)
    {
        cmp = stringCompare (id, rover.Sident);
        if (cmp == 0)
            return rover;
        parent = (cmp < 0) ? &(rover.Sl) : &(rover.Sr);
    }

    /* not in table, so insert into table       */
    s = cast(Symbol *)trace_malloc(Symbol.sizeof + id.length);
    memset(s,0,Symbol.sizeof);
    name = cast(char *)(s + 1);
    if (id.length)
        memcpy(name,id.ptr,id.length);
    s.Sident = name[0 .. id.length];
    *parent = s;                        // link new symbol into tree
    return s;
}
513 
514 /***********************************
515  * Add symbol s with count to SymPair list.
516  */
517 
// Add count to the entry for symbol s in the list that starts at *psp.
// If the list has no entry for s yet, one is appended at the end with an
// initial count of 0 before the addition.
static void trace_sympair_add(SymPair** psp, Symbol* s, uint count)
{
    SymPair* pair = *psp;

    // Advance to the entry for s, or to the end of the list; psp follows
    // along so that it always addresses the link holding pair.
    while (pair !is null && pair.sym !is s)
    {
        psp = &pair.next;
        pair = *psp;
    }

    if (pair is null)
    {
        pair = cast(SymPair*)trace_malloc(SymPair.sizeof);
        pair.next = null;
        pair.sym = s;
        pair.count = 0;
        *psp = pair;
    }

    pair.count += count;
}
540 
541 //////////////////////////////////////////////
542 //
543 
// Record the entry into the function named id.
//
// Pushes a new Stack entry, looks up (or creates) the function's Symbol and,
// if there is a calling function on the stack, counts the call in the
// caller's fan-out list and in the callee's fan-in list. An empty id is
// ignored.
//
// The timer is read first and last so that the time spent in here can be
// charged to the entry's overhead rather than to the profiled function.
static void trace_pro(char[] id)
{
    Stack* n;
    Symbol* s;
    timer_t starttime;
    timer_t t;

    QueryPerformanceCounter(&starttime);
    if (id.length == 0)
        return;
    if (!trace_inited)
        trace_init();                   // initialize package
    n = stack_malloc();
    n.prev = trace_tos;
    trace_tos = n;
    s = trace_addsym(id);
    trace_tos.sym = s;
    if (trace_tos.prev)
    {
        Symbol* prev;
        int i;

        // Accumulate Sfanout and Sfanin
        prev = trace_tos.prev.sym;
        trace_sympair_add(&prev.Sfanout,s,1);
        trace_sympair_add(&s.Sfanin,prev,1);
    }
    QueryPerformanceCounter(&t);
    trace_tos.starttime = starttime;
    // Overhead so far: the calibrated hook cost plus the time spent above
    trace_tos.ohd = trace_ohd + t - starttime;
    trace_tos.subtime = 0;
    //printf("trace_tos.ohd=%lld, trace_ohd=%lld + t=%lld - starttime=%lld\n",
    //  trace_tos.ohd,trace_ohd,t,starttime);
}
578 
579 /////////////////////////////////////////
580 //
581 
// Record the exit from the function on top of the stack.
//
// Adds the elapsed time (less bookkeeping overhead) to the function's
// totaltime, the same amount less the time of its subfunctions to its
// functime, pops the Stack entry and passes overhead and elapsed time on to
// the calling entry. Does nothing when the stack is empty.
static void trace_epi()
{   Stack* n;
    timer_t endtime;
    timer_t t;
    timer_t ohd;

    //printf("trace_epi()\n");
    if (trace_tos)
    {
        timer_t starttime;
        timer_t totaltime;

        QueryPerformanceCounter(&endtime);
        starttime = trace_tos.starttime;
        totaltime = endtime - starttime - trace_tos.ohd;
        if (totaltime < 0)
        {   //printf("endtime=%lld - starttime=%lld - trace_tos.ohd=%lld < 0\n",
            //  endtime,starttime,trace_tos.ohd);
            totaltime = 0;              // round off error, just make it 0
        }

        // totaltime is time spent in this function + all time spent in
        // subfunctions - bookkeeping overhead.
        trace_tos.sym.totaltime += totaltime;

        //if (totaltime < trace_tos.subtime)
        //printf("totaltime=%lld < trace_tos.subtime=%lld\n",totaltime,trace_tos.subtime);
        trace_tos.sym.functime  += totaltime - trace_tos.subtime;
        ohd = trace_tos.ohd;
        n = trace_tos.prev;
        stack_free(trace_tos);
        trace_tos = n;
        if (n)
        {   // The caller inherits this entry's overhead plus the time spent
            // in here since endtime was taken, and sees this call's time
            // as subfunction time.
            QueryPerformanceCounter(&t);
            n.ohd += ohd + t - endtime;
            n.subtime += totaltime;
            //printf("n.ohd = %lld\n",n.ohd);
        }
    }
}
622 
623 
624 ////////////////////////// FILE INTERFACE /////////////////////////
625 
626 /////////////////////////////////////
627 // Read line from file fp.
628 // Returns:
629 //      trace_malloc'd line buffer
630 //      null if end of file
631 
// Read one line from file fp.
//
// Returns:
//      trace_malloc'd, NUL-terminated line buffer without the '\n';
//      the caller releases it with trace_free()
//      null if end of file is reached before any character is read
//
// The buffer starts at 80 bytes and doubles whenever it is full. The old
// contents are copied only when there is an old buffer, so memcpy() is
// never called with a null source.
static char* trace_readline(FILE* fp)
{   int c;
    size_t dim;
    size_t i;
    char *buf;

    i = 0;
    dim = 0;
    buf = null;
    while (1)
    {
        // Grow before reading, so that there is always room for one more
        // character or for the terminating 0.
        if (i == dim)
        {   char *p;

            dim = dim ? dim * 2 : 80;
            p = cast(char *)trace_malloc(dim);
            if (buf)
            {   memcpy(p,buf,i);
                trace_free(buf);
            }
            buf = p;
        }

        c = fgetc(fp);
        if (c == EOF && i == 0)
        {   trace_free(buf);
            return null;
        }
        if (c == EOF || c == '\n')
            break;

        buf[i] = cast(char)c;
        i++;
    }
    buf[i] = 0;
    return buf;
}
674 
675 //////////////////////////////////////
676 // Skip space
677 
// Return a pointer to the first character at or after p for which
// isspace() is false. p must point into a NUL-terminated string.
static char *skipspace(char *p)
{
    for (; isspace(*p); ++p)
    {
    }
    return p;
}
684 
685 ////////////////////////////////////////////////////////
686 // Merge in profiling data from existing file.
687 
// Merge in profiling data from an existing log file, if there is one.
//
// The file is read line by line up to the first line starting with '='
// (the timing table is not read back):
//   - a line starting with a blank or tab is an edge "count name". Before
//     a function line it is a caller and is collected in a temporary fan-in
//     list; after it, it is a callee and goes to that function's fan-out.
//   - a line starting with a letter or one of ? _ $ @ is a function line
//     "name calls totaltime functime". The collected fan-in list is merged
//     into the function and both times are added to its timers.
//   - any other line is a separator.
//
// Each line buffer is released at the top of the next iteration.
static void trace_merge()
{   FILE* fp;
    char *buf;
    char *p;
    uint count;
    Symbol *s;
    SymPair *sfanin;
    SymPair **psp;

    if (trace_logfilename && (fp = fopen(trace_logfilename.ptr,"r")) !is null)
    {
        buf = null;
        sfanin = null;
        psp = &sfanin;
        while (1)
        {
            trace_free(buf);
            buf = trace_readline(fp);
            if (!buf)
                break;
            switch (*buf)
            {
                case '=':               // ignore rest of file
                    trace_free(buf);
                    goto L1;
                case ' ':
                case '\t':              // fan in or fan out line
                    count = strtoul(buf,&p,10);
                    if (p == buf)       // if invalid conversion
                        continue;
                    p = skipspace(p);
                    if (!*p)
                        continue;
                    s = trace_addsym(p[0 .. strlen(p)]);
                    trace_sympair_add(psp,s,count);
                    break;
                default:
                    if (!isalpha(*buf))
                    {
                        if (!sfanin)
                            psp = &sfanin;
                        continue;       // regard unrecognized line as separator
                    }
                case '?':
                case '_':
                case '$':
                case '@':
                    // Cut the name off at the first non-graphic character.
                    // Step past that character only if it is not already
                    // the end of the line; otherwise p would leave the
                    // buffer and the conversions below would read memory
                    // behind it.
                    p = buf;
                    while (isgraph(*p))
                        p++;
                    if (*p)
                    {   *p = 0;
                        p++;
                    }
                    s = trace_addsym(buf[0 .. strlen(buf)]);
                    if (s.Sfanin)
                    {   SymPair *sp;

                        // The function already has callers: add the counts
                        // from the file and release the temporary list.
                        for (; sfanin; sfanin = sp)
                        {
                            trace_sympair_add(&s.Sfanin,sfanin.sym,sfanin.count);
                            sp = sfanin.next;
                            trace_free(sfanin);
                        }
                    }
                    else
                    {   s.Sfanin = sfanin;      // adopt the list as it is
                    }
                    sfanin = null;
                    psp = &s.Sfanout;           // edges that follow are callees

                    {   timer_t t;

                        // The call count is parsed only to skip over it
                        count = strtoul(p,&p,10);
                        t = cast(long)strtoull(p,&p,10);
                        s.totaltime += t;
                        t = cast(long)strtoull(p,&p,10);
                        s.functime += t;
                    }
                    break;
            }
        }
    L1:
        fclose(fp);
    }
}
773 
774 ////////////////////////// COMPILER INTERFACE /////////////////////
775 
776 /////////////////////////////////////////////
777 // Function called by trace code in function prolog.
778 
// Function called by trace code in function prolog.
//
// The name of the profiled function is stored inline in the code, right
// behind the call to this hook. The hook reads the name through its own
// return address, advances the return address past the name so that
// execution resumes behind it, and passes length and pointer to
// trace_pro(). The registers saved on entry are restored before returning.
//
// Fix: the x86_64 path pushes R8 before R9 and therefore has to pop R9
// before R8; the pops were in the wrong order, which handed the two
// registers back exchanged.
void _trace_pro_n()
{
    /* Length of string is either:
     *  db      length
     *  ascii   string
     * or:
     *  db      0x0FF
     *  db      0
     *  dw      length
     *  ascii   string
     */
  version (OSX)
  { // 16 byte align stack
   version (D_InlineAsm_X86)
    asm
    {   naked                           ;
        pushad                          ;
        mov     ECX,8*4[ESP]            ;       // return address = start of name data
        xor     EAX,EAX                 ;
        mov     AL,[ECX]                ;       // short form: length byte
        cmp     AL,0xFF                 ;
        jne     L1                      ;
        cmp     byte ptr 1[ECX],0       ;
        jne     L1                      ;
        mov     AX,2[ECX]               ;       // long form: 16 bit length
        add     8*4[ESP],3              ;
        add     ECX,3                   ;
    L1: inc     EAX                     ;
        inc     ECX                     ;       // ECX -> first character
        add     8*4[ESP],EAX            ;       // skip length byte + string
        dec     EAX                     ;       // EAX = string length
        sub     ESP,4                   ;       // padding for alignment
        push    ECX                     ;
        push    EAX                     ;
        call    trace_pro               ;
        add     ESP,12                  ;
        popad                           ;
        ret                             ;
    }
      else version (D_InlineAsm_X86_64)
            static assert(0);
      else
            static assert(0);
  }
  else
  {
   version (D_InlineAsm_X86)
    asm
    {   naked                           ;
        pushad                          ;
        mov     ECX,8*4[ESP]            ;       // return address = start of name data
        xor     EAX,EAX                 ;
        mov     AL,[ECX]                ;       // short form: length byte
        cmp     AL,0xFF                 ;
        jne     L1                      ;
        cmp     byte ptr 1[ECX],0       ;
        jne     L1                      ;
        mov     AX,2[ECX]               ;       // long form: 16 bit length
        add     8*4[ESP],3              ;
        add     ECX,3                   ;
    L1: inc     EAX                     ;
        inc     ECX                     ;       // ECX -> first character
        add     8*4[ESP],EAX            ;       // skip length byte + string
        dec     EAX                     ;       // EAX = string length
        push    ECX                     ;
        push    EAX                     ;
        call    trace_pro               ;
        add     ESP,8                   ;
        popad                           ;
        ret                             ;
    }
    else version (D_InlineAsm_X86_64)
    // NOTE(review): the arguments are pushed on the stack exactly as in the
    // 32 bit path; confirm that this is how trace_pro receives its char[]
    // parameter on this target.
    asm
    {   naked                           ;
        push    RAX                     ;
        push    RCX                     ;
        push    RDX                     ;
        push    RSI                     ;
        push    RDI                     ;
        push    R8                      ;
        push    R9                      ;
        push    R10                     ;
        push    R11                     ;
        mov     RCX,9*8[RSP]            ;       // return address = start of name data
        xor     RAX,RAX                 ;
        mov     AL,[RCX]                ;       // short form: length byte
        cmp     AL,0xFF                 ;
        jne     L1                      ;
        cmp     byte ptr 1[RCX],0       ;
        jne     L1                      ;
        mov     AX,2[RCX]               ;       // long form: 16 bit length
        add     9*8[RSP],3              ;
        add     RCX,3                   ;
    L1: inc     RAX                     ;
        inc     RCX                     ;       // RCX -> first character
        add     9*8[RSP],RAX            ;       // skip length byte + string
        dec     RAX                     ;       // RAX = string length
        push    RCX                     ;
        push    RAX                     ;
        call    trace_pro               ;
        add     RSP,16                  ;
        pop     R11                     ;       // restore in reverse push order
        pop     R10                     ;
        pop     R9                      ;
        pop     R8                      ;
        pop     RDI                     ;
        pop     RSI                     ;
        pop     RDX                     ;
        pop     RCX                     ;
        pop     RAX                     ;
        ret                             ;
    }
    else
            static assert(0);
  }
}
895 
896 /////////////////////////////////////////////
897 // Function called by trace code in function epilog.
898 
899 
// Function called by trace code in function epilog.
//
// Saves the registers, calls trace_epi() and restores the registers in the
// reverse order before returning, so the hook hands the saved registers
// back as it found them.
void _trace_epi_n()
{
    version (OSX) { // 16 byte align stack
        asm{
            naked   ;
            pushad  ;
            sub ESP,12  ;
        }
        trace_epi();
        asm {
            add ESP,12  ;
            popad   ;
            ret ;
        }
    }
    else {
        version (D_InlineAsm_X86_64)
        {
            asm 
            {
                naked   ;
                push RAX ;
                push RBX ;
                push RCX ;
                push RDX ;
                push RSI ;
                push RDI ;
                push RBP ;
                push R8  ;
                push R9  ;
                push R10  ;
                push R11  ;
                push R12  ;
                push R13  ;
                push R14  ;
                push R15  ;
                push RAX ;
            }
        }
        else
        {
            asm 
            {
                naked   ;
                pushad  ;
            }
        }
        
        trace_epi();
        version(D_InlineAsm_X86_64)
        {
            asm
            {
                pop RAX ;   // 16 byte align the stack
                pop R15 ;
                pop R14 ;
                pop R13 ;
                pop R12 ;
                pop R11 ;
                pop R10 ;
                pop R9  ;
                pop R8  ;
                pop RBP ;
                pop RDI ;
                pop RSI ;
                pop RDX ;
                pop RCX ;
                pop RBX ;
                pop RAX ;
                ret     ;
            }
        }
        else
        {
            asm
            {
                popad   ;
                ret     ;
            }
        }
    }
}
982 
983 
// Timer access. On Win32 the two functions are imported from the system;
// elsewhere they are implemented here: the counter reads the processor's
// time stamp counter with rdtsc and the frequency is a fixed constant.
// NOTE(review): with rdtsc as the counter, the microsecond figures in the
// report are only meaningful if the time stamp counter really ticks at the
// constant returned by QueryPerformanceFrequency -- confirm.
version (Win32)
{
    extern (Windows)
    {
        export int QueryPerformanceCounter(timer_t *);
        export int QueryPerformanceFrequency(timer_t *);
    }
}
else version (X86)
{
    extern (D)
    {
        // Store the 64 bit time stamp counter in *ctr.
        // Presumably ctr arrives in EAX under the D calling convention;
        // it is moved to ECX because rdtsc overwrites EAX and EDX.
        void QueryPerformanceCounter(timer_t* ctr)
        {
            asm
            {   naked                   ;
                mov       ECX,EAX       ;
                rdtsc                   ;
                mov   [ECX],EAX         ;       // low 32 bits
                mov   4[ECX],EDX        ;       // high 32 bits
                ret                     ;
            }
        }

        // Report a fixed timer frequency in ticks per second.
        void QueryPerformanceFrequency(timer_t* freq)
        {
            *freq = 3579545;
        }
    }
}
else version (D_InlineAsm_X86_64)
{
    extern (D)
    {
        // Store the 64 bit time stamp counter in *ctr.
        // Presumably ctr arrives in RDI on this target -- verify against
        // the calling convention in use.
        void QueryPerformanceCounter(timer_t* ctr)
        {
            asm
            {
                naked                   ;
                rdtsc                   ;
                mov   [RDI],EAX         ;       // low 32 bits
                mov   4[RDI],EDX        ;       // high 32 bits
                ret                     ;
            }
        }

        // Report a fixed timer frequency in ticks per second.
        void QueryPerformanceFrequency(timer_t* freq)
        {
            *freq = 3579545;
        }
    }
}
else
{
    static assert(0);
}