1 /*******************************************************************************
2 
3         copyright:      Copyright (c) 2004 Kris Bell. All rights reserved
4 
5         license:        BSD style: $(LICENSE)
6         
7         version:        Initial release: May 2005      
8       
9         author:         Kris
10 
        Converts between native and text representations of HTTP time
        values. Internally, time is represented as UTC with an epoch 
        fixed at Jan 1st 1970. The text representation is formatted in
        accordance with RFC 1123, and the parser will accept any one of
        the RFC 1123, RFC 850, ISO-8601, DOS, or asctime formats.
16 
17         See http://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html for
18         further detail.
19 
20         Applying the D "import alias" mechanism to this module is highly
21         recommended, in order to limit namespace pollution:
22         ---
23         import TimeStamp = tango.text.convert.TimeStamp;
24 
25         auto t = TimeStamp.parse ("Sun, 06 Nov 1994 08:49:37 GMT");
26         ---
27         
28 *******************************************************************************/
29 
30 module tango.text.convert.TimeStamp;
31 
32 private import tango.time.Time;
33 
34 private import tango.core.Exception;
35 
36 private import Util = tango.text.Util;
37 
38 private import tango.time.chrono.Gregorian;
39 
40 private import Integer = tango.text.convert.Integer;
41 
/******************************************************************************

        Parse provided input and return a UTC epoch time. An exception
        is raised where the provided string is not fully parsed.

        Params:
        src = the text to convert; it must hold exactly one timestamp
              in a format accepted by parse()

        Returns: the tick count of the parsed Time

        Throws: IllegalArgumentException where src is not recognized,
                or where text remains after the timestamp

******************************************************************************/

ulong toTime(T) (T[] src)
{
        // parse() reports the consumed length through a size_t*, so the
        // counter must be a size_t (a uint* does not convert on 64-bit)
        size_t len;

        auto x = parse (src, &len);

        // parse() returns Time.max and leaves len untouched on failure;
        // testing both also rejects an empty src, where 0 < 0 is false
        if (x is Time.max || len < src.length)
            throw new IllegalArgumentException ("unknown time format: "~src.idup);

        // NOTE(review): Time does not convert implicitly to an integer, so
        // its tick count is returned - confirm this is the intended unit
        return cast(ulong) x.ticks;
}
58 
/******************************************************************************

        Convenience wrapper around format(): produces the text version
        of the provided value in a freshly allocated char array.

        See format() for details

******************************************************************************/

char[] toString (Time time)
{
        // format() hands back a slice of this stack buffer, so the text
        // has to be duplicated before it can outlive the call
        char[32] scratch = void;
        auto text = format (scratch, time);

        return text.dup;
}
74                
/******************************************************************************

        Convenience wrapper around format(): produces the text version
        of the provided value in a freshly allocated wchar array.

        See format() for details

******************************************************************************/

wchar[] toString16 (Time time)
{
        // format() hands back a slice of this stack buffer, so the text
        // has to be duplicated before it can outlive the call
        wchar[32] scratch = void;
        auto text = format (scratch, time);

        return text.dup;
}
90                
/******************************************************************************

        Convenience wrapper around format(): produces the text version
        of the provided value in a freshly allocated dchar array.

        See format() for details

******************************************************************************/

dchar[] toString32 (Time time)
{
        // format() hands back a slice of this stack buffer, so the text
        // has to be duplicated before it can outlive the call
        dchar[32] scratch = void;
        auto text = format (scratch, time);

        return text.dup;
}
106                
/******************************************************************************

        RFC1123 formatted time

        Writes the time as "Sun, 06 Nov 1994 08:49:37 GMT" into the
        provided buffer and returns the populated slice. RFC1123 text
        is always expressed in absolute GMT time, and a thirty-element
        buffer is sufficient for the produced output

        Params:
        output = destination buffer; at least 29 elements are asserted
        t      = the time to render. The first overload accepts any
                 value which can be cast to Time

        Throws an exception where the supplied time is invalid

******************************************************************************/

T[] format(T, U=Time) (T[] output, U t)
{
        // normalize the argument, then hand over to the Time overload
        auto stamp = cast(Time) t;
        return format!(T) (output, stamp);
}

T[] format(T) (T[] output, Time t)
{
        __gshared immutable const(T)[][] DayNames   = ["Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"];
        __gshared immutable const(T)[][] MonthNames = ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
                                                       "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"];

        // formats one numeric field into its own slice of the scratch buffer
        T[] field (T[] slot, long number)
        {
                return Integer.formatter!(T) (slot, number, 'u', 0, 8);
        }

        assert (output.length >= 29);

        // Time.max doubles as the "invalid time" marker
        if (t is t.max)
            throw new IllegalArgumentException ("TimeStamp.format :: invalid Time argument");

        // split the time into clock and calendar fields
        auto clock = t.time;
        auto when  = Gregorian.generic.toDate (t);

        // each numeric field owns a distinct region of the scratch space
        T[14] scratch = void;
        return Util.layout (output, cast(const(T)[])"%0, %1 %2 %3 %4:%5:%6 GMT",
                            DayNames[when.dow],
                            field (scratch[0..2],   when.day),
                            MonthNames[when.month-1],
                            field (scratch[2..6],   when.year),
                            field (scratch[6..8],   clock.hours),
                            field (scratch[8..10],  clock.minutes),
                            field (scratch[10..12], clock.seconds)
                           );
}
154 
155 
/******************************************************************************

        ISO-8601 format :: "2006-01-31T14:49:30Z"

        Writes the time into the provided buffer and returns the
        populated slice. A twenty-element buffer holds the output

        Params:
        output = destination buffer; at least 20 elements are asserted
        t      = the time to render. The first overload accepts any
                 value which can be cast to Time

        Throws an exception where the supplied time is invalid

******************************************************************************/

T[] format8601(T, U=Time) (T[] output, U t)
{
        // forward to the ISO-8601 formatter; this previously called the
        // RFC 1123 format() and so produced the wrong representation
        return format8601!(T) (output, cast(Time) t);
}

T[] format8601(T) (T[] output, Time t)
{
        // formats one numeric field into its own slice of the scratch buffer
        T[] convert (T[] tmp, long i)
        {
                return Integer.formatter!(T) (tmp, i, 'u', 0, 8);
        }

        // "2006-01-31T14:49:30Z" occupies twenty elements; the former
        // limit of 29 belonged to the longer RFC 1123 representation
        assert (output.length >= 20);

        // Time.max doubles as the "invalid time" marker
        if (t is t.max)
            throw new IllegalArgumentException ("TimeStamp.format :: invalid Time argument");

        // convert time to field values
        auto time = t.time;
        auto date = Gregorian.generic.toDate (t);

        // use the featherweight formatter; the layout previously carried
        // a stray '%' between the hours and the first ':'
        T[20] tmp = void;
        return Util.layout (output, cast(const(T)[]) "%0-%1-%2T%3:%4:%5Z", 
                            convert (tmp[0..4], date.year),
                            convert (tmp[4..6], date.month),
                            convert (tmp[6..8], date.day),
                            convert (tmp[8..10], time.hours),
                            convert (tmp[10..12], time.minutes),
                            convert (tmp[12..14], time.seconds)
                           );
}
194 
/******************************************************************************

      Parse provided input and return a UTC epoch time. A return value
      of Time.max indicates a parse-failure.

      The RFC 1123, RFC 850, ISO-8601, DOS and asctime parsers are tried
      in that order, and the first one to succeed provides the result.

      An option is provided to return the count of characters parsed - 
      an unchanged value here also indicates invalid input.

******************************************************************************/

Time parse(T) (T[] src, size_t* ate = null)
{
        Time   result;
        size_t consumed = rfc1123 (src, result);

        // fall through the remaining formats until one of them matches
        if (consumed == 0)
            consumed = rfc850 (src, result);
        if (consumed == 0)
            consumed = iso8601 (src, result);
        if (consumed == 0)
            consumed = dostime (src, result);
        if (consumed == 0)
            consumed = asctime (src, result);

        // nothing matched: *ate is deliberately left alone
        if (consumed == 0)
            return Time.max;

        if (ate !is null)
            *ate = consumed;
        return result;
}
222 
223 
/******************************************************************************

      Parse provided input into the supplied time-of-day and date. A
      return value of false indicates a parse-failure.

      The RFC 1123, RFC 850, ISO-8601, DOS and asctime parsers are tried
      in that order, and the first one to succeed provides the result.

      An option is provided to return the count of characters parsed - 
      an unchanged value here also indicates invalid input.

******************************************************************************/

bool parse(T) (T[] src, ref TimeOfDay tod, ref Date date, size_t* ate = null)
{
        size_t consumed = rfc1123 (src, tod, date);

        // fall through the remaining formats until one of them matches
        if (consumed == 0)
            consumed = rfc850 (src, tod, date);
        if (consumed == 0)
            consumed = iso8601 (src, tod, date);
        if (consumed == 0)
            consumed = dostime (src, tod, date);
        if (consumed == 0)
            consumed = asctime (src, tod, date);

        // nothing matched: *ate is deliberately left alone
        if (consumed == 0)
            return false;

        if (ate !is null)
            *ate = consumed;
        return true;
}
250 
/******************************************************************************

        RFC 822, updated by RFC 1123 :: "Sun, 06 Nov 1994 08:49:37 GMT"

        Parses into a Time, which is assigned only where the parse
        succeeds.

        Returns the number of elements consumed by the parse; zero if
        the parse failed

******************************************************************************/

size_t rfc1123(T) (T[] src, ref Time value)
{
        Date      day;
        TimeOfDay clock;

        auto consumed = rfc1123!(T) (src, clock, day);

        // leave the caller's value untouched on failure
        if (consumed == 0)
            return consumed;

        value = Gregorian.generic.toTime (day, clock);
        return consumed;
}
270 
271 
/******************************************************************************

        RFC 822, updated by RFC 1123 :: "Sun, 06 Nov 1994 08:49:37 GMT"

        Parses into the supplied time-of-day and date. Both may have
        been partially written where the parse fails. The separators,
        the day and month names, and the trailing "GMT" are matched
        only within the bounds of src.

        Returns the number of elements consumed by the parse; zero if
        the parse failed

******************************************************************************/

size_t rfc1123(T) (T[] src, ref TimeOfDay tod, ref Date date)
{
        T* p = src.ptr;
        T* e = p + src.length;

        // true where at least n elements remain ahead of the end of src
        bool has (size_t n)
        {
                return p <= e && cast(size_t) (e - p) >= n;
        }

        // consume one expected element, never reading at or beyond e
        bool eat (dchar c)
        {
                if (p < e && *p == c)
                   {
                   ++p;
                   return true;
                   }
                return false;
        }

        // "06 Nov 1994"
        bool dt ()
        {
                return ((date.day = parseInt(p, e)) > 0  &&
                         eat (' ')                       &&
                         has (3)                         &&
                        (date.month = parseMonth(p)) > 0 &&
                         eat (' ')                       &&
                        (date.year = parseInt(p, e)) > 0);
        }

        // parseShortDay(), parseMonth() and the "GMT" comparison all look
        // at three elements without a length check of their own, hence
        // the has(3) guards
        if (has (3)               &&
            parseShortDay(p) >= 0 &&
            eat (',')             &&
            eat (' ')             &&
            dt ()                 &&
            eat (' ')             &&
            time (tod, p, e)      &&
            eat (' ')             &&
            has (3)               &&
            p[0..3] == "GMT")
            {
            return cast(size_t) ((p+3) - src.ptr);
            }
        return 0;
}
308 
309 
/******************************************************************************

        RFC 850, obsoleted by RFC 1036 :: "Sunday, 06-Nov-94 08:49:37 GMT"

        Parses into a Time, which is assigned only where the parse
        succeeds.

        Returns the number of elements consumed by the parse; zero if
        the parse failed

******************************************************************************/

size_t rfc850(T) (T[] src, ref Time value)
{
        Date      day;
        TimeOfDay clock;

        auto consumed = rfc850!(T) (src, clock, day);

        // leave the caller's value untouched on failure
        if (consumed == 0)
            return consumed;

        value = Gregorian.generic.toTime (day, clock);
        return consumed;
}
329 
/******************************************************************************

        RFC 850, obsoleted by RFC 1036 :: "Sunday, 06-Nov-94 08:49:37 GMT"

        Parses into the supplied time-of-day and date. Both may have
        been partially written where the parse fails. Years below 70
        are placed in the 2000s, and other years below 100 in the
        1900s. The separators, the day and month names, and the
        trailing "GMT" are matched only within the bounds of src.

        Returns the number of elements consumed by the parse; zero if
        the parse failed

******************************************************************************/

size_t rfc850(T) (T[] src, ref TimeOfDay tod, ref Date date)
{
        T* p = src.ptr;
        T* e = p + src.length;

        // true where at least n elements remain ahead of the end of src
        bool has (size_t n)
        {
                return p <= e && cast(size_t) (e - p) >= n;
        }

        // consume one expected element, never reading at or beyond e
        bool eat (dchar c)
        {
                if (p < e && *p == c)
                   {
                   ++p;
                   return true;
                   }
                return false;
        }

        // a two-digit year of "00" is legal, so success is judged by
        // whether digits were consumed rather than by the value
        bool year ()
        {
                auto start = p;
                date.year = parseInt (p, e);
                return p > start;
        }

        // "06-Nov-94"
        bool dt ()
        {
                return ((date.day = parseInt(p, e)) > 0  &&
                         eat ('-')                       &&
                         has (3)                         &&
                        (date.month = parseMonth(p)) > 0 &&
                         eat ('-')                       &&
                         year ());
        }

        // parseFullDay() compares up to nine elements ("Wednesday")
        // without a length check. Every well-formed stamp is longer than
        // that, so has(9) rejects nothing that could otherwise parse
        if (has (9)              &&
            parseFullDay(p) >= 0 &&
            eat (',')            &&
            eat (' ')            &&
            dt ()                &&
            eat (' ')            &&
            time (tod, p, e)     &&
            eat (' ')            &&
            has (3)              &&
            p[0..3] == "GMT")
            {
            // expand a two-digit year
            if (date.year < 70)
                date.year += 2000;
            else
               if (date.year < 100)
                   date.year += 1900;

            return cast(size_t) ((p+3) - src.ptr);
            }
        return 0;
}
372 
373 
/******************************************************************************

        ANSI C's asctime() format :: "Sun Nov 6 08:49:37 1994"

        Parses into a Time, which is assigned only where the parse
        succeeds.

        Returns the number of elements consumed by the parse; zero if
        the parse failed

******************************************************************************/

size_t asctime(T) (T[] src, ref Time value)
{
        Date      day;
        TimeOfDay clock;

        auto consumed = asctime!(T) (src, clock, day);

        // leave the caller's value untouched on failure
        if (consumed == 0)
            return consumed;

        value = Gregorian.generic.toTime (day, clock);
        return consumed;
}
393 
/******************************************************************************

        ANSI C's asctime() format :: "Sun Nov 6 08:49:37 1994"

        Parses into the supplied time-of-day and date. Both may have
        been partially written where the parse fails. The day of the
        month may be preceded by one additional space. The separators
        and the day and month names are matched only within the bounds
        of src.

        Returns the number of elements consumed by the parse; zero if
        the parse failed

******************************************************************************/

size_t asctime(T) (T[] src, ref TimeOfDay tod, ref Date date)
{
        T* p = src.ptr;
        T* e = p + src.length;

        // true where at least n elements remain ahead of the end of src
        bool has (size_t n)
        {
                return p <= e && cast(size_t) (e - p) >= n;
        }

        // consume one expected element, never reading at or beyond e
        bool eat (dchar c)
        {
                if (p < e && *p == c)
                   {
                   ++p;
                   return true;
                   }
                return false;
        }

        // "Nov 6", or "Nov  6" where the day is padded with a space
        bool dt ()
        {
                return (has (3)                           &&
                        (date.month = parseMonth(p)) > 0  &&
                         eat (' ')                        &&
                        ((date.day = parseInt(p, e)) > 0  ||
                        (eat (' ')                        &&
                        (date.day = parseInt(p, e)) > 0)));
        }

        // parseShortDay() and parseMonth() look at three elements without
        // a length check of their own, hence the has(3) guards
        if (has (3)               &&
            parseShortDay(p) >= 0 &&
            eat (' ')             &&
            dt ()                 &&
            eat (' ')             &&
            time (tod, p, e)      &&
            eat (' ')             &&
            (date.year = parseInt (p, e)) > 0)
            {
            return cast(size_t) (p - src.ptr);
            }
        return 0;
}
429 
/******************************************************************************

        DOS time format :: "12-31-06 08:49AM"

        Parses into a Time, which is assigned only where the parse
        succeeds.

        Returns the number of elements consumed by the parse; zero if
        the parse failed

******************************************************************************/

size_t dostime(T) (T[] src, ref Time value)
{
        Date      day;
        TimeOfDay clock;

        auto consumed = dostime!(T) (src, clock, day);

        // leave the caller's value untouched on failure
        if (consumed == 0)
            return consumed;

        value = Gregorian.generic.toTime (day, clock);
        return consumed;
}
449 
450 
/******************************************************************************

        DOS time format :: "12-31-06 08:49AM"

        Parses into the supplied time-of-day and date; only the hours
        and minutes of the time-of-day are assigned. The date may have
        been partially written where the parse fails. Years below 70
        are placed in the 2000s, and other years below 100 in the
        1900s. The hour is converted from the 12-hour clock, such that
        12AM becomes 0 and 12PM becomes 12.

        Returns the number of elements consumed by the parse; zero if
        the parse failed

******************************************************************************/

size_t dostime(T) (T[] src, ref TimeOfDay tod, ref Date date)
{
        T* p = src.ptr;
        T* e = p + src.length;

        // true where at least n elements remain ahead of the end of src
        bool has (size_t n)
        {
                return p <= e && cast(size_t) (e - p) >= n;
        }

        // consume one expected element, never reading at or beyond e
        bool eat (dchar c)
        {
                if (p < e && *p == c)
                   {
                   ++p;
                   return true;
                   }
                return false;
        }

        // parse a field for which zero is a legal value (minutes "00",
        // year "00"): success means that digits were consumed
        bool number (out int v)
        {
                auto start = p;
                v = parseInt (p, e);
                return p > start;
        }

        // "12-31-06"
        bool dt ()
        {
                int year;

                if ((date.month = parseInt(p, e)) > 0 &&
                     eat ('-')                        &&
                    (date.day = parseInt(p, e)) > 0   &&
                     eat ('-')                        &&
                     number (year))
                   {
                   date.year = year;
                   return true;
                   }
                return false;
        }

        int minutes;

        // the date result was formerly tested with ">= 0", which holds
        // for any bool and so let malformed dates through
        if (dt ()                            &&
            eat (' ')                        &&
            (tod.hours = parseInt(p, e)) > 0 &&
            eat (':')                        &&
            number (minutes)                 &&
            has (2)                          &&
            (p[0] == 'A' || p[0] == 'P')     &&
            p[1] == 'M')
            {
            tod.minutes = minutes;

            // 12-hour clock: 12AM is midnight and 12PM is noon
            if (tod.hours == 12)
                tod.hours = 0;
            if (p[0] == 'P')
                tod.hours += 12;
            
            // expand a two-digit year
            if (date.year < 70)
                date.year += 2000;
            else
               if (date.year < 100)
                   date.year += 1900;
            
            // step over the "AM" or "PM" marker
            return cast(size_t) ((p+2) - src.ptr);
            }
        return 0;
}
494 
/******************************************************************************

        ISO-8601 format :: "2006-01-31 14:49:30,001"

        Parses into a Time, which is assigned only where the parse
        succeeds.

        Returns the number of elements consumed by the parse; zero if
        the parse failed

        Quote from http://en.wikipedia.org/wiki/ISO_8601 (2009-09-01):
        "Decimal fractions may also be added to any of the three time elements.
        A decimal point, either a comma or a dot (without any preference as
        stated most recently in resolution 10 of the 22nd General Conference
        CGPM in 2003), is used as a separator between the time element and
        its fraction."

******************************************************************************/

size_t iso8601(T) (T[] src, ref Time value)
{
        Date      day;
        TimeOfDay clock;

        size_t consumed = iso8601!(T) (src, clock, day);

        // leave the caller's value untouched on failure
        if (consumed == 0)
            return consumed;

        value = Gregorian.generic.toTime (day, clock);
        return consumed;
}
521 
/******************************************************************************

        ISO-8601 format :: "2006-01-31 14:49:30,001"

        Parses into the supplied time-of-day and date. Both may have
        been partially written where the parse fails. The fraction is
        optional: where src ends after the seconds, the millis are set
        to zero. Where text follows the seconds it must begin with a
        ',' or '.' separator, otherwise the parse fails.

        Returns the number of elements consumed by the parse; zero if
        the parse failed

        Quote from http://en.wikipedia.org/wiki/ISO_8601 (2009-09-01):
        "Decimal fractions may also be added to any of the three time elements.
        A decimal point, either a comma or a dot (without any preference as
        stated most recently in resolution 10 of the 22nd General Conference
        CGPM in 2003), is used as a separator between the time element and
        its fraction."

******************************************************************************/

size_t iso8601(T) (T[] src, ref TimeOfDay tod, ref Date date)
{
        T* p = src.ptr;
        T* e = p + src.length;

        // consume one expected element, never reading at or beyond e
        bool eat (dchar c)
        {
                if (p < e && *p == c)
                   {
                   ++p;
                   return true;
                   }
                return false;
        }

        // "2006-01-31"
        bool dt ()
        {
                return ((date.year = parseInt(p, e)) > 0  &&
                         eat ('-')                        &&
                        (date.month = parseInt(p, e)) > 0 &&
                         eat ('-')                        &&
                        (date.day = parseInt(p, e)) > 0);
        }

        // the date result was formerly tested with ">= 0", which holds
        // for any bool and so let malformed dates through
        if (dt ()     &&
            eat (' ') &&
            time (tod, p, e))
            {
            // Are there chars left? If yes, parse millis. If no, millis = 0.
            // (this was formerly "p - src.ptr", which is always non-zero
            // here and so read beyond the end of a fraction-less stamp)
            if (p < e) 
               {
               // check fraction separator
               auto frac_sep = *p;
               ++p;

               if (frac_sep is ',' || frac_sep is '.')
                   // separator is ok: parse millis
                   tod.millis = parseInt (p, e);
               else
                   // wrong separator: error 
                   return 0;
               } 
            else
               tod.millis = 0;
            
            return cast(size_t) (p - src.ptr);
            }
        return 0;
}
573 
574 
/******************************************************************************

        Parse a time field :: "08:49:37"

        Params:
        time = receives the hours, minutes and seconds
        p    = the parse position, advanced over the consumed elements
        e    = the end of the input; nothing at or beyond it is read

        Returns true where both ':' separators were found

******************************************************************************/

private bool time(T) (ref TimeOfDay time, ref T* p, T* e)
{
        // match one ':' separator without reading at or beyond e; the
        // former "*p++ == ':'" looked at *e whenever the input ran out
        bool colon ()
        {
                if (p < e && *p == ':')
                   {
                   ++p;
                   return true;
                   }
                return false;
        }

        return ((time.hours = parseInt(p, e)) >= 0   &&
                 colon ()                            &&
                (time.minutes = parseInt(p, e)) >= 0 &&
                 colon ()                            &&
                (time.seconds = parseInt(p, e)) >= 0);
}
589 
590 
/******************************************************************************

        Match a month from the input

        Compares the three elements at p against the abbreviated month
        names. Returns 1 (Jan) through 12 (Dec) and advances p beyond
        the name on a match; returns 0 and leaves p alone otherwise.
        Three elements are examined without any length check

******************************************************************************/

private int parseMonth(T) (ref T* p)
{
        __gshared immutable const(T)[][] names =
                [
                "Jan", "Feb", "Mar", "Apr", "May", "Jun",
                "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
                ];

        auto token = p[0..3];

        foreach (i, name; names)
                 if (name == token)
                    {
                    p += 3;
                    // months are numbered from one
                    return cast(int) i + 1;
                    }
        return 0;
}
645 
646 
/******************************************************************************

        Match a day from the input

        Compares the three elements at p against the abbreviated day
        names. Returns 0 (Sun) through 6 (Sat) and advances p beyond
        the name on a match; returns -1 and leaves p alone otherwise.
        Three elements are examined without any length check

******************************************************************************/

private int parseShortDay(T) (ref T* p)
{
        __gshared immutable const(T)[][] names =
                [
                "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat",
                ];

        auto token = p[0..3];

        foreach (i, name; names)
                 if (name == token)
                    {
                    p += 3;
                    return cast(int) i;
                    }
        return -1;
}
686 
687 
/******************************************************************************

        Match a day from the input. Sunday is 0

        Compares the elements at p against each full day name in turn.
        Returns 0 (Sunday) through 6 (Saturday) and advances p beyond
        the name on a match; returns -1 and leaves p alone otherwise.
        As many elements as the candidate name holds are examined,
        without any length check

******************************************************************************/

private int parseFullDay(T) (ref T* p)
{
        __gshared immutable const(T)[][] names =
                [
                "Sunday", 
                "Monday", 
                "Tuesday", 
                "Wednesday", 
                "Thursday", 
                "Friday", 
                "Saturday", 
                ];

        for (size_t i = 0; i < names.length; ++i)
            {
            auto name = names[i];

            if (name == p[0..name.length])
               {
               p += name.length;
               return cast(int) i;
               }
            }
        return -1;
}
715 
716 
/******************************************************************************

        Extract an integer from the input

        Accumulates the run of decimal digits starting at p, stopping
        at the first non-digit or at e, whichever comes first. p is
        left at the element which ended the run. Returns the value of
        the digits, which is zero where there were none

******************************************************************************/

private static int parseInt(T) (ref T* p, T* e)
{
        int total;

        for (; p < e; ++p)
            {
            // anything other than an ASCII digit terminates the number
            if (*p < '0' || *p > '9')
                break;

            total = total * 10 + *p - '0';
            }
        return total;
}
731 
732 
/******************************************************************************

        Unit tests: an RFC 1123 round trip, and an asctime parse which
        has to stop at the end of the provided slice

******************************************************************************/

debug (UnitTest)
{
        unittest
        {
        // parsing and then formatting must reproduce the original text
        const(wchar)[] expected = "Sun, 06 Nov 1994 08:49:37 GMT";
        wchar[30] wide;

        auto stamp = parse (expected);
        auto shown = format (wide, stamp);
        assert (shown == expected);

        // the slice drops the final '8', which nevertheless remains in
        // memory directly behind it: the year must still read as 2008
        const(char)[] sliced = "Wed Jun 11 17:22:07 20088";
        sliced = sliced[0..$-1];
        char[128] narrow;

        stamp = parse (sliced);
        auto shown2 = format (narrow, stamp);
        assert (shown2 == "Wed, 11 Jun 2008 17:22:07 GMT");
        }
}
757 
/******************************************************************************

        Manual test harness, enabled via the TimeStamp debug identifier:
        checks that the DOS and ISO-8601 parsers consume their samples
        in full, and that RFC 1123 text survives a round trip

******************************************************************************/

debug (TimeStamp)
{
        void main()
        {
                Time t;

                // each parser reports how many elements it consumed
                auto dos = "12-31-06 08:49AM";
                auto iso = "2006-01-31 14:49:30,001";
                assert (dostime(dos, t) == dos.length);
                assert (iso8601(iso, t) == iso.length);

                // string literals are immutable, so the sample cannot be
                // held in a mutable wchar[]
                wchar[30] tmp;
                const(wchar)[] test = "Sun, 06 Nov 1994 08:49:37 GMT";
                
                auto time = parse (test);
                auto text = format (tmp, time);
                assert (text == test);              
        }
}