1 /*******************************************************************************
2 *
3 * copyright: Copyright (c) 2007 Daniel Keep. All rights reserved.
4 *
5 * license: BSD style: $(LICENSE)
6 *
7 * version: Initial release: December 2007
8 *
9 * author: Daniel Keep
10 *
11 ******************************************************************************/
12
13 module tango.util.compress.Zip;
14
15 /*
16
17 TODO
18 ====
19
* Disable UTF encoding until I've worked out which version of Zip it's
  related to... (actually, it's entirely possible that it's merely a
  *proposal* at the moment.) (*Done*)
23
24 * Make ZipEntry safe: make them aware that their creating reader has been
25 destroyed.
26
27 */
28
29 import tango.core.ByteSwap : ByteSwap;
30 import tango.io.device.Array : Array;
31 import tango.io.device.File : File;
32 import Path = tango.io.Path;
33 import tango.io.device.FileMap : FileMap;
34 import tango.io.stream.Zlib : ZlibInput, ZlibOutput;
35 import tango.util.digest.Crc32 : Crc32;
36 import tango.io.model.IConduit : IConduit, InputStream, OutputStream;
import tango.io.stream.Digester : DigestInput;
import tango.io.device.Conduit : Conduit;
38 import tango.time.Time : Time, TimeSpan;
39 import tango.time.WallClock : WallClock;
40 import tango.time.chrono.Gregorian : Gregorian;
41
42 import Integer = tango.text.convert.Integer;
43
44 debug(Zip) import tango.io.Stdout : Stderr;
45
46 //////////////////////////////////////////////////////////////////////////////
47 //////////////////////////////////////////////////////////////////////////////
48 //
49 // Implementation crap
50 //
51 // Why is this here, you ask? Because of bloody DMD forward reference bugs.
52 // For pete's sake, Walter, FIX THEM, please!
53 //
54 // To skip to the actual user-visible stuff, search for "Shared stuff".
55
56 private
57 {
58
59 //////////////////////////////////////////////////////////////////////////////
60 //////////////////////////////////////////////////////////////////////////////
61 //
62 // LocalFileHeader
63 //
64
65 align(1)
66 struct LocalFileHeaderData
67 {
68 align(1):
69 ushort extract_version = ushort.max;
70 ushort general_flags = 0;
71 ushort compression_method = 0;
72 ushort modification_file_time = 0;
73 ushort modification_file_date = 0;
74 uint crc_32 = 0; // offsetof = 10
75 uint compressed_size = 0;
76 uint uncompressed_size = 0;
77 ushort file_name_length = 0;
78 ushort extra_field_length = 0;
79
80 debug(Zip) void dump()
81 {
82 Stderr
83 ("LocalFileHeader.Data {")("\n")
84 (" extract_version = ")(extract_version)("\n")
85 (" general_flags = ")(general_flags)("\n")
86 (" compression_method = ")(compression_method)("\n")
87 (" modification_file_time = ")(modification_file_time)("\n")
88 (" modification_file_date = ")(modification_file_date)("\n")
89 (" crc_32 = ")(crc_32)("\n")
90 (" compressed_size = ")(compressed_size)("\n")
91 (" uncompressed_size = ")(uncompressed_size)("\n")
92 (" file_name_length = ")(file_name_length)("\n")
93 (" extra_field_length = ")(extra_field_length)("\n")
94 ("}").newline;
95 }
96 }
97
98 struct LocalFileHeader
99 {
100 enum uint signature = 0x04034b50;
101
102 alias LocalFileHeaderData Data;
103 Data data;
104 static assert( Data.sizeof == 26 );
105
106 char[] file_name;
107 ubyte[] extra_field;
108
109 void[] data_arr()
110 {
111 return (&data)[0..1];
112 }
113
114 void put(OutputStream output)
115 {
116 // Make sure var-length fields will fit.
117 if( file_name.length > ushort.max )
118 ZipException.fntoolong;
119
120 if( extra_field.length > ushort.max )
121 ZipException.eftoolong;
122
123 // Encode filename
124 auto file_name = utf8_to_cp437(this.file_name);
125 scope(exit) if( file_name !is cast(ubyte[])this.file_name )
126 delete file_name;
127
128 if( file_name is null )
129 ZipException.fnencode;
130
131 // Update lengths in data
132 Data data = this.data;
133 data.file_name_length = cast(ushort) file_name.length;
134 data.extra_field_length = cast(ushort) extra_field.length;
135
136 // Do it
137 version( BigEndian ) swapAll(data);
138 writeExact(output, (&data)[0..1]);
139 writeExact(output, file_name);
140 writeExact(output, extra_field);
141 }
142
143 void fill(InputStream src)
144 {
145 readExact(src, data_arr());
146 version( BigEndian ) swapAll(data);
147
148 //debug(Zip) data.dump;
149
150 auto tmp = new ubyte[data.file_name_length];
151 readExact(src, tmp);
152 file_name = cp437_to_utf8(tmp);
153 if( cast(char*) tmp.ptr !is file_name.ptr ) delete tmp;
154
155 extra_field = new ubyte[data.extra_field_length];
156 readExact(src, extra_field);
157 }
158
159 /*
160 * This method will check to make sure that the local and central headers
161 * are the same; if they're not, then that indicates that the archive is
162 * corrupt.
163 */
164 bool agrees_with(FileHeader h)
165 {
// NOTE: extra_field used to be compared with h.extra_field, but this caused
// an assertion in certain archives. I found a mention of these fields being
// allowed to differ, so I think it is in general wrong to include them in
// this sanity check. larsivi 20081111
170 if( data.extract_version != h.data.extract_version
171 || data.general_flags != h.data.general_flags
172 || data.compression_method != h.data.compression_method
173 || data.modification_file_time != h.data.modification_file_time
174 || data.modification_file_date != h.data.modification_file_date
175 || file_name != h.file_name )
176 return false;
177
178 // We need a separate check for the sizes and crc32, since these will
179 // be zero if a trailing descriptor was used.
180 if( !h.usingDataDescriptor() && (
181 data.crc_32 != h.data.crc_32
182 || data.compressed_size != h.data.compressed_size
183 || data.uncompressed_size != h.data.uncompressed_size ) )
184 return false;
185
186 return true;
187 }
188 }
189
190 //////////////////////////////////////////////////////////////////////////////
191 //////////////////////////////////////////////////////////////////////////////
192 //
193 // FileHeader
194 //
195
196 align(1)
197 struct FileHeaderData
198 {
199 align(1):
200 ubyte zip_version;
201 ubyte file_attribute_type;
202 ushort extract_version;
203 ushort general_flags;
204 ushort compression_method;
205 ushort modification_file_time;
206 ushort modification_file_date;
207 uint crc_32;
208 uint compressed_size;
209 uint uncompressed_size;
210 ushort file_name_length;
211 ushort extra_field_length;
212 ushort file_comment_length;
213 ushort disk_number_start;
214 ushort internal_file_attributes = 0;
215 uint external_file_attributes = 0;
216 int relative_offset_of_local_header;
217
218 debug(Zip) void dump()
219 {
220 Stderr
221 ("FileHeader.Data {\n")
222 (" zip_version = ")(zip_version)("\n")
223 (" file_attribute_type = ")(file_attribute_type)("\n")
224 (" extract_version = ")(extract_version)("\n")
225 (" general_flags = ")(general_flags)("\n")
226 (" compression_method = ")(compression_method)("\n")
227 (" modification_file_time = ")(modification_file_time)("\n")
228 (" modification_file_date = ")(modification_file_date)("\n")
229 (" crc_32 = ")(crc_32)("\n")
230 (" compressed_size = ")(compressed_size)("\n")
231 (" uncompressed_size = ")(uncompressed_size)("\n")
232 (" file_name_length = ")(file_name_length)("\n")
233 (" extra_field_length = ")(extra_field_length)("\n")
234 (" file_comment_length = ")(file_comment_length)("\n")
235 (" disk_number_start = ")(disk_number_start)("\n")
236 (" internal_file_attributes = ")(internal_file_attributes)("\n")
237 (" external_file_attributes = ")(external_file_attributes)("\n")
238 (" relative_offset_of_local_header = ")(relative_offset_of_local_header)
239 ("\n")
240 ("}").newline;
241 }
242
243 void fromLocal(LocalFileHeader.Data data)
244 {
245 extract_version = data.extract_version;
246 general_flags = data.general_flags;
247 compression_method = data.compression_method;
248 modification_file_time = data.modification_file_time;
249 modification_file_date = data.modification_file_date;
250 crc_32 = data.crc_32;
251 compressed_size = data.compressed_size;
252 uncompressed_size = data.uncompressed_size;
253 file_name_length = data.file_name_length;
254 extra_field_length = data.extra_field_length;
255 }
256 }
257
258 struct FileHeader
259 {
260 enum uint signature = 0x02014b50;
261
262 alias FileHeaderData Data;
263 Data* data;
264 static assert( Data.sizeof == 42 );
265
266 const(char)[] file_name;
267 ubyte[] extra_field;
268 const(char)[] file_comment;
269
270 bool usingDataDescriptor()
271 {
272 return !!(data.general_flags & 1<<3);
273 }
274
275 uint compressionOptions()
276 {
277 return (data.general_flags >> 1) & 0b11;
278 }
279
280 bool usingUtf8()
281 {
282 //return !!(data.general_flags & 1<<11);
283 return false;
284 }
285
286 void[] data_arr()
287 {
288 return (cast(void*)data)[0 .. Data.sizeof];
289 }
290
291 void put(OutputStream output)
292 {
293 // Make sure the var-length fields will fit.
294 if( file_name.length > ushort.max )
295 ZipException.fntoolong;
296
297 if( extra_field.length > ushort.max )
298 ZipException.eftoolong;
299
300 if( file_comment.length > ushort.max )
301 ZipException.cotoolong;
302
303 // encode the filename and comment
304 auto file_name = utf8_to_cp437(this.file_name);
305 scope(exit) if( file_name !is cast(ubyte[])this.file_name )
306 delete file_name;
307 auto file_comment = utf8_to_cp437(this.file_comment);
308 scope(exit) if( file_comment !is cast(ubyte[])this.file_comment )
309 delete file_comment;
310
311 if( file_name is null )
312 ZipException.fnencode;
313
314 if( file_comment is null && this.file_comment !is null )
315 ZipException.coencode;
316
317 // Update the lengths
318 Data data = *(this.data);
319 data.file_name_length = cast(ushort) file_name.length;
320 data.extra_field_length = cast(ushort) extra_field.length;
321 data.file_comment_length = cast(ushort) file_comment.length;
322
323 // Ok; let's do this!
324 version( BigEndian ) swapAll(data);
325 writeExact(output, (&data)[0..1]);
326 writeExact(output, file_name);
327 writeExact(output, extra_field);
328 writeExact(output, file_comment);
329 }
330
331 long map(void[] src)
332 {
333 //debug(Zip) Stderr.formatln("FileHeader.map([0..{}])",src.length);
334
335 auto old_ptr = src.ptr;
336
337 data = cast(Data*) src.ptr;
338 src = src[Data.sizeof..$];
339 version( BigEndian ) swapAll(*data);
340
341 //debug(Zip) data.dump;
342
inout(char[]) function(inout(ubyte[])) conv_fn;
if( usingUtf8() )
    conv_fn = &utf8_to_utf8;
else
    conv_fn = &cp437_to_utf8;
348
349 file_name = conv_fn(
350 cast(ubyte[]) src[0..data.file_name_length]);
351 src = src[data.file_name_length..$];
352
353 extra_field = cast(ubyte[]) src[0..data.extra_field_length];
354 src = src[data.extra_field_length..$];
355
356 file_comment = conv_fn(
357 cast(ubyte[]) src[0..data.file_comment_length]);
358 src = src[data.file_comment_length..$];
359
360 // Return how many bytes we've eaten
361 //debug(Zip) Stderr.formatln(" . used {} bytes", cast(long)(src.ptr - old_ptr));
362 return cast(long)(src.ptr - old_ptr);
363 }
364 }
365
366 //////////////////////////////////////////////////////////////////////////////
367 //////////////////////////////////////////////////////////////////////////////
368 //
369 // EndOfCDRecord
370 //
371
372 align(1)
373 struct EndOfCDRecordData
374 {
375 align(1):
376 ushort disk_number = 0;
377 ushort disk_with_start_of_central_directory = 0;
378 ushort central_directory_entries_on_this_disk;
379 ushort central_directory_entries_total;
380 uint size_of_central_directory;
381 uint offset_of_start_of_cd_from_starting_disk;
382 ushort file_comment_length;
383
384 debug(Zip) void dump()
385 {
386 Stderr
387 .formatln("EndOfCDRecord.Data {}","{")
388 .formatln(" disk_number = {}", disk_number)
389 .formatln(" disk_with_start_of_central_directory = {}",
390 disk_with_start_of_central_directory)
391 .formatln(" central_directory_entries_on_this_disk = {}",
392 central_directory_entries_on_this_disk)
393 .formatln(" central_directory_entries_total = {}",
394 central_directory_entries_total)
395 .formatln(" size_of_central_directory = {}",
396 size_of_central_directory)
397 .formatln(" offset_of_start_of_cd_from_starting_disk = {}",
398 offset_of_start_of_cd_from_starting_disk)
399 .formatln(" file_comment_length = {}", file_comment_length)
400 .formatln("}");
401 }
402 }
403
404 struct EndOfCDRecord
405 {
406 enum uint signature = 0x06054b50;
407
408 alias EndOfCDRecordData Data;
409 Data data;
410 static assert( data.sizeof == 18 );
411
412 char[] file_comment;
413
414 void[] data_arr()
415 {
416 return (cast(void*)&data)[0 .. data.sizeof];
417 }
418
419 void put(OutputStream output)
420 {
421 // Set up the comment; check length, encode
422 if( file_comment.length > ushort.max )
423 ZipException.cotoolong;
424
425 auto file_comment = utf8_to_cp437(this.file_comment);
426 scope(exit) if( file_comment !is cast(ubyte[])this.file_comment )
427 delete file_comment;
428
429 // Set up data block
430 Data data = this.data;
431 data.file_comment_length = cast(ushort) file_comment.length;
432
433 version( BigEndian ) swapAll(data);
434 writeExact(output, (&data)[0..1]);
435 }
436
437 void fill(void[] src)
438 {
439 //Stderr.formatln("EndOfCDRecord.fill([0..{}])",src.length);
440
441 auto _data = data_arr();
442 _data[] = src[0.._data.length];
443 src = src[_data.length..$];
444 version( BigEndian ) swapAll(data);
445
446 //data.dump;
447
448 file_comment = cast(char[]) src[0..data.file_comment_length].dup;
449 }
450 }
451
452 // End of implementation crap
453 }
454
455 //////////////////////////////////////////////////////////////////////////////
456 //////////////////////////////////////////////////////////////////////////////
457 //
458 // Shared stuff
459
460 public
461 {
462 /**
463 * This enumeration denotes the kind of compression used on a file.
464 */
465 enum Method
466 {
467 /// No compression should be used.
468 Store,
469 /// Deflate compression.
470 Deflate,
471 /**
472 * This is a special value used for unsupported or unrecognised
473 * compression methods. This value is only used internally.
474 */
475 Unsupported
476 }
477 }
478
479 private
480 {
481 const ushort ZIP_VERSION = 20;
482 const ushort MAX_EXTRACT_VERSION = 20;
483
/* General purpose flag bits, annotated on the mask below:
 *   bits 1-2: compression options (supported)
 *   bit 3:    uses trailing data descriptor (supported)
 *   bit 11:   utf-8 encoding (not currently supported; see the module TODO)
 */
const ushort SUPPORTED_FLAGS = 0b00_0_0_0_0000_0_0_0_1_11_0;
489 const ushort UNSUPPORTED_FLAGS = ~SUPPORTED_FLAGS;
490
491 Method toMethod(ushort method)
492 {
493 switch( method )
494 {
495 case 0: return Method.Store;
496 case 8: return Method.Deflate;
497 default: return Method.Unsupported;
498 }
499 }
500
501 ushort fromMethod(Method method)
502 {
503 switch( method )
504 {
505 case Method.Store: return 0;
506 case Method.Deflate: return 8;
507 default:
508 assert(false, "unsupported compression method");
509 }
510 }
511
512 /* NOTE: This doesn't actually appear to work. Using the default magic
513 * number with Tango's Crc32 digest works, however.
514 */
515 //const CRC_MAGIC = 0xdebb20e3u;
516 }
517
518 //////////////////////////////////////////////////////////////////////////////
519 //////////////////////////////////////////////////////////////////////////////
520 //
521 // ZipReader
522
523 interface ZipReader
524 {
525 bool streamed();
526 void close();
527 bool more();
528 ZipEntry get();
529 ZipEntry get(ZipEntry);
530 int opApply(int delegate(ref ZipEntry));
531 }
532
533 //////////////////////////////////////////////////////////////////////////////
534 //////////////////////////////////////////////////////////////////////////////
535 //
536 // ZipWriter
537
538 interface ZipWriter
539 {
540 void finish();
541 void putFile(ZipEntryInfo info, const(char)[] path);
542 void putStream(ZipEntryInfo info, InputStream source);
543 void putEntry(ZipEntryInfo info, ZipEntry entry);
544 void putData(ZipEntryInfo info, const(void)[] data);
545 Method method();
546 Method method(Method);
547 }
548
549 //////////////////////////////////////////////////////////////////////////////
550 //////////////////////////////////////////////////////////////////////////////
551 //
552 // ZipBlockReader
553
554 /**
555 * The ZipBlockReader class is used to parse a Zip archive. It exposes the
556 * contents of the archive via an iteration interface. For instance, to loop
557 * over all files in an archive, one can use either
558 *
559 * -----
560 * foreach( entry ; reader )
561 * ...
562 * -----
563 *
564 * Or
565 *
566 * -----
567 * while( reader.more )
568 * {
569 * auto entry = reader.get;
570 * ...
571 * }
572 * -----
573 *
574 * See the ZipEntry class for more information on the contents of entries.
575 *
576 * Note that this class can only be used with input sources which can be
577 * freely seeked. Also note that you may open a ZipEntry instance produced by
578 * this reader at any time until the ZipReader that created it is closed.
579 */
580 class ZipBlockReader : ZipReader
581 {
582 /**
583 * Creates a ZipBlockReader using the specified file on the local
584 * filesystem.
585 */
586 this(const(char)[] path)
587 {
588 file_source = new File(path);
589 this(file_source);
590 }
591
592 /**
593 * Creates a ZipBlockReader using the provided InputStream. Please note
594 * that this InputStream must be attached to a conduit implementing the
595 * IConduit.Seek interface.
596 */
597 this(InputStream source)
598 in
599 {
600 assert( cast(IConduit.Seek) source.conduit, "source stream must be seekable" );
601 }
602 body
603 {
604 this.source = source;
605 this.seeker = source; //cast(IConduit.Seek) source;
606 }
607
608 bool streamed() { return false; }
609
610 /**
611 * Closes the reader, and releases all resources. After this operation,
612 * all ZipEntry instances created by this ZipReader are invalid and should
613 * not be used.
614 */
615 void close()
616 {
// NOTE: Originally more of the GC allocated data in this class was
// explicitly deleted here, such as cd_data - this caused segfaults
// and has been removed, as it was not necessary for correctness,
// and the memory usage win is questionable.
621 state = State.Done;
622 source = null;
623 seeker = null;
624 delete headers;
625
626 if( file_source !is null )
627 {
628 file_source.close();
629 delete file_source;
630 }
631 }
632
633 /**
634 * Returns true if and only if there are additional files in the archive
635 * which have not been read via the get method. This returns true before
636 * the first call to get (assuming the opened archive is non-empty), and
637 * false after the last file has been accessed.
638 */
639 bool more()
640 {
641 switch( state )
642 {
643 case State.Init:
644 read_cd();
645 assert( state == State.Open );
646 return more();
647
648 case State.Open:
649 return (current_index < headers.length);
650
651 case State.Done:
652 return false;
653
654 default:
655 assert(false);
656 }
657 }
658
659 /**
660 * Retrieves the next file from the archive. Note that although this does
661 * perform IO operations, it will not read the contents of the file.
662 *
663 * The optional reuse argument can be used to instruct the reader to reuse
664 * an existing ZipEntry instance. If passed a null reference, it will
665 * create a new ZipEntry instance.
666 */
667 ZipEntry get()
668 {
669 if( !more() )
670 ZipExhaustedException();
671
672 return new ZipEntry(headers[current_index++], &open_file);
673 }
674
675 /// ditto
676 ZipEntry get(ZipEntry reuse)
677 {
678 if( !more() )
679 ZipExhaustedException();
680
681 if( reuse is null )
682 return new ZipEntry(headers[current_index++], &open_file);
683 else
684 return reuse.reset(headers[current_index++], &open_file);
685 }
686
687 /**
688 * This is used to iterate over the contents of an archive using a foreach
689 * loop. Please note that the iteration will reuse the ZipEntry instance
690 * passed to your loop. If you wish to keep the instance and re-use it
691 * later, you $(B must) use the dup member to create a copy.
692 */
693 int opApply(int delegate(ref ZipEntry) dg)
694 {
695 int result = 0;
696 ZipEntry entry;
697
698 while( more() )
699 {
700 entry = get(entry);
701
702 result = dg(entry);
703 if( result )
704 break;
705 }
706
707 if( entry !is null )
708 delete entry;
709
710 return result;
711 }
712
713 private:
714 InputStream source;
715 InputStream seeker; //IConduit.Seek seeker;
716
717 enum State { Init, Open, Done }
718 State state;
719 size_t current_index = 0;
720 FileHeader[] headers;
721
722 // These should be killed when the reader is closed.
723 ubyte[] cd_data;
724 File file_source = null;
725
726 /*
727 * This function will read the contents of the central directory. Split
728 * or spanned archives aren't supported.
729 */
730 void read_cd()
731 in
732 {
733 assert( state == State.Init );
734 assert( headers is null );
735 assert( cd_data is null );
736 }
737 out
738 {
739 assert( state == State.Open );
740 assert( headers !is null );
741 assert( cd_data !is null );
742 assert( current_index == 0 );
743 }
744 body
745 {
746 //Stderr.formatln("ZipReader.read_cd()");
747
748 // First, we need to locate the end of cd record, so that we know
749 // where the cd itself is, and how big it is.
750 auto eocdr = read_eocd_record();
751
752 // Now, make sure the archive is all in one file.
753 if( eocdr.data.disk_number !=
754 eocdr.data.disk_with_start_of_central_directory
755 || eocdr.data.central_directory_entries_on_this_disk !=
756 eocdr.data.central_directory_entries_total )
757 ZipNotSupportedException.spanned();
758
759 // Ok, read the whole damn thing in one go.
760 cd_data = new ubyte[eocdr.data.size_of_central_directory];
761 long cd_offset = eocdr.data.offset_of_start_of_cd_from_starting_disk;
762 seeker.seek(cd_offset, seeker.Anchor.Begin);
763 readExact(source, cd_data);
764
765 // Cake. Now, we need to break it up into records.
766 headers = new FileHeader[
767 eocdr.data.central_directory_entries_total];
768
769 long cdr_offset = cd_offset;
770
771 // Ok, map the CD data into file headers.
772 foreach( i,ref header ; headers )
773 {
774 //Stderr.formatln(" . reading header {}...", i);
775
776 // Check signature
777 {
778 uint sig = (cast(uint[])(cd_data[0..4]))[0];
779 version( BigEndian ) swap(sig);
780 if( sig != FileHeader.signature )
781 ZipException.badsig("file header");
782 }
783
784 auto used = header.map(cd_data[4..$]);
785 assert( used <= (size_t.max-4) );
786 cd_data = cd_data[4+cast(size_t)used..$];
787
788 // Update offset for next record
789 cdr_offset += 4 /* for sig. */ + used;
790 }
791
792 // Done!
793 state = State.Open;
794 }
795
796 /*
797 * This will locate the end of CD record in the open stream.
798 *
799 * This code sucks, but that's because Zip sucks.
800 *
801 * Basically, the EOCD record is stuffed somewhere at the end of the file.
802 * In a brilliant move, the record is *variably sized*, which means we
803 * have to do a linear backwards search to find it.
804 *
805 * The header itself (including the signature) is at minimum 22 bytes
806 * long, plus anywhere between 0 and 2^16-1 bytes of comment. That means
807 * we need to read the last 2^16-1 + 22 bytes from the file, and look for
808 * the signature [0x50,0x4b,0x05,0x06] in [0 .. $-18].
809 *
810 * If we find the EOCD record, we'll return its contents. If we couldn't
811 * find it, we'll throw an exception.
812 */
813 EndOfCDRecord read_eocd_record()
814 in
815 {
816 assert( state == State.Init );
817 }
818 body
819 {
820 //Stderr.formatln("read_eocd_record()");
821
822 // Signature + record + max. comment length
823 const max_chunk_len = 4 + EndOfCDRecord.Data.sizeof + ushort.max;
824
825 auto file_len = seeker.seek(0, seeker.Anchor.End);
826 assert( file_len <= size_t.max );
827
828 // We're going to need min(max_chunk_len, file_len) bytes.
829 size_t chunk_len = max_chunk_len;
830 if( file_len < max_chunk_len )
831 chunk_len = cast(size_t) file_len;
832 //Stderr.formatln(" . chunk_len = {}", chunk_len);
833
834 // Seek back and read in the chunk. Don't forget to clean up after
835 // ourselves.
836 seeker.seek(-cast(long)chunk_len, seeker.Anchor.End);
837 auto chunk_offset = seeker.seek(0, seeker.Anchor.Current);
838 //Stderr.formatln(" . chunk_offset = {}", chunk_offset);
839 auto chunk = new ubyte[chunk_len];
840 scope(exit) delete chunk;
841 readExact(source, chunk);
842
843 // Now look for our magic number. Don't forget that on big-endian
844 // machines, we need to byteswap the value we're looking for.
845 uint eocd_magic = EndOfCDRecord.signature;
846 version( BigEndian )
847 swap(eocd_magic);
848
size_t eocd_loc = size_t.max;

if( chunk_len >= 18 )
    // Note the loop shape: with an unsigned index, `i>=0` would always
    // be true, so we test-and-decrement instead to stop after i==0.
    for( size_t i=chunk_len-18+1; i-- > 0; )
    {
        if( *(cast(uint*)(chunk.ptr+i)) == eocd_magic )
        {
            // Found the bugger!  Make sure we skip the signature (forgot
            // to do that originally; talk about weird errors :P)
            eocd_loc = i+4;
            break;
        }
    }

// If we didn't find it, then we'll assume that this is not a valid
// archive.
if( eocd_loc == size_t.max )
    ZipException.missingdir;
867
868 // Ok, so we found it; now what? Now we need to read the record
869 // itself in. eocd_loc is the offset within the chunk where the eocd
870 // record was found, so slice it out.
871 EndOfCDRecord eocdr;
872 eocdr.fill(chunk[eocd_loc..$]);
873
874 // Excellent. We're done here.
875 return eocdr;
876 }
877
878 /*
879 * Opens the specified file for reading. If the raw argument passed is
880 * true, then the file is *not* decompressed.
881 */
882 InputStream open_file(FileHeader header, bool raw)
883 {
884 // Check to make sure that we actually *can* open this file.
885 if( header.data.extract_version > MAX_EXTRACT_VERSION )
886 ZipNotSupportedException.zipver(header.data.extract_version);
887
888 if( header.data.general_flags & UNSUPPORTED_FLAGS )
889 ZipNotSupportedException.flags();
890
891 if( toMethod(header.data.compression_method) == Method.Unsupported )
892 ZipNotSupportedException.method(header.data.compression_method);
893
894 // Open a raw stream
895 InputStream stream = open_file_raw(header);
896
897 // If that's all they wanted, pass it back.
898 if( raw )
899 return stream;
900
901 // Next up, wrap in an appropriate decompression stream
902 switch( toMethod(header.data.compression_method) )
903 {
904 case Method.Store:
905 // Do nothing: \o/
906 break;
907
908 case Method.Deflate:
909 // Wrap in a zlib stream. We want a raw deflate stream,
910 // so force no encoding.
911 stream = new ZlibInput(stream, ZlibInput.Encoding.None);
912 break;
913
914 default:
915 assert(false);
916 }
917
918 // We done, yo!
919 return stream;
920 }
921
922 /*
923 * Opens a file's raw input stream. Basically, this returns a slice of
924 * the archive's input stream.
925 */
926 InputStream open_file_raw(FileHeader header)
927 {
928 // Seek to and parse the local file header
929 seeker.seek(header.data.relative_offset_of_local_header,
930 seeker.Anchor.Begin);
931
932 {
933 uint sig;
934 readExact(source, (&sig)[0..1]);
935 version( BigEndian ) swap(sig);
936 if( sig != LocalFileHeader.signature )
937 ZipException.badsig("local file header");
938 }
939
940 LocalFileHeader lheader; lheader.fill(source);
941
942 if( !lheader.agrees_with(header) )
943 ZipException.incons(header.file_name);
944
945 // Ok; get a slice stream for the file
946 return new SliceSeekInputStream(
947 source, seeker.seek(0, seeker.Anchor.Current),
948 header.data.compressed_size);
949 }
950 }
951
952 //////////////////////////////////////////////////////////////////////////////
953 //////////////////////////////////////////////////////////////////////////////
954 //
955 // ZipBlockWriter
956
957 /**
958 * The ZipBlockWriter class is used to create a Zip archive. It uses a
959 * writing iterator interface.
960 *
961 * Note that this class can only be used with output streams which can be
962 * freely seeked.
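*
* A minimal sketch of typical use (the archive path and contents here are
* illustrative):
*
* -----
* scope zw = new ZipBlockWriter("example.zip");
* zw.method = Method.Deflate;
*
* ZipEntryInfo info;
* info.name = "message.txt";
* zw.putData(info, cast(const(void)[]) "hello, zip!");
*
* zw.finish();
* -----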
963 */
964
965 class ZipBlockWriter : ZipWriter
966 {
967 /**
968 * Creates a ZipBlockWriter using the specified file on the local
969 * filesystem.
970 */
971 this(const(char)[] path)
972 {
973 file_output = new File(path, File.WriteCreate);
974 this(file_output);
975 }
976
977 /**
978 * Creates a ZipBlockWriter using the provided OutputStream. Please note
979 * that this OutputStream must be attached to a conduit implementing the
980 * IConduit.Seek interface.
981 */
982 this(OutputStream output)
983 in
984 {
985 assert( output !is null );
986 assert( (cast(IConduit.Seek) output.conduit) !is null );
987 }
988 body
989 {
990 this.output = output;
991 this.seeker = output; // cast(IConduit.Seek) output;
992
993 // Default to Deflate compression
994 method = Method.Deflate;
995 }
996
997 /**
998 * Finalises the archive, writes out the central directory, and closes the
999 * output stream.
1000 */
1001 void finish()
1002 {
1003 put_cd();
1004 output.close();
1005 output = null;
1006 seeker = null;
1007
1008 if( file_output !is null ) delete file_output;
1009 }
1010
1011 /**
1012 * Adds a file from the local filesystem to the archive.
1013 */
1014 void putFile(ZipEntryInfo info, const(char)[] path)
1015 {
1016 scope file = new File(path);
1017 scope(exit) file.close();
1018 putStream(info, file);
1019 }
1020
1021 /**
1022 * Adds a file using the contents of the given InputStream to the archive.
1023 */
1024 void putStream(ZipEntryInfo info, InputStream source)
1025 {
1026 put_compressed(info, source);
1027 }
1028
1029 /**
1030 * Transfers a file from another archive into this archive. Note that
1031 * this method will not perform any compression: whatever compression was
1032 * applied to the file originally will be preserved.
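*
* For instance, to copy every entry of an existing archive into this one
* without recompressing (a sketch; reader is assumed to be a ZipReader
* opened over the source archive):
*
* -----
* foreach( entry ; reader )
*     writer.putEntry(entry.info, entry);
* -----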
1033 */
1034 void putEntry(ZipEntryInfo info, ZipEntry entry)
1035 {
1036 put_raw(info, entry);
1037 }
1038
1039 /**
1040 * Adds a file using the contents of the given array to the archive.
1041 */
1042 void putData(ZipEntryInfo info, const(void)[] data)
1043 {
1044 //scope mc = new MemoryConduit(data);
1045 scope mc = new Array(data.dup);
1046 scope(exit) mc.close();
1047 put_compressed(info, mc);
1048 }
1049
1050 /**
1051 * This property allows you to control what compression method should be
1052 * used for files being added to the archive.
1053 */
1054 @property
1055 Method method() { return _method; }
1056 @property
1057 Method method(Method v) { return _method = v; } /// ditto
1058
1059 private:
1060 OutputStream output;
1061 OutputStream seeker;
1062 File file_output;
1063
1064 Method _method;
1065
1066 struct Entry
1067 {
1068 FileHeaderData data;
1069 long header_position;
1070 const(char)[] filename;
1071 const(char)[] comment;
1072 ubyte[] extra;
1073 }
1074 Entry[] entries;
1075
1076 void put_cd()
1077 {
1078 // check that there aren't too many CD entries
1079 if( entries.length > ushort.max )
1080 ZipException.toomanyentries;
1081
1082 auto cd_pos = seeker.seek(0, seeker.Anchor.Current);
1083 if( cd_pos > uint.max )
1084 ZipException.toolong;
1085
1086 foreach( entry ; entries )
1087 {
1088 FileHeader header;
1089 header.data = &entry.data;
1090 header.file_name = entry.filename;
1091 header.extra_field = entry.extra;
1092 header.file_comment = entry.comment;
1093
1094 write(output, FileHeader.signature);
1095 header.put(output);
1096 }
1097
1098 auto cd_len = seeker.seek(0, seeker.Anchor.Current) - cd_pos;
1099
1100 if( cd_len > uint.max )
1101 ZipException.cdtoolong;
1102
1103 {
assert( entries.length <= ushort.max );
assert( cd_len <= uint.max );
assert( cd_pos <= uint.max );
1107
1108 EndOfCDRecord eocdr;
1109 eocdr.data.central_directory_entries_on_this_disk =
1110 cast(ushort) entries.length;
1111 eocdr.data.central_directory_entries_total =
1112 cast(ushort) entries.length;
1113 eocdr.data.size_of_central_directory =
1114 cast(uint) cd_len;
1115 eocdr.data.offset_of_start_of_cd_from_starting_disk =
1116 cast(uint) cd_pos;
1117
1118 write(output, EndOfCDRecord.signature);
1119 eocdr.put(output);
1120 }
1121 }
1122
1123 void put_raw(ZipEntryInfo info, ZipEntry entry)
1124 {
1125 // Write out local file header
1126 LocalFileHeader.Data lhdata;
1127 auto chdata = entry.header.data;
1128 lhdata.extract_version = chdata.extract_version;
1129
1130 // Note: we need to mask off the data descriptor bit because we aren't
1131 // going to write one.
1132 lhdata.general_flags = chdata.general_flags & ~(1<<3);
1133 lhdata.compression_method = chdata.compression_method;
1134 lhdata.crc_32 = chdata.crc_32;
1135 lhdata.compressed_size = chdata.compressed_size;
1136 lhdata.uncompressed_size = chdata.uncompressed_size;
1137
1138 timeToDos(info.modified, lhdata.modification_file_time,
1139 lhdata.modification_file_date);
1140
1141 put_local_header(lhdata, info.name);
1142
1143 // Store comment
1144 entries[$-1].comment = info.comment;
1145
1146 // Output file contents
1147 {
1148 auto input = entry.open_raw();
1149 scope(exit) input.close();
1150 output.copy(input).flush();
1151 }
1152 }
1153
1154 void put_compressed(ZipEntryInfo info, InputStream source)
1155 {
1156 debug(Zip) Stderr.formatln("ZipBlockWriter.put_compressed()");
1157
1158 // Write out partial local file header
1159 auto header_pos = seeker.seek(0, seeker.Anchor.Current);
1160 debug(Zip) Stderr.formatln(" . header for {} at {}", info.name, header_pos);
1161 put_local_header(info, _method);
1162
1163 // Store comment
1164 entries[$-1].comment = info.comment;
1165
1166 uint crc;
1167 uint compressed_size;
1168 uint uncompressed_size;
1169
1170 // Output file contents
1171 {
1172 // Input/output chains
1173 InputStream in_chain = source;
1174 OutputStream out_chain = new WrapSeekOutputStream(output);
1175
1176 // Count number of bytes coming in from the source file
1177 scope in_counter = new CounterInput(in_chain);
1178 in_chain = in_counter;
1179 assert( in_counter.count() <= typeof(uncompressed_size).max );
1180 scope(success) uncompressed_size = cast(uint) in_counter.count();
1181
1182 // Count the number of bytes going out to the archive
1183 scope out_counter = new CounterOutput(out_chain);
1184 out_chain = out_counter;
1185 assert( out_counter.count() <= typeof(compressed_size).max );
1186 scope(success) compressed_size = cast(uint) out_counter.count();
1187
1188 // Add crc
1189 scope crc_d = new Crc32(/*CRC_MAGIC*/);
1190 scope crc_s = new DigestInput(in_chain, crc_d);
1191 in_chain = crc_s;
1192 scope(success)
1193 {
1194 debug(Zip) Stderr.formatln(" . Success: storing CRC.");
1195 crc = crc_d.crc32Digest();
1196 }
1197
1198 // Add compression
1199 ZlibOutput compress;
1200 scope(exit) if( compress !is null ) delete compress;
1201
1202 switch( _method )
1203 {
1204 case Method.Store:
1205 break;
1206
1207 case Method.Deflate:
1208 compress = new ZlibOutput(out_chain,
1209 ZlibOutput.Level.init, ZlibOutput.Encoding.None);
1210 out_chain = compress;
1211 break;
1212
1213 default:
1214 assert(false);
1215 }
1216
1217 // All done.
1218 scope(exit) in_chain.close();
1219 scope(success) in_chain.flush();
1220 scope(exit) out_chain.close();
1221
1222 out_chain.copy(in_chain).flush();
1223
1224 debug(Zip) if( compress !is null )
1225 {
1226 Stderr.formatln(" . compressed to {} bytes", compress.written);
1227 }
1228
1229 debug(Zip) Stderr.formatln(" . wrote {} bytes", out_counter.count);
1230 debug(Zip) Stderr.formatln(" . contents written");
1231 }
1232
1233 debug(Zip) Stderr.formatln(" . CRC for \"{}\": 0x{:x8}", info.name, crc);
1234
1235 // Rewind, and patch the header
1236 auto final_pos = seeker.seek(0, seeker.Anchor.Current);
1237 seeker.seek(header_pos);
1238 patch_local_header(crc, compressed_size, uncompressed_size);
1239
1240 // Seek back to the end of the file, and we're done!
1241 seeker.seek(final_pos);
1242 }
1243
1244 /*
1245 * Patches the local file header starting at the current output location
1246 * with updated crc and size information. Also updates the current last
1247 * Entry.
1248 */
1249 void patch_local_header(uint crc_32, uint compressed_size,
1250 uint uncompressed_size)
1251 {
1252 /* BUG: For some reason, this code won't compile. No idea why... if
1253 * you instantiate LFHD, it says that there is no "offsetof" property.
1254 */
1255 /+
1256 alias LocalFileHeaderData LFHD;
1257 static assert( LFHD.compressed_size.offsetof
1258 == LFHD.crc_32.offsetof + 4 );
1259 static assert( LFHD.uncompressed_size.offsetof
1260 == LFHD.compressed_size.offsetof + 4 );
1261 +/
1262
1263 // Don't forget we have to seek past the signature, too
1264 // BUG: .offsetof is broken here
1265 /+seeker.seek(LFHD.crc_32.offsetof+4, seeker.Anchor.Current);+/
1266 seeker.seek(10+4, seeker.Anchor.Current);
1267 write(output, crc_32);
1268 write(output, compressed_size);
1269 write(output, uncompressed_size);
1270
1271 with( entries[$-1] )
1272 {
1273 data.crc_32 = crc_32;
1274 data.compressed_size = compressed_size;
1275 data.uncompressed_size = uncompressed_size;
1276 }
1277 }
1278
1279 /*
1280 * Generates and outputs a local file header from the given info block and
1281 * compression method. Note that the crc_32, compressed_size and
1282 * uncompressed_size header fields will be set to zero, and must be
1283 * patched.
1284 */
1285 void put_local_header(ZipEntryInfo info, Method method)
1286 {
1287 LocalFileHeader.Data data;
1288
1289 data.compression_method = fromMethod(method);
1290 timeToDos(info.modified, data.modification_file_time,
1291 data.modification_file_date);
1292
1293 put_local_header(data, info.name);
1294 }
1295
1296 /*
1297 * Writes the given local file header data and filename out to the output
1298 * stream. It also appends a new Entry with the data and filename.
1299 */
1300 void put_local_header(LocalFileHeaderData data,
1301 const(char)[] file_name)
1302 {
1303 auto f_name = Path.normalize(file_name);
1304 auto p = Path.parse(f_name);
1305
1306 // Compute Zip version
1307 if( data.extract_version == data.extract_version.max )
1308 {
1309
1310 ushort zipver = 10;
1311 void minver(ushort v) { zipver = v>zipver ? v : zipver; }
1312
1313 {
1314 // Compression method
1315 switch( data.compression_method )
1316 {
1317 case 0: minver(10); break;
1318 case 8: minver(20); break;
1319 default:
1320 assert(false);
1321 }
1322
1323 // File is a folder
1324 if( f_name.length > 0 && f_name[$-1] == '/' )
1325 // Is a directory, not a real file
1326 minver(20);
1327 }
1328 data.extract_version = zipver;
1329 }
1330
1331 /+// Encode filename
1332 auto file_name_437 = utf8_to_cp437(file_name);
1333 if( file_name_437 is null )
1334 ZipException.fnencode;+/
1335
1336 /+// Set up file name length
1337 if( file_name_437.length > ushort.max )
1338 ZipException.fntoolong;
1339
1340 data.file_name_length = file_name_437.length;+/
1341
1342 LocalFileHeader header;
1343 header.data = data;
1344 if (p.isAbsolute)
1345 f_name = f_name[p.root.length+1..$];
1346 header.file_name = f_name;
1347
1348 // Write out the header and the filename
1349 auto header_pos = seeker.seek(0, seeker.Anchor.Current);
1350
1351 write(output, LocalFileHeader.signature);
1352 header.put(output);
1353
1354 // Save the header
1355 assert( header_pos <= int.max );
1356 Entry entry;
1357 entry.data.fromLocal(header.data);
1358 entry.filename = header.file_name;
1359 entry.header_position = header_pos;
1360 entry.data.relative_offset_of_local_header = cast(int) header_pos;
1361 entries ~= entry;
1362 }
1363 }
1364
1365 //////////////////////////////////////////////////////////////////////////////
1366 //////////////////////////////////////////////////////////////////////////////
1367 //
1368 // ZipEntry
1369
1370 /**
1371 * This class is used to represent a single entry in an archive.
1372 * Specifically, it combines meta-data about the file (see the info field)
1373 * along with the two basic operations on an entry: open and verify.
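*
* For example, to read the decompressed contents of every entry (a sketch;
* reader is assumed to be a ZipReader over an open archive):
*
* -----
* foreach( entry ; reader )
* {
*     auto content = entry.open().load();
*     // ... use content ...
* }
* -----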
1374 */
1375 class ZipEntry
1376 {
1377 /**
1378 * Header information on the file. See the ZipEntryInfo structure for
1379 * more information.
1380 */
1381 ZipEntryInfo info;
1382
1383 /**
1384 * Size (in bytes) of the file's uncompressed contents.
1385 */
1386 uint size()
1387 {
1388 return header.data.uncompressed_size;
1389 }
1390
1391 /**
1392 * Opens a stream for reading from the file. The contents of this stream
1393 * represent the decompressed contents of the file stored in the archive.
1394 *
1395 * You should not assume that the returned stream is seekable.
1396 *
1397 * Note that the returned stream may be safely closed without affecting
1398 * the underlying archive stream.
1399 *
1400 * If the file has not yet been verified, then the stream will be checked
1401 * as you read from it. When the stream is either exhausted or closed,
1402 * then the integrity of the file's data will be checked. This means that
1403 * if the file is corrupt, an exception will be thrown only after you have
1404 * finished reading from the stream. If you wish to make sure the data is
1405 * valid before you read from the file, call the verify method.
1406 */
1407 InputStream open()
1408 {
1409 // If we haven't verified yet, wrap the stream in the appropriate
1410 // decorators.
1411 if( !verified )
1412 return new ZipEntryVerifier(this, open_dg(header, false));
1413
1414 else
1415 return open_dg(header, false);
1416 }
1417
1418 /**
1419 * Verifies the contents of this file by computing the CRC32 checksum,
1420 * and comparing it against the stored one. Throws an exception if the
1421 * checksums do not match.
1422 *
1423 * Not valid on streamed Zip archives.
1424 */
1425 void verify()
1426 {
1427 // If we haven't verified the contents yet, just read everything in
1428 // to trigger it.
1429 auto s = open();
1430 auto buffer = new ubyte[s.conduit.bufferSize];
1431 while( s.read(buffer) != s.Eof )
1432 {/*Do nothing*/}
1433 s.close();
1434 }
1435
1436 /**
1437 * Creates a new, independent copy of this instance.
1438 */
1439 ZipEntry dup()
1440 {
1441 return new ZipEntry(header, open_dg);
1442 }
1443
1444 private:
1445 /*
1446 * Callback used to open the file.
1447 */
1448 alias InputStream delegate(FileHeader, bool raw) open_dg_t;
1449 open_dg_t open_dg;
1450
1451 /*
1452 * Raw ZIP header.
1453 */
1454 FileHeader header;
1455
1456 /*
1457 * The flag used to keep track of whether the file's contents have been
1458 * verified.
1459 */
1460 bool verified = false;
1461
1462 /*
1463 * Opens a stream that does not perform any decompression or
1464 * transformation of the file contents. This is used internally by
1465 * ZipWriter to perform fast zip to zip transfers without having to
1466 * decompress and then recompress the contents.
1467 *
1468 * Note that because zip stores CRCs for the *uncompressed* data, this
1469 * method currently does not do any verification.
1470 */
1471 InputStream open_raw()
1472 {
1473 return open_dg(header, true);
1474 }
1475
1476 /*
1477 * Creates a new ZipEntry from the FileHeader.
1478 */
1479 this(FileHeader header, open_dg_t open_dg)
1480 {
1481 this.reset(header, open_dg);
1482 }
1483
1484 /*
1485 * Resets the current instance with new values.
1486 */
1487 ZipEntry reset(FileHeader header, open_dg_t open_dg)
1488 {
1489 this.header = header;
1490 this.open_dg = open_dg;
1491 with( info )
1492 {
1493 name = Path.standard(header.file_name.dup);
1494 dosToTime(header.data.modification_file_time,
1495 header.data.modification_file_date,
1496 modified);
1497 comment = header.file_comment.dup;
1498 }
1499
1500 this.verified = false;
1501
1502 return this;
1503 }
1504 }
1505
1506 /**
1507 * This structure contains various pieces of meta-data on a file. The
1508 * contents of this structure may be safely mutated.
1509 *
1510 * This structure is also used to specify meta-data about a file when adding
1511 * it to an archive.
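*
* For example, when adding a file to an archive (the values here are
* illustrative):
*
* -----
* ZipEntryInfo info;
* info.name = "docs/readme.txt";
* info.comment = "added by example";
* // leaving info.modified at Time.min means "use the current time"
* -----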
1512 */
1513 struct ZipEntryInfo
1514 {
1515 /// Full path and file name of this file.
1516 const(char)[] name;
1517 /// Modification timestamp. If this is left uninitialised when passed to
1518 /// a ZipWriter, it will be reset to the current system time.
1519 Time modified = Time.min;
1520 /// Comment on the file.
1521 const(char)[] comment;
1522 }
1523
1524 //////////////////////////////////////////////////////////////////////////////
1525 //////////////////////////////////////////////////////////////////////////////
1526 //////////////////////////////////////////////////////////////////////////////
1527 //
1528 // Exceptions
1529 //
1530
/**
* This is the base class from which all exceptions generated by this module
* derive.
*/
1535 class ZipException : Exception
1536 {
1537 this(immutable(char)[] msg) { super(msg); }
1538
1539 private:
1540 alias typeof(this) thisT;
1541 static void opCall(immutable(char)[] msg) { throw new ZipException(msg); }
1542
1543 @property static void badsig()
1544 {
1545 thisT("corrupt signature or unexpected section found");
1546 }
1547
1548 @property static void badsig(const(char)[] type)
1549 {
1550 thisT("corrupt "~type.idup~" signature or unexpected section found");
1551 }
1552
1553 @property static void incons(const(char)[] name)
1554 {
1555 thisT("inconsistent headers for file \""~name.idup~"\"; "
1556 "archive is likely corrupted");
1557 }
1558
1559 @property static void missingdir()
1560 {
1561 thisT("could not locate central archive directory; "
1562 "file is corrupt or possibly not a Zip archive");
1563 }
1564
1565 @property static void toomanyentries()
1566 {
1567 thisT("too many archive entries");
1568 }
1569
1570 @property static void toolong()
1571 {
1572 thisT("archive is too long; limited to 4GB total");
1573 }
1574
1575 @property static void cdtoolong()
1576 {
1577 thisT("central directory is too long; limited to 4GB total");
1578 }
1579
1580 @property static void fntoolong()
1581 {
1582 thisT("file name too long; limited to 65,535 characters");
1583 }
1584
1585 @property static void eftoolong()
1586 {
1587 thisT("extra field too long; limited to 65,535 characters");
1588 }
1589
1590 @property static void cotoolong()
1591 {
1592 thisT("extra field too long; limited to 65,535 characters");
1593 }
1594
1595 @property static void fnencode()
1596 {
1597 thisT("could not encode filename into codepage 437");
1598 }
1599
1600 @property static void coencode()
1601 {
1602 thisT("could not encode comment into codepage 437");
1603 }
1604
1605 @property static void tooold()
1606 {
1607 thisT("cannot represent dates before January 1, 1980");
1608 }
1609 }
1610
1611 /**
1612 * This exception is thrown if a ZipReader detects that a file's contents do
1613 * not match the stored checksum.
1614 */
1615 class ZipChecksumException : ZipException
1616 {
1617 this(const(char)[] name)
1618 {
1619 super("checksum failed on zip entry \""~name.idup~"\"");
1620 }
1621
1622 private:
1623 static void opCall(const(char)[] name) { throw new ZipChecksumException(name); }
1624 }
1625
/**
* This exception is thrown if you call the reader's get method when there
* are no more files in the archive.
*/
1630 class ZipExhaustedException : ZipException
1631 {
1632 this() { super("no more entries in archive"); }
1633
1634 private:
1635 static void opCall() { throw new ZipExhaustedException; }
1636 }
1637
1638 /**
1639 * This exception is thrown if you attempt to read an archive that uses
1640 * features not supported by the reader.
1641 */
1642 class ZipNotSupportedException : ZipException
1643 {
1644 this(immutable(char)[] msg) { super(msg); }
1645
1646 private:
1647 alias ZipNotSupportedException thisT;
1648
1649 static void opCall(const(char)[] msg)
1650 {
1651 throw new thisT(msg.idup ~ " not supported");
1652 }
1653
1654 static void spanned()
1655 {
1656 thisT("split and multi-disk archives");
1657 }
1658
1659 static void zipver(ushort ver)
1660 {
1661 throw new thisT("zip format version "
1662 ~Integer.toString(ver / 10).idup
1663 ~"."
1664 ~Integer.toString(ver % 10).idup
1665 ~" not supported; maximum of version "
1666 ~Integer.toString(MAX_EXTRACT_VERSION / 10).idup
1667 ~"."
1668 ~Integer.toString(MAX_EXTRACT_VERSION % 10).idup
1669 ~" supported.");
1670 }
1671
1672 static void flags()
1673 {
1674 throw new thisT("unknown or unsupported file flags enabled");
1675 }
1676
1677 static void method(ushort m)
1678 {
1679 // Cheat here and work out what the method *actually* is
1680 immutable(char)[] ms;
1681 switch( m )
1682 {
1683 case 0:
1684 case 8: assert(false); // supported
1685
1686 case 1: ms = "Shrink"; break;
1687 case 2: ms = "Reduce (factor 1)"; break;
1688 case 3: ms = "Reduce (factor 2)"; break;
1689 case 4: ms = "Reduce (factor 3)"; break;
1690 case 5: ms = "Reduce (factor 4)"; break;
1691 case 6: ms = "Implode"; break;
1692
1693 case 9: ms = "Deflate64"; break;
1694 case 10: ms = "TERSE (old)"; break;
1695
1696 case 12: ms = "Bzip2"; break;
1697 case 14: ms = "LZMA"; break;
1698
1699 case 18: ms = "TERSE (new)"; break;
1700 case 19: ms = "LZ77"; break;
1701
1702 case 97: ms = "WavPack"; break;
1703 case 98: ms = "PPMd"; break;
1704
1705 default: ms = "unknown";
1706 }
1707
1708 thisT(ms ~ " compression method");
1709 }
1710 }
1711
1712 //////////////////////////////////////////////////////////////////////////////
1713 //////////////////////////////////////////////////////////////////////////////
1714 //
1715 // Convenience methods
1716
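/**
* Convenience method: creates a new archive at the given path, adding each
* of the named files using the specified compression method. For example
* (the paths here are illustrative):
*
* -----
* createArchive("backup.zip", Method.Deflate, "notes.txt", "todo.txt");
* -----
*/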
1717 void createArchive(const(char)[] archive, Method method, const(char[])[] files...)
1718 {
1719 scope zw = new ZipBlockWriter(archive);
1720 zw.method = method;
1721
1722 foreach( file ; files )
1723 {
1724 ZipEntryInfo zi;
1725 zi.name = file;
1726 zi.modified = Path.modified(file);
1727
1728 zw.putFile(zi, file);
1729 }
1730
1731 zw.finish();
1732 }
1733
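/**
* Convenience method: extracts every file in the given archive beneath the
* dest directory, creating intermediate directories as needed and restoring
* modification times. Directory entries themselves are skipped. For example
* (the paths here are illustrative):
*
* -----
* extractArchive("backup.zip", "restored");
* -----
*/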
1734 void extractArchive(const(char)[] archive, const(char)[] dest)
1735 {
1736 scope zr = new ZipBlockReader(archive);
1737
1738 foreach( entry ; zr )
1739 {
1740 // Skip directories
1741 if( entry.info.name[$-1] == '/' ||
1742 entry.info.name[$-1] == '\\') continue;
1743
1744 auto path = Path.join(dest, entry.info.name);
1745 path = Path.normalize(path);
1746
1747 // Create the parent directory if necessary.
1748 auto parent = Path.parse(path).parent;
1749 if( !Path.exists(parent) )
1750 {
1751 Path.createPath(parent);
1752 }
1753
1754 path = Path.native(path);
1755
1756 // Write out the file
1757 scope fout = new File(path, File.WriteCreate);
1758 fout.copy(entry.open());
1759 fout.close();
1760
1761 // Update timestamps
1762 auto oldTS = Path.timeStamps(path);
1763 Path.timeStamps(path, oldTS.accessed, entry.info.modified);
1764 }
1765 }
1766
1767 //////////////////////////////////////////////////////////////////////////////
1768 //////////////////////////////////////////////////////////////////////////////
1769 //////////////////////////////////////////////////////////////////////////////
1770 //
1771 // Private implementation stuff
1772 //
1773
1774 private:
1775
1776 //////////////////////////////////////////////////////////////////////////////
1777 //////////////////////////////////////////////////////////////////////////////
1778 //
1779 // Verification stuff
1780
1781 /*
1782 * This class wraps an input stream, and computes the CRC as it passes
1783 * through. On the event of either a close or EOF, it checks the CRC against
1784 * the one in the provided ZipEntry. If they don't match, it throws an
1785 * exception.
1786 */
1787
1788 class ZipEntryVerifier : InputStream
1789 {
1790 this(ZipEntry entry, InputStream source)
1791 in
1792 {
1793 assert( entry !is null );
1794 assert( source !is null );
1795 }
1796 body
1797 {
1798 this.entry = entry;
1799 this.digest = new Crc32;
1800 this.source = new DigestInput(source, digest);
1801 }
1802
1803 IConduit conduit()
1804 {
1805 return source.conduit;
1806 }
1807
1808 InputStream input()
1809 {
1810 return source;
1811 }
1812
1813 long seek (long ofs, Anchor anchor = Anchor.Begin)
1814 {
1815 return source.seek (ofs, anchor);
1816 }
1817
1818 void close()
1819 {
1820 check();
1821
1822 this.source.close();
1823 this.entry = null;
1824 this.digest = null;
1825 this.source = null;
1826 }
1827
1828 size_t read(void[] dst)
1829 {
1830 auto bytes = source.read(dst);
1831 if( bytes == IConduit.Eof )
1832 check();
1833 return bytes;
1834 }
1835
1836 override void[] load(size_t max=-1)
1837 {
1838 return Conduit.load(this, max);
1839 }
1840
1841 override InputStream flush()
1842 {
1843 this.source.flush();
1844 return this;
1845 }
1846
1847 private:
1848 Crc32 digest;
1849 InputStream source;
1850 ZipEntry entry;
1851
1852 void check()
1853 {
1854 if( digest is null ) return;
1855
1856 auto crc = digest.crc32Digest();
1857 delete digest;
1858
1859 if( crc != entry.header.data.crc_32 )
1860 ZipChecksumException(entry.info.name);
1861
1862 else
1863 entry.verified = true;
1864 }
1865 }
1866
1867 //////////////////////////////////////////////////////////////////////////////
1868 //////////////////////////////////////////////////////////////////////////////
1869 //
1870 // IO functions
1871
1872 /*
1873 * Really, seriously, read some bytes without having to go through a sodding
1874 * buffer.
1875 */
1876 void readExact(InputStream s, void[] dst)
1877 {
1878 //Stderr.formatln("readExact(s, [0..{}])", dst.length);
1879 while( dst.length > 0 )
1880 {
1881 auto octets = s.read(dst);
1882 //Stderr.formatln(" . octets = {}", octets);
1883 if( octets == -1 ) // Beware the dangers of MAGICAL THINKING
1884 throw new Exception("unexpected end of stream");
1885 dst = dst[octets..$];
1886 }
1887 }
1888
1889 /*
1890 * Really, seriously, write some bytes.
1891 */
1892 void writeExact(OutputStream s, const(void)[] src)
1893 {
1894 while( src.length > 0 )
1895 {
1896 auto octets = s.write(src);
1897 if( octets == -1 )
1898 throw new Exception("unexpected end of stream");
1899 src = src[octets..$];
1900 }
1901 }
1902
1903 void write(T)(OutputStream s, T value)
1904 {
1905 version( BigEndian ) swap(value);
1906 writeExact(s, (&value)[0..1]);
1907 }
1908
1909 //////////////////////////////////////////////////////////////////////////////
1910 //////////////////////////////////////////////////////////////////////////////
1911 //
1912 // Endian garbage
1913
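/*
* Byte-swaps each field of a struct in place. If the struct defines a
* record_fields constant, only that many leading fields are swapped;
* otherwise every field is.
*/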
1914 void swapAll(T)(ref T data)
1915 {
1916 static if( is(typeof(T.record_fields)) )
1917 const fields = T.record_fields;
1918 else
1919 const fields = data.tupleof.length;
1920
1921 foreach( i,_ ; data.tupleof )
1922 {
1923 if( i == fields ) break;
1924 swap(data.tupleof[i]);
1925 }
1926 }
1927
1928 void swap(T)(ref T data)
1929 {
1930 static if( T.sizeof == 1 )
1931 {}
1932 else static if( T.sizeof == 2 )
1933 ByteSwap.swap16(&data, 2);
1934 else static if( T.sizeof == 4 )
1935 ByteSwap.swap32(&data, 4);
1936 else static if( T.sizeof == 8 )
1937 ByteSwap.swap64(&data, 8);
1938 else static if( T.sizeof == 10 )
1939 ByteSwap.swap80(&data, 10);
1940 else
1941 static assert(false, "Can't swap "~T.stringof~"s.");
1942 }
1943
1944 //////////////////////////////////////////////////////////////////////////////
1945 //////////////////////////////////////////////////////////////////////////////
1946 //
1947 // IBM Code Page 437 stuff
1948 //
1949
1950 const char[][] cp437_to_utf8_map_low = [
1951 "\u0000"[], "\u263a", "\u263b", "\u2665",
1952 "\u2666", "\u2663", "\u2660", "\u2022",
1953 "\u25d8", "\u25cb", "\u25d9", "\u2642",
1954 "\u2640", "\u266a", "\u266b", "\u263c",
1955
1956 "\u25b6", "\u25c0", "\u2195", "\u203c",
1957 "\u00b6", "\u00a7", "\u25ac", "\u21a8",
1958 "\u2191", "\u2193", "\u2192", "\u2190",
1959 "\u221f", "\u2194", "\u25b2", "\u25bc"
1960 ];
1961
1962 const char[][] cp437_to_utf8_map_high = [
1963 "\u00c7"[], "\u00fc", "\u00e9", "\u00e2",
1964 "\u00e4", "\u00e0", "\u00e5", "\u00e7",
1965 "\u00ea", "\u00eb", "\u00e8", "\u00ef",
1966 "\u00ee", "\u00ec", "\u00c4", "\u00c5",
1967
1968 "\u00c9", "\u00e6", "\u00c6", "\u00f4",
1969 "\u00f6", "\u00f2", "\u00fb", "\u00f9",
1970 "\u00ff", "\u00d6", "\u00dc", "\u00f8",
1971 "\u00a3", "\u00a5", "\u20a7", "\u0192",
1972
1973 "\u00e1", "\u00ed", "\u00f3", "\u00fa",
1974 "\u00f1", "\u00d1", "\u00aa", "\u00ba",
1975 "\u00bf", "\u2310", "\u00ac", "\u00bd",
1976 "\u00bc", "\u00a1", "\u00ab", "\u00bb",
1977
1978 "\u2591", "\u2592", "\u2593", "\u2502",
1979 "\u2524", "\u2561", "\u2562", "\u2556",
1980 "\u2555", "\u2563", "\u2551", "\u2557",
1981 "\u255d", "\u255c", "\u255b", "\u2510",
1982
1983 "\u2514", "\u2534", "\u252c", "\u251c",
1984 "\u2500", "\u253c", "\u255e", "\u255f",
1985 "\u255a", "\u2554", "\u2569", "\u2566",
1986 "\u2560", "\u2550", "\u256c", "\u2567",
1987
1988 "\u2568", "\u2564", "\u2565", "\u2559",
1989 "\u2558", "\u2552", "\u2553", "\u256b",
1990 "\u256a", "\u2518", "\u250c", "\u2588",
1991 "\u2584", "\u258c", "\u2590", "\u2580",
1992 "\u03b1", "\u00df", "\u0393", "\u03c0",
1993 "\u03a3", "\u03c3", "\u00b5", "\u03c4",
1994 "\u03a6", "\u0398", "\u03a9", "\u03b4",
1995 "\u221e", "\u03c6", "\u03b5", "\u2229",
1996
1997 "\u2261", "\u00b1", "\u2265", "\u2264",
1998 "\u2320", "\u2321", "\u00f7", "\u2248",
1999 "\u00b0", "\u2219", "\u00b7", "\u221a",
2000 "\u207f", "\u00b2", "\u25a0", "\u00a0"
2001 ];

inout(char[]) cp437_to_utf8(inout(ubyte[]) s)
{
    foreach( i,c ; s )
    {
        if( (1 <= c && c <= 31) || c >= 127 )
        {
            /* Damn; we got a character not in ASCII. Since this is the first
             * non-ASCII character we found, copy everything up to this point
             * into the output verbatim. We'll allocate three times as much
             * space as there are remaining characters: a CP 437 character
             * expands to at most three UTF-8 code units, so no further
             * allocations will be needed.
             */
            auto r = new char[i+3*(s.length-i)];
            r[0..i] = (cast(char[]) s[0..i])[];
            size_t k=i; // current length

            // We insert new characters at r[k]

            foreach( d ; s[i..$] )
            {
                if( (32 <= d && d <= 126) || d == 0 )
                {
                    r[k++] = d;
                }
                else if( 1 <= d && d <= 31 )
                {
                    const(char)[] repl = cp437_to_utf8_map_low[d];
                    r[k..k+repl.length] = repl[];
                    k += repl.length;
                }
                else if( d == 127 )
                {
                    const(char)[] repl = "\u2302";
                    r[k..k+repl.length] = repl[];
                    k += repl.length;
                }
                else if( d > 127 )
                {
                    const(char)[] repl = cp437_to_utf8_map_high[d-128];
                    r[k..k+repl.length] = repl[];
                    k += repl.length;
                }
                else
                    assert(false);
            }

            return cast(typeof(return))r[0..k];
        }
    }

    /* If we got here, then all the characters in s are also in ASCII, which
     * means it's also valid UTF-8; return the string unmodified.
     */
    return cast(typeof(return)) s;
}

debug( UnitTest )
{
    unittest
    {
        const(char)[] c(const(char)[] s) { return cp437_to_utf8(cast(const(ubyte)[]) s); }

        auto s = c("Hi there \x01 old \x0c!");
        assert( s == "Hi there \u263a old \u2640!", "\""~s~"\"" );
        s = c("Marker \x7f and divide \xf6.");
        assert( s == "Marker \u2302 and divide \u00f7.", "\""~s~"\"" );
    }
}

__gshared const char[dchar] utf8_to_cp437_map;

shared static this()
{
    utf8_to_cp437_map = [
        '\u0000': '\x00', '\u263a': '\x01', '\u263b': '\x02', '\u2665': '\x03',
        '\u2666': '\x04', '\u2663': '\x05', '\u2660': '\x06', '\u2022': '\x07',
        '\u25d8': '\x08', '\u25cb': '\x09', '\u25d9': '\x0a', '\u2642': '\x0b',
        '\u2640': '\x0c', '\u266a': '\x0d', '\u266b': '\x0e', '\u263c': '\x0f',

        '\u25b6': '\x10', '\u25c0': '\x11', '\u2195': '\x12', '\u203c': '\x13',
        '\u00b6': '\x14', '\u00a7': '\x15', '\u25ac': '\x16', '\u21a8': '\x17',
        '\u2191': '\x18', '\u2193': '\x19', '\u2192': '\x1a', '\u2190': '\x1b',
        '\u221f': '\x1c', '\u2194': '\x1d', '\u25b2': '\x1e', '\u25bc': '\x1f',

        /*
         * Printable ASCII range (well, most of it) is handled specially.
         */

        '\u00c7': '\x80', '\u00fc': '\x81', '\u00e9': '\x82', '\u00e2': '\x83',
        '\u00e4': '\x84', '\u00e0': '\x85', '\u00e5': '\x86', '\u00e7': '\x87',
        '\u00ea': '\x88', '\u00eb': '\x89', '\u00e8': '\x8a', '\u00ef': '\x8b',
        '\u00ee': '\x8c', '\u00ec': '\x8d', '\u00c4': '\x8e', '\u00c5': '\x8f',

        '\u00c9': '\x90', '\u00e6': '\x91', '\u00c6': '\x92', '\u00f4': '\x93',
        '\u00f6': '\x94', '\u00f2': '\x95', '\u00fb': '\x96', '\u00f9': '\x97',
        '\u00ff': '\x98', '\u00d6': '\x99', '\u00dc': '\x9a', '\u00f8': '\x9b',
        '\u00a3': '\x9c', '\u00a5': '\x9d', '\u20a7': '\x9e', '\u0192': '\x9f',

        '\u00e1': '\xa0', '\u00ed': '\xa1', '\u00f3': '\xa2', '\u00fa': '\xa3',
        '\u00f1': '\xa4', '\u00d1': '\xa5', '\u00aa': '\xa6', '\u00ba': '\xa7',
        '\u00bf': '\xa8', '\u2310': '\xa9', '\u00ac': '\xaa', '\u00bd': '\xab',
        '\u00bc': '\xac', '\u00a1': '\xad', '\u00ab': '\xae', '\u00bb': '\xaf',

        '\u2591': '\xb0', '\u2592': '\xb1', '\u2593': '\xb2', '\u2502': '\xb3',
        '\u2524': '\xb4', '\u2561': '\xb5', '\u2562': '\xb6', '\u2556': '\xb7',
        '\u2555': '\xb8', '\u2563': '\xb9', '\u2551': '\xba', '\u2557': '\xbb',
        '\u255d': '\xbc', '\u255c': '\xbd', '\u255b': '\xbe', '\u2510': '\xbf',

        '\u2514': '\xc0', '\u2534': '\xc1', '\u252c': '\xc2', '\u251c': '\xc3',
        '\u2500': '\xc4', '\u253c': '\xc5', '\u255e': '\xc6', '\u255f': '\xc7',
        '\u255a': '\xc8', '\u2554': '\xc9', '\u2569': '\xca', '\u2566': '\xcb',
        '\u2560': '\xcc', '\u2550': '\xcd', '\u256c': '\xce', '\u2567': '\xcf',

        '\u2568': '\xd0', '\u2564': '\xd1', '\u2565': '\xd2', '\u2559': '\xd3',
        '\u2558': '\xd4', '\u2552': '\xd5', '\u2553': '\xd6', '\u256b': '\xd7',
        '\u256a': '\xd8', '\u2518': '\xd9', '\u250c': '\xda', '\u2588': '\xdb',
        '\u2584': '\xdc', '\u258c': '\xdd', '\u2590': '\xde', '\u2580': '\xdf',

        '\u03b1': '\xe0', '\u00df': '\xe1', '\u0393': '\xe2', '\u03c0': '\xe3',
        '\u03a3': '\xe4', '\u03c3': '\xe5', '\u00b5': '\xe6', '\u03c4': '\xe7',
        '\u03a6': '\xe8', '\u0398': '\xe9', '\u03a9': '\xea', '\u03b4': '\xeb',
        '\u221e': '\xec', '\u03c6': '\xed', '\u03b5': '\xee', '\u2229': '\xef',

        '\u2261': '\xf0', '\u00b1': '\xf1', '\u2265': '\xf2', '\u2264': '\xf3',
        '\u2320': '\xf4', '\u2321': '\xf5', '\u00f7': '\xf6', '\u2248': '\xf7',
        '\u00b0': '\xf8', '\u2219': '\xf9', '\u00b7': '\xfa', '\u221a': '\xfb',
        '\u207f': '\xfc', '\u00b2': '\xfd', '\u25a0': '\xfe', '\u00a0': '\xff'
    ];
}

inout(ubyte)[] utf8_to_cp437(inout(char)[] s)
{
    alias typeof(return) ret_type;
    /* The nested helper works around a D compiler bug, hence its name (the
     * issue number); doing this work directly in the inout function trips
     * the bug.
     */
    ubyte[] bug_6867(const(char)[] cs)
    {
        foreach( i,dchar c ; cs )
        {
            if( !((32 <= c && c <= 126) || c == 0) )
            {
                /* We got a character not in CP 437: we need to create a
                 * buffer to hold the new string. Every character costs at
                 * least one UTF-8 code unit but exactly one CP 437 byte, so
                 * the output can never be longer than the input; an array of
                 * the same length always suffices.
                 */
                auto r = new ubyte[cs.length];
                r[0..i] = (cast(ubyte[]) cs[0..i])[];
                size_t k=i;

                foreach( dchar d ; cs[i..$] )
                {
                    if( (32 <= d && d <= 126) || d == 0 )
                        r[k++] = cast(ubyte)d;

                    else if( d == '\u2302' )
                        r[k++] = '\x7f';

                    else if( auto e_ptr = d in utf8_to_cp437_map )
                        r[k++] = *e_ptr;

                    else
                    {
                        throw new Exception("cannot encode character \""
                                ~ Integer.toString(cast(uint)d).idup
                                ~ "\" in codepage 437.");
                    }
                }

                return r[0..k];
            }
        }

        return null;
    }

    auto ret = bug_6867(s);
    if (ret !is null)
        return cast(ret_type)ret;

    // If we got here, then the entire string is printable ASCII, which just
    // happens to *also* be valid CP 437! Huzzah!
    return cast(typeof(return)) s;
}

debug( UnitTest )
{
    unittest
    {
        alias cp437_to_utf8 x;
        alias utf8_to_cp437 y;

        ubyte[256] s;
        foreach( i,ref c ; s )
            c = cast(ubyte)i;

        auto a = x(s);
        auto b = y(a);
        if(!( b == s ))
        {
            // Display list of characters that failed to convert as expected,
            // and what value we got.
            auto hex = "0123456789abcdef";
            auto msg = "".dup;
            foreach( i,ch ; b )
            {
                if( ch != i )
                {
                    msg ~= hex[i>>4];
                    msg ~= hex[i&15];
                    msg ~= " (";
                    msg ~= hex[ch>>4];
                    msg ~= hex[ch&15];
                    msg ~= "), ";
                }
            }
            msg ~= "failed.";

            assert( false, msg );
        }
    }
}

/*
 * Identity conversions; these exist to simplify the code elsewhere, letting
 * UTF-8 and CP 437 names be handled through a single interface.
 */
inout(char[]) utf8_to_utf8(inout(ubyte[]) s) { return cast(typeof(return)) s; }
ubyte[] utf8_to_utf8(char[] s) { return cast(ubyte[]) s; }

//////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////
//
// Date/time stuff

void dosToTime(ushort dostime, ushort dosdate, out Time time)
{
    uint sec, min, hour, day, mon, year;
    sec = (dostime & 0b00000_000000_11111) * 2;
    min = (dostime & 0b00000_111111_00000) >> 5;
    hour= (dostime & 0b11111_000000_00000) >> 11;
    day = (dosdate & 0b0000000_0000_11111);
    mon = (dosdate & 0b0000000_1111_00000) >> 5;
    year=((dosdate & 0b1111111_0000_00000) >> 9) + 1980;

    // This code rules!
    time = Gregorian.generic.toTime(year, mon, day, hour, min, sec);
}

void timeToDos(Time time, out ushort dostime, out ushort dosdate)
{
    // Treat Time.min specially
    if( time == Time.min )
        time = WallClock.now;

    // *muttering happily*
    auto date = Gregorian.generic.toDate(time);
    if( date.year < 1980 )
        ZipException.tooold;

    auto tod = time.time();
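
    // DOS timestamps have two-second resolution, so seconds are stored
    // halved.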
    dostime = cast(ushort) (
          (tod.seconds / 2)
        | (tod.minutes << 5)
        | (tod.hours << 11));

    dosdate = cast(ushort) (
          (date.day)
        | (date.month << 5)
        | ((date.year - 1980) << 9));
}
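
debug( UnitTest )
{
    // Round-trip sanity check for the DOS timestamp packing above. The
    // sample second count is even, so it survives the halving exactly.
    unittest
    {
        auto t = Gregorian.generic.toTime(2007, 12, 25, 17, 30, 10);

        ushort dostime, dosdate;
        timeToDos(t, dostime, dosdate);
        assert( dosdate == (25 | (12 << 5) | ((2007 - 1980) << 9)) );
        assert( dostime == ((10 / 2) | (30 << 5) | (17 << 11)) );

        Time back;
        dosToTime(dostime, dosdate, back);
        assert( back == t );
    }
}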

// ************************************************************************** //
// ************************************************************************** //
// ************************************************************************** //

// Dependencies
private:

import tango.io.device.Conduit : Conduit;

/*******************************************************************************

    copyright: Copyright © 2007 Daniel Keep. All rights reserved.

    license: BSD style: $(LICENSE)

    version: Prerelease

    author: Daniel Keep

*******************************************************************************/

//module tangox.io.stream.CounterStream;

//import tango.io.device.Conduit : Conduit;
//import tango.io.model.IConduit : IConduit, InputStream, OutputStream;

/**
 * The counter stream classes are used to keep track of how many bytes flow
 * through a stream.
 *
 * To use one, simply wrap it around an existing stream. The number of bytes
 * that have flowed through the wrapped stream may be accessed using the
 * count member.
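 *
 * Example:
 * ---
 * // A minimal sketch; "input" stands for any existing InputStream.
 * auto counter = new CounterInput(input);
 * ubyte[64] buffer;
 * counter.read(buffer);
 * long seen = counter.count;
 * ---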
 */
class CounterInput : InputStream
{
    ///
    this(InputStream input)
    in
    {
        assert( input !is null );
    }
    body
    {
        this.source = input;
    }

    override IConduit conduit()
    {
        return source.conduit;
    }

    InputStream input()
    {
        return source;
    }

    long seek (long ofs, Anchor anchor = Anchor.Begin)
    {
        return source.seek (ofs, anchor);
    }

    override void close()
    {
        source.close();
        source = null;
    }

    override size_t read(void[] dst)
    {
        auto read = source.read(dst);
        if( read != IConduit.Eof )
            _count += read;
        return read;
    }

    override void[] load(size_t max=-1)
    {
        return Conduit.load(this, max);
    }

    override InputStream flush()
    {
        source.flush();
        return this;
    }

    ///
    long count() { return _count; }

private:
    InputStream source;
    long _count;
}

/// ditto
class CounterOutput : OutputStream
{
    ///
    this(OutputStream output)
    in
    {
        assert( output !is null );
    }
    body
    {
        this.sink = output;
    }

    override IConduit conduit()
    {
        return sink.conduit;
    }

    OutputStream output()
    {
        return sink;
    }

    long seek (long ofs, Anchor anchor = Anchor.Begin)
    {
        return sink.seek (ofs, anchor);
    }

    override void close()
    {
        sink.close();
        sink = null;
    }

    override size_t write(const(void)[] src)
    {
        auto wrote = sink.write(src);
        if( wrote != IConduit.Eof )
            _count += wrote;
        return wrote;
    }

    override OutputStream copy(InputStream src, size_t max=-1)
    {
        Conduit.transfer(src, this, max);
        return this;
    }

    override OutputStream flush()
    {
        sink.flush();
        return this;
    }

    ///
    long count() { return _count; }

private:
    OutputStream sink;
    long _count;
}

/*******************************************************************************

    copyright: Copyright © 2007 Daniel Keep. All rights reserved.

    license: BSD style: $(LICENSE)

    version: Prerelease

    author: Daniel Keep

*******************************************************************************/

//module tangox.io.stream.SliceStream;

//import tango.io.device.Conduit : Conduit;
//import tango.io.model.IConduit : IConduit, InputStream, OutputStream;

/**
 * This stream can be used to provide stream-based access to a subset of
 * another stream. It is akin to slicing an array.
 *
 * This stream fully supports seeking, and as such requires that the
 * underlying stream also support seeking.
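 *
 * Example:
 * ---
 * // A minimal sketch; "file" stands for any seekable InputStream. This
 * // exposes the 50 bytes starting at offset 100 as their own stream.
 * auto slice = new SliceSeekInputStream(file, 100, 50);
 * auto content = slice.load();
 * ---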
 */
class SliceSeekInputStream : InputStream
{
    //alias IConduit.Seek.Anchor Anchor;

    /**
     * Create a new slice stream from the given source, covering the content
     * starting at position begin, for length bytes.
     */
    this(InputStream source, long begin, long length)
    in
    {
        assert( source !is null );
        assert( (cast(IConduit.Seek) source.conduit) !is null );
        assert( begin >= 0 );
        assert( length >= 0 );
    }
    body
    {
        this.source = source;
        this.seeker = source; //cast(IConduit.Seek) source;
        this.begin = begin;
        this.length = length;
    }

    override IConduit conduit()
    {
        return source.conduit;
    }

    override void close()
    {
        source = null;
        seeker = null;
    }

    override size_t read(void[] dst)
    {
        // If we're at the end of the slice, return eof
        if( _position >= length )
            return IConduit.Eof;

        // Otherwise, make sure we don't try to read past the end of the slice
        if( _position+dst.length > length )
            dst.length = cast(size_t) (length-_position);

        // Seek source stream to the appropriate location.
        if( seeker.seek(0, Anchor.Current) != begin+_position )
            seeker.seek(begin+_position, Anchor.Begin);

        // Do the read
        auto read = source.read(dst);
        if( read == IConduit.Eof )
            // If we got an Eof, we'll consider that a bug for the moment.
            // TODO: proper exception
            throw new Exception("unexpected end-of-stream");

        _position += read;
        return read;
    }

    override void[] load(size_t max=-1)
    {
        return Conduit.load(this, max);
    }

    override InputStream flush()
    {
        source.flush();
        return this;
    }

    InputStream input()
    {
        return source;
    }

    override long seek(long offset, Anchor anchor = Anchor.Begin)
    {
        switch( anchor )
        {
            case Anchor.Begin:
                _position = offset;
                break;

            case Anchor.Current:
                _position += offset;
                if( _position < 0 ) _position = 0;
                break;

            case Anchor.End:
                _position = length+offset;
                if( _position < 0 ) _position = 0;
                break;

            default:
                assert(false);
        }

        return _position;
    }

private:
    InputStream source;
    InputStream seeker;

    long _position, begin, length;

    invariant()
    {
        assert( cast(Object) source is cast(Object) seeker );
        assert( begin >= 0 );
        assert( length >= 0 );
        assert( _position >= 0 );
    }
}

/**
 * This stream can be used to provide stream-based access to a subset of
 * another stream. It is akin to slicing an array.
 */
class SliceInputStream : InputStream
{
    /**
     * Create a new slice stream from the given source, covering the content
     * starting at the current seek position for length bytes.
     */
    this(InputStream source, long length)
    in
    {
        assert( source !is null );
        assert( length >= 0 );
    }
    body
    {
        this.source = source;
        this._length = length;
    }

    override IConduit conduit()
    {
        return source.conduit;
    }

    override void close()
    {
        source = null;
    }

    InputStream input()
    {
        return source;
    }

    long seek (long ofs, Anchor anchor = Anchor.Begin)
    {
        return source.seek (ofs, anchor);
    }

    override size_t read(void[] dst)
    {
        // If we're at the end of the slice, return eof
        if( _length <= 0 )
            return IConduit.Eof;

        // Otherwise, make sure we don't try to read past the end of the slice
        if( dst.length > _length )
            dst.length = cast(size_t) _length;

        // Do the read
        auto read = source.read(dst);
        if( read == IConduit.Eof )
            // If we got an Eof, we'll consider that a bug for the moment.
            // TODO: proper exception
            throw new Exception("unexpected end-of-stream");

        _length -= read;
        return read;
    }

    override void[] load(size_t max=-1)
    {
        return Conduit.load(this, max);
    }

    override InputStream flush()
    {
        source.flush();
        return this;
    }

private:
    InputStream source;
    long _length;

    invariant()
    {
        if( _length > 0 ) assert( source !is null );
    }
}

/**
 * This stream can be used to provide stream-based access to a subset of
 * another stream. It is akin to slicing an array.
 *
 * This stream fully supports seeking, and as such requires that the
 * underlying stream also support seeking.
 */
class SliceSeekOutputStream : OutputStream
{
    //alias IConduit.Seek.Anchor Anchor;

    /**
     * Create a new slice stream from the given source, covering the content
     * starting at position begin, for length bytes.
     */
    this(OutputStream source, long begin, long length)
    in
    {
        assert( source !is null );
        assert( (cast(IConduit.Seek) source.conduit) !is null );
        assert( begin >= 0 );
        assert( length >= 0 );
    }
    body
    {
        this.source = source;
        this.seeker = source; //cast(IConduit.Seek) source;
        this.begin = begin;
        this.length = length;
    }

    override IConduit conduit()
    {
        return source.conduit;
    }

    override void close()
    {
        source = null;
        seeker = null;
    }

    override size_t write(const(void)[] src)
    {
        // If we're at the end of the slice, return eof
        if( _position >= length )
            return IConduit.Eof;

        // Otherwise, make sure we don't try to write past the end of the
        // slice
        if( _position+src.length > length )
            src.length = cast(size_t) (length-_position);

        // Seek source stream to the appropriate location.
        if( seeker.seek(0, Anchor.Current) != begin+_position )
            seeker.seek(begin+_position, Anchor.Begin);

        // Do the write
        auto wrote = source.write(src);
        if( wrote == IConduit.Eof )
            // If we got an Eof, we'll consider that a bug for the moment.
            // TODO: proper exception
            throw new Exception("unexpected end-of-stream");

        _position += wrote;
        return wrote;
    }

    override OutputStream copy(InputStream src, size_t max=-1)
    {
        Conduit.transfer(src, this, max);
        return this;
    }

    override OutputStream flush()
    {
        source.flush();
        return this;
    }

    override OutputStream output()
    {
        return source;
    }

    override long seek(long offset, Anchor anchor = Anchor.Begin)
    {
        switch( anchor )
        {
            case Anchor.Begin:
                _position = offset;
                break;

            case Anchor.Current:
                _position += offset;
                if( _position < 0 ) _position = 0;
                break;

            case Anchor.End:
                _position = length+offset;
                if( _position < 0 ) _position = 0;
                break;

            default:
                assert(false);
        }

        return _position;
    }

private:
    OutputStream source;
    OutputStream seeker;

    long _position, begin, length;

    invariant()
    {
        assert( cast(Object) source is cast(Object) seeker );
        assert( begin >= 0 );
        assert( length >= 0 );
        assert( _position >= 0 );
    }
}

/*******************************************************************************

    copyright: Copyright © 2007 Daniel Keep. All rights reserved.

    license: BSD style: $(LICENSE)

    version: Prerelease

    author: Daniel Keep

*******************************************************************************/

//module tangox.io.stream.WrapStream;

//import tango.io.device.Conduit : Conduit;
//import tango.io.model.IConduit : IConduit, InputStream, OutputStream;

/**
 * This stream can be used to provide access to another stream.
 * Its distinguishing feature is that users cannot close the underlying
 * stream.
 *
 * This stream fully supports seeking, and as such requires that the
 * underlying stream also support seeking.
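 *
 * Example:
 * ---
 * // A minimal sketch; "base" stands for any seekable InputStream. Closing
 * // the wrapper merely detaches it; "base" itself stays open.
 * auto wrapped = new WrapSeekInputStream(base);
 * wrapped.close();
 * ---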
 */
class WrapSeekInputStream : InputStream
{
    //alias IConduit.Seek.Anchor Anchor;

    /**
     * Create a new wrap stream from the given source.
     */
    this(InputStream source)
    in
    {
        assert( source !is null );
        assert( (cast(IConduit.Seek) source.conduit) !is null );
    }
    body
    {
        this.source = source;
        this.seeker = source; //cast(IConduit.Seek) source;
        this._position = seeker.seek(0, Anchor.Current);
    }

    /// ditto
    this(InputStream source, long position)
    in
    {
        assert( position >= 0 );
    }
    body
    {
        this(source);
        this._position = position;
    }

    override IConduit conduit()
    {
        return source.conduit;
    }

    override void close()
    {
        source = null;
        seeker = null;
    }

    override size_t read(void[] dst)
    {
        if( seeker.seek(0, Anchor.Current) != _position )
            seeker.seek(_position, Anchor.Begin);

        auto read = source.read(dst);
        if( read != IConduit.Eof )
            _position += read;

        return read;
    }

    override void[] load(size_t max=-1)
    {
        return Conduit.load(this, max);
    }

    override InputStream flush()
    {
        source.flush();
        return this;
    }

    InputStream input()
    {
        return source;
    }

    override long seek(long offset, Anchor anchor = Anchor.Begin)
    {
        seeker.seek(_position, Anchor.Begin);
        return (_position = seeker.seek(offset, anchor));
    }

private:
    InputStream source;
    InputStream seeker;
    long _position;

    invariant()
    {
        assert( cast(Object) source is cast(Object) seeker );
        assert( _position >= 0 );
    }
}

/**
 * This stream can be used to provide access to another stream.
 * Its distinguishing feature is that users cannot close the underlying
 * stream.
 *
 * This stream fully supports seeking, and as such requires that the
 * underlying stream also support seeking.
 */
class WrapSeekOutputStream : OutputStream
{
    //alias IConduit.Seek.Anchor Anchor;

    /**
     * Create a new wrap stream from the given source.
     */
    this(OutputStream source)
    in
    {
        assert( source !is null );
        assert( (cast(IConduit.Seek) source.conduit) !is null );
    }
    body
    {
        this.source = source;
        this.seeker = source; //cast(IConduit.Seek) source;
        this._position = seeker.seek(0, Anchor.Current);
    }

    /// ditto
    this(OutputStream source, long position)
    in
    {
        assert( position >= 0 );
    }
    body
    {
        this(source);
        this._position = position;
    }

    override IConduit conduit()
    {
        return source.conduit;
    }

    override void close()
    {
        source = null;
        seeker = null;
    }

    override size_t write(const(void)[] src)
    {
        if( seeker.seek(0, Anchor.Current) != _position )
            seeker.seek(_position, Anchor.Begin);

        auto wrote = source.write(src);
        if( wrote != IConduit.Eof )
            _position += wrote;
        return wrote;
    }

    override OutputStream copy(InputStream src, size_t max=-1)
    {
        Conduit.transfer(src, this, max);
        return this;
    }

    override OutputStream flush()
    {
        source.flush();
        return this;
    }

    override OutputStream output()
    {
        return source;
    }

    override long seek(long offset, Anchor anchor = Anchor.Begin)
    {
        seeker.seek(_position, Anchor.Begin);
        return (_position = seeker.seek(offset, anchor));
    }

private:
    OutputStream source;
    OutputStream seeker;
    long _position;

    invariant()
    {
        assert( cast(Object) source is cast(Object) seeker );
        assert( _position >= 0 );
    }
}