1 /******************************************************************************* 2 * 3 * copyright: Copyright (c) 2007 Daniel Keep. All rights reserved. 4 * 5 * license: BSD style: $(LICENSE) 6 * 7 * version: Initial release: December 2007 8 * 9 * author: Daniel Keep 10 * 11 ******************************************************************************/ 12 13 module tango.util.compress.Zip; 14 15 /* 16 17 TODO 18 ==== 19 20 * Disable UTF encoding until I've worked out what version of Zip that's 21 related to... (actually; it's entirely possible that's it's merely a 22 *proposal* at the moment.) (*Done*) 23 24 * Make ZipEntry safe: make them aware that their creating reader has been 25 destroyed. 26 27 */ 28 29 import tango.core.ByteSwap : ByteSwap; 30 import tango.io.device.Array : Array; 31 import tango.io.device.File : File; 32 import Path = tango.io.Path; 33 import tango.io.device.FileMap : FileMap; 34 import tango.io.stream.Zlib : ZlibInput, ZlibOutput; 35 import tango.util.digest.Crc32 : Crc32; 36 import tango.io.model.IConduit : IConduit, InputStream, OutputStream; 37 import tango.io.stream.Digester : DigestInput; 38 import tango.time.Time : Time, TimeSpan; 39 import tango.time.WallClock : WallClock; 40 import tango.time.chrono.Gregorian : Gregorian; 41 42 import Integer = tango.text.convert.Integer; 43 44 debug(Zip) import tango.io.Stdout : Stderr; 45 46 ////////////////////////////////////////////////////////////////////////////// 47 ////////////////////////////////////////////////////////////////////////////// 48 // 49 // Implementation crap 50 // 51 // Why is this here, you ask? Because of bloody DMD forward reference bugs. 52 // For pete's sake, Walter, FIX THEM, please! 53 // 54 // To skip to the actual user-visible stuff, search for "Shared stuff". 

private
{

//////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////
//
// LocalFileHeader
//

    /*
     * On-disk layout of the fixed-size portion of a Zip local file header,
     * excluding the leading 4-byte signature.  align(1) keeps the fields
     * packed exactly as they appear in the archive; the static assert in
     * LocalFileHeader pins the size at 26 bytes.
     */
    align(1)
    struct LocalFileHeaderData
    {
    align(1):
        ushort extract_version = ushort.max;
        ushort general_flags = 0;
        ushort compression_method = 0;
        ushort modification_file_time = 0;
        ushort modification_file_date = 0;
        uint crc_32 = 0; // offsetof = 10
        uint compressed_size = 0;
        uint uncompressed_size = 0;
        ushort file_name_length = 0;
        ushort extra_field_length = 0;

        /// Dumps every field to Stderr; only compiled in under debug(Zip).
        debug(Zip) void dump()
        {
            Stderr
            ("LocalFileHeader.Data {")("\n")
            (" extract_version = ")(extract_version)("\n")
            (" general_flags = ")(general_flags)("\n")
            (" compression_method = ")(compression_method)("\n")
            (" modification_file_time = ")(modification_file_time)("\n")
            (" modification_file_date = ")(modification_file_date)("\n")
            (" crc_32 = ")(crc_32)("\n")
            (" compressed_size = ")(compressed_size)("\n")
            (" uncompressed_size = ")(uncompressed_size)("\n")
            (" file_name_length = ")(file_name_length)("\n")
            (" extra_field_length = ")(extra_field_length)("\n")
            ("}").newline;
        }
    }

    /*
     * In-memory representation of a complete local file header: the fixed
     * Data part plus the two variable-length trailing fields (file name and
     * extra field).
     */
    struct LocalFileHeader
    {
        /// Little-endian magic number preceding this record on disk ("PK\x03\x04").
        enum uint signature = 0x04034b50;

        alias LocalFileHeaderData Data;
        Data data;
        static assert( Data.sizeof == 26 );

        char[] file_name;
        ubyte[] extra_field;

        /// Returns the fixed-size Data part viewed as a raw byte slice.
        void[] data_arr()
        {
            return (&data)[0..1];
        }

        /*
         * Serialises this header (without the signature) to the given output
         * stream: fixed part, then file name (CP437-encoded), then extra
         * field.  The length fields are recomputed from the actual arrays;
         * the stored `data` member is not modified (a local copy is swapped
         * on big-endian hosts before writing).
         */
        void put(OutputStream output)
        {
            // Make sure var-length fields will fit.
            if( file_name.length > ushort.max )
                ZipException.fntoolong;

            if( extra_field.length > ushort.max )
                ZipException.eftoolong;

            // Encode filename
            auto file_name = utf8_to_cp437(this.file_name);

            if( file_name is null )
                ZipException.fnencode;

            // Update lengths in data
            Data data = this.data;
            data.file_name_length = cast(ushort) file_name.length;
            data.extra_field_length = cast(ushort) extra_field.length;

            // Do it
            version( BigEndian ) swapAll(data);
            writeExact(output, (&data)[0..1]);
            writeExact(output, file_name);
            writeExact(output, extra_field);
        }

        /*
         * Populates this header from the given stream, which must be
         * positioned just past the 4-byte signature.  The file name is
         * converted from CP437 to UTF-8.
         */
        void fill(InputStream src)
        {
            readExact(src, data_arr());
            version( BigEndian ) swapAll(data);

            //debug(Zip) data.dump;

            auto tmp = new ubyte[data.file_name_length];
            readExact(src, tmp);
            file_name = cp437_to_utf8(tmp);

            extra_field = new ubyte[data.extra_field_length];
            readExact(src, extra_field);
        }

        /*
         * This method will check to make sure that the local and central headers
         * are the same; if they're not, then that indicates that the archive is
         * corrupt.
         */
        bool agrees_with(FileHeader h)
        {
            // NOTE: extra_field used to be compared with h.extra_field, but this caused
            // an assertion in certain archives. I found a mention of these fields being
            // allowed to be different, so I think it in general is wrong to include in
            // this sanity check. larsivi 20081111
            if( data.extract_version != h.data.extract_version
                || data.general_flags != h.data.general_flags
                || data.compression_method != h.data.compression_method
                || data.modification_file_time != h.data.modification_file_time
                || data.modification_file_date != h.data.modification_file_date
                || file_name != h.file_name )
                return false;

            // We need a separate check for the sizes and crc32, since these will
            // be zero if a trailing descriptor was used.
            if( !h.usingDataDescriptor() && (
                data.crc_32 != h.data.crc_32
                || data.compressed_size != h.data.compressed_size
                || data.uncompressed_size != h.data.uncompressed_size ) )
                return false;

            return true;
        }
    }

//////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////
//
// FileHeader
//

    /*
     * On-disk layout of the fixed-size portion of a central directory file
     * header, excluding the leading 4-byte signature (42 bytes, see the
     * static assert in FileHeader).
     */
    align(1)
    struct FileHeaderData
    {
    align(1):
        ubyte zip_version;
        ubyte file_attribute_type;
        ushort extract_version;
        ushort general_flags;
        ushort compression_method;
        ushort modification_file_time;
        ushort modification_file_date;
        uint crc_32;
        uint compressed_size;
        uint uncompressed_size;
        ushort file_name_length;
        ushort extra_field_length;
        ushort file_comment_length;
        ushort disk_number_start;
        ushort internal_file_attributes = 0;
        uint external_file_attributes = 0;
        // NOTE(review): declared signed here although the Zip format stores
        // this as an unsigned 32-bit offset — TODO confirm intent for
        // archives larger than 2 GiB.
        int relative_offset_of_local_header;

        /// Dumps every field to Stderr; only compiled in under debug(Zip).
        debug(Zip) void dump()
        {
            Stderr
            ("FileHeader.Data {\n")
            (" zip_version = ")(zip_version)("\n")
            (" file_attribute_type = ")(file_attribute_type)("\n")
            (" extract_version = ")(extract_version)("\n")
            (" general_flags = ")(general_flags)("\n")
            (" compression_method = ")(compression_method)("\n")
            (" modification_file_time = ")(modification_file_time)("\n")
            (" modification_file_date = ")(modification_file_date)("\n")
            (" crc_32 = ")(crc_32)("\n")
            (" compressed_size = ")(compressed_size)("\n")
            (" uncompressed_size = ")(uncompressed_size)("\n")
            (" file_name_length = ")(file_name_length)("\n")
            (" extra_field_length = ")(extra_field_length)("\n")
            (" file_comment_length = ")(file_comment_length)("\n")
            (" disk_number_start = ")(disk_number_start)("\n")
            (" internal_file_attributes = ")(internal_file_attributes)("\n")
            (" external_file_attributes = ")(external_file_attributes)("\n")
            (" relative_offset_of_local_header = ")(relative_offset_of_local_header)
            ("\n")
            ("}").newline;
        }

        /// Copies the fields that a central header shares with a local header.
        void fromLocal(LocalFileHeader.Data data)
        {
            extract_version = data.extract_version;
            general_flags = data.general_flags;
            compression_method = data.compression_method;
            modification_file_time = data.modification_file_time;
            modification_file_date = data.modification_file_date;
            crc_32 = data.crc_32;
            compressed_size = data.compressed_size;
            uncompressed_size = data.uncompressed_size;
            file_name_length = data.file_name_length;
            extra_field_length = data.extra_field_length;
        }
    }

    /*
     * In-memory representation of a central directory file header.  Unlike
     * LocalFileHeader, the fixed part is held by *pointer*: map() overlays
     * it directly onto the central-directory buffer read by the reader, so
     * the header stays valid only as long as that buffer does.
     */
    struct FileHeader
    {
        /// Little-endian magic number preceding this record on disk ("PK\x01\x02").
        enum uint signature = 0x02014b50;

        alias FileHeaderData Data;
        Data* data;
        static assert( Data.sizeof == 42 );

        const(char)[] file_name;
        ubyte[] extra_field;
        const(char)[] file_comment;

        /// True when bit 3 of the general flags is set, i.e. the sizes and
        /// CRC live in a trailing data descriptor rather than the header.
        bool usingDataDescriptor()
        {
            return !!(data.general_flags & 1<<3);
        }

        /// Method-specific option bits (general flag bits 1-2).
        uint compressionOptions()
        {
            return (data.general_flags >> 1) & 0b11;
        }

        /// UTF-8 name handling is deliberately disabled (see module TODO);
        /// always reports false so names are treated as CP437.
        bool usingUtf8()
        {
            //return !!(data.general_flags & 1<<11);
            return false;
        }

        /// Returns the pointed-to fixed Data part as a raw byte slice.
        void[] data_arr()
        {
            return (cast(void*)data)[0 .. Data.sizeof];
        }

        /*
         * Serialises this header (without the signature) to the given output
         * stream: fixed part, then name, extra field and comment.  Length
         * fields are recomputed from the actual arrays; the pointed-to data
         * is copied, not modified.
         */
        void put(OutputStream output)
        {
            // Make sure the var-length fields will fit.
            if( file_name.length > ushort.max )
                ZipException.fntoolong;

            if( extra_field.length > ushort.max )
                ZipException.eftoolong;

            if( file_comment.length > ushort.max )
                ZipException.cotoolong;

            // encode the filename and comment
            auto file_name = utf8_to_cp437(this.file_name);
            auto file_comment = utf8_to_cp437(this.file_comment);

            if( file_name is null )
                ZipException.fnencode;

            if( file_comment is null && this.file_comment !is null )
                ZipException.coencode;

            // Update the lengths
            Data data = *(this.data);
            data.file_name_length = cast(ushort) file_name.length;
            data.extra_field_length = cast(ushort) extra_field.length;
            data.file_comment_length = cast(ushort) file_comment.length;

            // Ok; let's do this!
            version( BigEndian ) swapAll(data);
            writeExact(output, (&data)[0..1]);
            writeExact(output, file_name);
            writeExact(output, extra_field);
            writeExact(output, file_comment);
        }

        /*
         * Overlays this header onto the start of src (which must begin just
         * past the signature), slicing the name, extra field and comment out
         * of the same buffer.  On big-endian hosts the overlaid fixed part is
         * byte-swapped in place.  Returns the number of bytes consumed.
         */
        long map(void[] src)
        {
            //debug(Zip) Stderr.formatln("FileHeader.map([0..{}])",src.length);

            auto old_ptr = src.ptr;

            data = cast(Data*) src.ptr;
            src = src[Data.sizeof..$];
            version( BigEndian ) swapAll(*data);

            //debug(Zip) data.dump;

            inout(char[]) function(inout(ubyte[])) conv_fn;
            if( usingUtf8() )
                conv_fn = &cp437_to_utf8;
            else
                conv_fn = &utf8_to_utf8;

            file_name = conv_fn(
                cast(ubyte[]) src[0..data.file_name_length]);
            src = src[data.file_name_length..$];

            extra_field = cast(ubyte[]) src[0..data.extra_field_length];
            src = src[data.extra_field_length..$];

            file_comment = conv_fn(
                cast(ubyte[]) src[0..data.file_comment_length]);
            src = src[data.file_comment_length..$];

            // Return how many bytes we've eaten
            //debug(Zip) Stderr.formatln(" . used {} bytes", cast(long)(src.ptr - old_ptr));
            return cast(long)(src.ptr - old_ptr);
        }
    }

//////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////
//
// EndOfCDRecord
//

    /*
     * On-disk layout of the fixed-size portion of the end-of-central-
     * directory record, excluding the leading 4-byte signature (18 bytes,
     * see the static assert in EndOfCDRecord).
     */
    align(1)
    struct EndOfCDRecordData
    {
    align(1):
        ushort disk_number = 0;
        ushort disk_with_start_of_central_directory = 0;
        ushort central_directory_entries_on_this_disk;
        ushort central_directory_entries_total;
        uint size_of_central_directory;
        uint offset_of_start_of_cd_from_starting_disk;
        ushort file_comment_length;

        /// Dumps every field to Stderr; only compiled in under debug(Zip).
        debug(Zip) void dump()
        {
            Stderr
                .formatln("EndOfCDRecord.Data {}","{")
                .formatln(" disk_number = {}", disk_number)
                .formatln(" disk_with_start_of_central_directory = {}",
                        disk_with_start_of_central_directory)
                .formatln(" central_directory_entries_on_this_disk = {}",
                        central_directory_entries_on_this_disk)
                .formatln(" central_directory_entries_total = {}",
                        central_directory_entries_total)
                .formatln(" size_of_central_directory = {}",
                        size_of_central_directory)
                .formatln(" offset_of_start_of_cd_from_starting_disk = {}",
                        offset_of_start_of_cd_from_starting_disk)
                .formatln(" file_comment_length = {}", file_comment_length)
                .formatln("}");
        }
    }

    /*
     * In-memory representation of the end-of-central-directory record:
     * fixed Data part plus the trailing archive comment.
     */
    struct EndOfCDRecord
    {
        /// Little-endian magic number preceding this record on disk ("PK\x05\x06").
        enum uint signature = 0x06054b50;

        alias EndOfCDRecordData Data;
        Data data;
        static assert( data.sizeof == 18 );

        char[] file_comment;

        /// Returns the fixed-size Data part viewed as a raw byte slice.
        void[] data_arr()
        {
            return (cast(void*)&data)[0 .. data.sizeof];
        }

        /*
         * Serialises the fixed part of this record (without the signature).
         * NOTE(review): the encoded file_comment is measured but never
         * written here — presumably the caller emits no archive comment;
         * verify against ZipBlockWriter.put_cd.
         */
        void put(OutputStream output)
        {
            // Set up the comment; check length, encode
            if( file_comment.length > ushort.max )
                ZipException.cotoolong;

            auto file_comment = utf8_to_cp437(this.file_comment);

            // Set up data block
            Data data = this.data;
            data.file_comment_length = cast(ushort) file_comment.length;

            version( BigEndian ) swapAll(data);
            writeExact(output, (&data)[0..1]);
        }

        /*
         * Populates this record from an in-memory buffer positioned just
         * past the signature; the archive comment is duplicated out of the
         * buffer.
         */
        void fill(void[] src)
        {
            //Stderr.formatln("EndOfCDRecord.fill([0..{}])",src.length);

            auto _data = data_arr();
            _data[] = src[0.._data.length];
            src = src[_data.length..$];
            version( BigEndian ) swapAll(data);

            //data.dump;

            file_comment = cast(char[]) src[0..data.file_comment_length].dup;
        }
    }

// End of implementation crap
}

//////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////
//
// Shared stuff

public
{
    /**
     * This enumeration denotes the kind of compression used on a file.
     */
    enum Method
    {
        /// No compression should be used.
        Store,
        /// Deflate compression.
        Deflate,
        /**
         * This is a special value used for unsupported or unrecognised
         * compression methods. This value is only used internally.
         */
        Unsupported
    }
}

private
{
    /// Zip format version written into produced archives (2.0).
    const ushort ZIP_VERSION = 20;
    /// Highest "version needed to extract" this reader will accept.
    const ushort MAX_EXTRACT_VERSION = 20;

    /*
     * Mask of general-purpose flag bits this implementation understands:
     * bits 1-2 (compression options) and bit 3 (trailing data descriptor).
     * The UTF-8 name flag (bit 11) is deliberately left unsupported — see
     * the module TODO.  Any flag outside this mask makes an archive
     * unreadable by this code.
     */
    const ushort SUPPORTED_FLAGS = 0b00_0_0_0_0000_0_0_0_1_11_0;
    const ushort UNSUPPORTED_FLAGS = cast(ushort)~cast(int)SUPPORTED_FLAGS;

    /// Maps an on-disk compression method id to the Method enum
    /// (anything other than store/deflate becomes Method.Unsupported).
    Method toMethod(ushort method)
    {
        switch( method )
        {
            case 0: return Method.Store;
            case 8: return Method.Deflate;
            default: return Method.Unsupported;
        }
    }

    /// Maps a Method enum value back to its on-disk id; asserts on
    /// Method.Unsupported, which must never be written out.
    ushort fromMethod(Method method)
    {
        switch( method )
        {
            case Method.Store: return 0;
            case Method.Deflate: return 8;
            default:
                assert(false, "unsupported compression method");
        }
    }

    /* NOTE: This doesn't actually appear to work. Using the default magic
     * number with Tango's Crc32 digest works, however.
     */
    //const CRC_MAGIC = 0xdebb20e3u;
}

//////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////
//
// ZipReader

/// Iteration interface implemented by archive readers; see ZipBlockReader.
interface ZipReader
{
    bool streamed();
    void close();
    bool more();
    ZipEntry get();
    ZipEntry get(ZipEntry);
    int opApply(int delegate(ref ZipEntry));
}

//////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////
//
// ZipWriter

/// Writing-iterator interface implemented by archive writers; see
/// ZipBlockWriter.
interface ZipWriter
{
    void finish();
    void putFile(ZipEntryInfo info, const(char)[] path);
    void putStream(ZipEntryInfo info, InputStream source);
    void putEntry(ZipEntryInfo info, ZipEntry entry);
    void putData(ZipEntryInfo info, const(void)[] data);
    Method method();
    Method method(Method);
}

//////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////
//
// ZipBlockReader

/**
 * The ZipBlockReader class is used to parse a Zip archive. It exposes the
 * contents of the archive via an iteration interface. For instance, to loop
 * over all files in an archive, one can use either
 *
 * -----
 *  foreach( entry ; reader )
 *      ...
 * -----
 *
 * Or
 *
 * -----
 *  while( reader.more )
 *  {
 *      auto entry = reader.get;
 *      ...
 *  }
 * -----
 *
 * See the ZipEntry class for more information on the contents of entries.
 *
 * Note that this class can only be used with input sources which can be
 * freely seeked. Also note that you may open a ZipEntry instance produced by
 * this reader at any time until the ZipReader that created it is closed.
 */
class ZipBlockReader : ZipReader
{
    /**
     * Creates a ZipBlockReader using the specified file on the local
     * filesystem.
     */
    this(const(char)[] path)
    {
        file_source = new File(path);
        this(file_source);
    }

    /**
     * Creates a ZipBlockReader using the provided InputStream. Please note
     * that this InputStream must be attached to a conduit implementing the
     * IConduit.Seek interface.
     */
    this(InputStream source)
    in
    {
        assert( cast(IConduit.Seek) source.conduit, "source stream must be seekable" );
    }
    body
    {
        this.source = source;
        this.seeker = source; //cast(IConduit.Seek) source;
    }

    /// Block readers require seeking, so they are never streamed.
    bool streamed() { return false; }

    /**
     * Closes the reader, and releases all resources. After this operation,
     * all ZipEntry instances created by this ZipReader are invalid and should
     * not be used.
     */
    void close()
    {
        // NOTE: Originally more of the GC allocated data in this class were
        // explicitly deleted here, such as cd_data - this caused segfaults
        // and have been removed as they were not necessary from correctness
        // point of view, and the memory usage win is questionable.
        state = State.Done;
        source = null;
        seeker = null;
        destroy(headers);

        // Only close the File if we created it ourselves (path ctor).
        if( file_source !is null )
        {
            file_source.close();
            destroy(file_source);
        }
    }

    /**
     * Returns true if and only if there are additional files in the archive
     * which have not been read via the get method. This returns true before
     * the first call to get (assuming the opened archive is non-empty), and
     * false after the last file has been accessed.
     */
    bool more()
    {
        switch( state )
        {
            case State.Init:
                // Lazily parse the central directory on first use.
                read_cd();
                assert( state == State.Open );
                return more();

            case State.Open:
                return (current_index < headers.length);

            case State.Done:
                return false;

            default:
                assert(false);
        }
    }

    /**
     * Retrieves the next file from the archive. Note that although this does
     * perform IO operations, it will not read the contents of the file.
     *
     * The optional reuse argument can be used to instruct the reader to reuse
     * an existing ZipEntry instance. If passed a null reference, it will
     * create a new ZipEntry instance.
     */
    ZipEntry get()
    {
        if( !more() )
            ZipExhaustedException();

        return new ZipEntry(headers[current_index++], &open_file);
    }

    /// ditto
    ZipEntry get(ZipEntry reuse)
    {
        if( !more() )
            ZipExhaustedException();

        if( reuse is null )
            return new ZipEntry(headers[current_index++], &open_file);
        else
            return reuse.reset(headers[current_index++], &open_file);
    }

    /**
     * This is used to iterate over the contents of an archive using a foreach
     * loop. Please note that the iteration will reuse the ZipEntry instance
     * passed to your loop. If you wish to keep the instance and re-use it
     * later, you $(B must) use the dup member to create a copy.
     */
    int opApply(int delegate(ref ZipEntry) dg)
    {
        int result = 0;
        ZipEntry entry;

        while( more() )
        {
            entry = get(entry);

            result = dg(entry);
            if( result )
                break;
        }

        if( entry !is null )
            destroy(entry);

        return result;
    }

private:
    InputStream source;
    InputStream seeker; //IConduit.Seek seeker;

    enum State { Init, Open, Done }
    State state;
    size_t current_index = 0;
    FileHeader[] headers;

    // These should be killed when the reader is closed.
    ubyte[] cd_data;
    File file_source = null;

    /*
     * This function will read the contents of the central directory. Split
     * or spanned archives aren't supported.
     */
    void read_cd()
    in
    {
        assert( state == State.Init );
        assert( headers is null );
        assert( cd_data is null );
    }
    out
    {
        assert( state == State.Open );
        assert( headers !is null );
        assert( cd_data !is null );
        assert( current_index == 0 );
    }
    body
    {
        //Stderr.formatln("ZipReader.read_cd()");

        // First, we need to locate the end of cd record, so that we know
        // where the cd itself is, and how big it is.
        auto eocdr = read_eocd_record();

        // Now, make sure the archive is all in one file.
        if( eocdr.data.disk_number !=
                eocdr.data.disk_with_start_of_central_directory
            || eocdr.data.central_directory_entries_on_this_disk !=
                eocdr.data.central_directory_entries_total )
            ZipNotSupportedException.spanned();

        // Ok, read the whole damn thing in one go.
        cd_data = new ubyte[eocdr.data.size_of_central_directory];
        long cd_offset = eocdr.data.offset_of_start_of_cd_from_starting_disk;
        seeker.seek(cd_offset, seeker.Anchor.Begin);
        readExact(source, cd_data);

        // Cake. Now, we need to break it up into records.
        headers = new FileHeader[
            eocdr.data.central_directory_entries_total];

        long cdr_offset = cd_offset;

        // Ok, map the CD data into file headers.  Note that FileHeader.map
        // overlays the header onto cd_data, so cd_data must outlive headers.
        foreach( i,ref header ; headers )
        {
            //Stderr.formatln(" . reading header {}...", i);

            // Check signature
            {
                uint sig = (cast(uint[])(cd_data[0..4]))[0];
                version( BigEndian ) swap(sig);
                if( sig != FileHeader.signature )
                    ZipException.badsig("file header");
            }

            auto used = header.map(cd_data[4..$]);
            assert( used <= (size_t.max-4) );
            cd_data = cd_data[4+cast(size_t)used..$];

            // Update offset for next record
            cdr_offset += 4 /* for sig. */ + used;
        }

        // Done!
        state = State.Open;
    }

    /*
     * This will locate the end of CD record in the open stream.
     *
     * This code sucks, but that's because Zip sucks.
     *
     * Basically, the EOCD record is stuffed somewhere at the end of the file.
     * In a brilliant move, the record is *variably sized*, which means we
     * have to do a linear backwards search to find it.
     *
     * The header itself (including the signature) is at minimum 22 bytes
     * long, plus anywhere between 0 and 2^16-1 bytes of comment. That means
     * we need to read the last 2^16-1 + 22 bytes from the file, and look for
     * the signature [0x50,0x4b,0x05,0x06] in [0 .. $-18].
     *
     * If we find the EOCD record, we'll return its contents. If we couldn't
     * find it, we'll throw an exception.
     */
    EndOfCDRecord read_eocd_record()
    in
    {
        assert( state == State.Init );
    }
    body
    {
        //Stderr.formatln("read_eocd_record()");

        // Signature + record + max. comment length
        const max_chunk_len = 4 + EndOfCDRecord.Data.sizeof + ushort.max;

        auto file_len = seeker.seek(0, seeker.Anchor.End);
        assert( file_len <= size_t.max );

        // We're going to need min(max_chunk_len, file_len) bytes.
        size_t chunk_len = max_chunk_len;
        if( file_len < max_chunk_len )
            chunk_len = cast(size_t) file_len;
        //Stderr.formatln(" . chunk_len = {}", chunk_len);

        // Seek back and read in the chunk. Don't forget to clean up after
        // ourselves.
        seeker.seek(-cast(long)chunk_len, seeker.Anchor.End);
        auto chunk_offset = seeker.seek(0, seeker.Anchor.Current);
        //Stderr.formatln(" . chunk_offset = {}", chunk_offset);
        auto chunk = new ubyte[chunk_len];
        scope(exit) destroy(chunk);
        readExact(source, chunk);

        // Now look for our magic number. Don't forget that on big-endian
        // machines, we need to byteswap the value we're looking for.
        uint eocd_magic = EndOfCDRecord.signature;
        version( BigEndian )
            swap(eocd_magic);

        // Sentinel for "signature not found".  (The old code assigned -1,
        // which wraps to the same value, but spelled it out implicitly.)
        size_t eocd_loc = size_t.max;

        // Scan backwards for the signature.  BUG FIX: the previous loop was
        // `for( size_t i=chunk_len-18; i>=0; --i )` — `i>=0` is always true
        // for an unsigned size_t, so when no signature was present, `i`
        // wrapped past zero to size_t.max and `chunk.ptr+i` read far outside
        // the buffer instead of falling through to ZipException.missingdir.
        // The `i-- > 0` form covers exactly [chunk_len-18 .. 0] and stops.
        if( chunk_len >= 18 )
            for( size_t i = chunk_len-18+1; i-- > 0; )
            {
                if( *(cast(uint*)(chunk.ptr+i)) == eocd_magic )
                {
                    // Found the bugger! Make sure we skip the signature (forgot
                    // to do that originally; talk about weird errors :P)
                    eocd_loc = i+4;
                    break;
                }
            }

        // If we didn't find it, then we'll assume that this is not a valid
        // archive.
        if( eocd_loc == size_t.max )
            ZipException.missingdir;

        // Ok, so we found it; now what? Now we need to read the record
        // itself in. eocd_loc is the offset within the chunk where the eocd
        // record was found, so slice it out.
        EndOfCDRecord eocdr;
        eocdr.fill(chunk[eocd_loc..$]);

        // Excellent. We're done here.
        return eocdr;
    }

    /*
     * Opens the specified file for reading. If the raw argument passed is
     * true, then the file is *not* decompressed.
     */
    InputStream open_file(FileHeader header, bool raw)
    {
        // Check to make sure that we actually *can* open this file.
        if( header.data.extract_version > MAX_EXTRACT_VERSION )
            ZipNotSupportedException.zipver(header.data.extract_version);

        if( header.data.general_flags & UNSUPPORTED_FLAGS )
            ZipNotSupportedException.flags();

        if( toMethod(header.data.compression_method) == Method.Unsupported )
            ZipNotSupportedException.method(header.data.compression_method);

        // Open a raw stream
        InputStream stream = open_file_raw(header);

        // If that's all they wanted, pass it back.
        if( raw )
            return stream;

        // Next up, wrap in an appropriate decompression stream
        switch( toMethod(header.data.compression_method) )
        {
            case Method.Store:
                // Do nothing: \o/
                break;

            case Method.Deflate:
                // Wrap in a zlib stream. We want a raw deflate stream,
                // so force no encoding.
                stream = new ZlibInput(stream, ZlibInput.Encoding.None);
                break;

            default:
                assert(false);
        }

        // We done, yo!
        return stream;
    }

    /*
     * Opens a file's raw input stream. Basically, this returns a slice of
     * the archive's input stream.
     */
    InputStream open_file_raw(FileHeader header)
    {
        // Seek to and parse the local file header
        seeker.seek(header.data.relative_offset_of_local_header,
                seeker.Anchor.Begin);

        {
            uint sig;
            readExact(source, (&sig)[0..1]);
            version( BigEndian ) swap(sig);
            if( sig != LocalFileHeader.signature )
                ZipException.badsig("local file header");
        }

        LocalFileHeader lheader; lheader.fill(source);

        if( !lheader.agrees_with(header) )
            ZipException.incons(header.file_name);

        // Ok; get a slice stream for the file
        return new SliceSeekInputStream(
             source, seeker.seek(0, seeker.Anchor.Current),
             header.data.compressed_size);
    }
}

//////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////
//
// ZipBlockWriter

/**
 * The ZipBlockWriter class is used to create a Zip archive. It uses a
 * writing iterator interface.
 *
 * Note that this class can only be used with output streams which can be
 * freely seeked.
 */

class ZipBlockWriter : ZipWriter
{
    /**
     * Creates a ZipBlockWriter using the specified file on the local
     * filesystem.
     */
    this(const(char)[] path)
    {
        file_output = new File(path, File.WriteCreate);
        this(file_output);
    }

    /**
     * Creates a ZipBlockWriter using the provided OutputStream. Please note
     * that this OutputStream must be attached to a conduit implementing the
     * IConduit.Seek interface.
972 */ 973 this(OutputStream output) 974 in 975 { 976 assert( output !is null ); 977 assert( (cast(IConduit.Seek) output.conduit) !is null ); 978 } 979 body 980 { 981 this.output = output; 982 this.seeker = output; // cast(IConduit.Seek) output; 983 984 // Default to Deflate compression 985 method = Method.Deflate; 986 } 987 988 /** 989 * Finalises the archive, writes out the central directory, and closes the 990 * output stream. 991 */ 992 void finish() 993 { 994 put_cd(); 995 output.close(); 996 output = null; 997 seeker = null; 998 999 if( file_output !is null ) destroy(file_output); 1000 } 1001 1002 /** 1003 * Adds a file from the local filesystem to the archive. 1004 */ 1005 void putFile(ZipEntryInfo info, const(char)[] path) 1006 { 1007 scope file = new File(path); 1008 scope(exit) file.close(); 1009 putStream(info, file); 1010 } 1011 1012 /** 1013 * Adds a file using the contents of the given InputStream to the archive. 1014 */ 1015 void putStream(ZipEntryInfo info, InputStream source) 1016 { 1017 put_compressed(info, source); 1018 } 1019 1020 /** 1021 * Transfers a file from another archive into this archive. Note that 1022 * this method will not perform any compression: whatever compression was 1023 * applied to the file originally will be preserved. 1024 */ 1025 void putEntry(ZipEntryInfo info, ZipEntry entry) 1026 { 1027 put_raw(info, entry); 1028 } 1029 1030 /** 1031 * Adds a file using the contents of the given array to the archive. 1032 */ 1033 void putData(ZipEntryInfo info, const(void)[] data) 1034 { 1035 //scope mc = new MemoryConduit(data); 1036 scope mc = new Array(data.dup); 1037 scope(exit) mc.close(); 1038 put_compressed(info, mc); 1039 } 1040 1041 /** 1042 * This property allows you to control what compression method should be 1043 * used for files being added to the archive. 
*/
// -- ZipBlockWriter (continued) ---------------------------------------------

/// Compression method used for entries added from this point on.
@property
Method method() { return _method; }
@property
Method method(Method v) { return _method = v; } /// ditto

private:
OutputStream output;    // primary destination stream
OutputStream seeker;    // seekable view onto the same destination
File file_output;       // underlying file, when writing to a path

Method _method;         // current compression method

// One bookkeeping record per archive member; used later by put_cd to emit
// the central directory.
struct Entry
{
    FileHeaderData data;
    long header_position;       // offset of this entry's local file header
    const(char)[] filename;
    const(char)[] comment;
    ubyte[] extra;
}
Entry[] entries;

/*
 * Writes the central directory (one FileHeader per entry) followed by the
 * end-of-central-directory record.  Raises via ZipException when any of
 * the classic ZIP limits (65,535 entries, 4 GB offsets/sizes) is exceeded.
 */
void put_cd()
{
    // check that there aren't too many CD entries
    if( entries.length > ushort.max )
        ZipException.toomanyentries;

    auto cd_pos = seeker.seek(0, seeker.Anchor.Current);
    if( cd_pos > uint.max )
        ZipException.toolong;

    foreach( entry ; entries )
    {
        FileHeader header;
        header.data = &entry.data;
        header.file_name = entry.filename;
        header.extra_field = entry.extra;
        header.file_comment = entry.comment;

        write(output, FileHeader.signature);
        header.put(output);
    }

    auto cd_len = seeker.seek(0, seeker.Anchor.Current) - cd_pos;

    if( cd_len > uint.max )
        ZipException.cdtoolong;

    {
        assert( entries.length < ushort.max );
        assert( cd_len < uint.max );
        assert( cd_pos < uint.max );

        EndOfCDRecord eocdr;
        eocdr.data.central_directory_entries_on_this_disk =
            cast(ushort) entries.length;
        eocdr.data.central_directory_entries_total =
            cast(ushort) entries.length;
        eocdr.data.size_of_central_directory =
            cast(uint) cd_len;
        eocdr.data.offset_of_start_of_cd_from_starting_disk =
            cast(uint) cd_pos;

        write(output, EndOfCDRecord.signature);
        eocdr.put(output);
    }
}

/*
 * Copies an already-compressed entry (e.g. from another archive) without
 * recompressing.  CRC and sizes are taken from the source entry's header,
 * so no data descriptor is needed.
 */
void put_raw(ZipEntryInfo info, ZipEntry entry)
{
    // Write out local file header
    LocalFileHeader.Data lhdata;
    auto chdata = entry.header.data;
    lhdata.extract_version = chdata.extract_version;

    // Note: we need to mask off the data descriptor bit because we aren't
    // going to write one.
    lhdata.general_flags = chdata.general_flags & ~(1<<3);
    lhdata.compression_method = chdata.compression_method;
    lhdata.crc_32 = chdata.crc_32;
    lhdata.compressed_size = chdata.compressed_size;
    lhdata.uncompressed_size = chdata.uncompressed_size;

    timeToDos(info.modified, lhdata.modification_file_time,
                             lhdata.modification_file_date);

    put_local_header(lhdata, info.name);

    // Store comment
    entries[$-1].comment = info.comment;

    // Output file contents
    {
        auto input = entry.open_raw();
        scope(exit) input.close();
        output.copy(input).flush();
    }
}

/*
 * Compresses `source` into the archive with the current method.  The local
 * header is written first with zeroed CRC/size fields, then patched via
 * patch_local_header once the data has been streamed through the
 * counter/CRC/compressor chain.
 */
void put_compressed(ZipEntryInfo info, InputStream source)
{
    debug(Zip) Stderr.formatln("ZipBlockWriter.put_compressed()");

    // Write out partial local file header
    auto header_pos = seeker.seek(0, seeker.Anchor.Current);
    debug(Zip) Stderr.formatln(" . header for {} at {}", info.name, header_pos);
    put_local_header(info, _method);

    // Store comment
    entries[$-1].comment = info.comment;

    uint crc;
    uint compressed_size;
    uint uncompressed_size;

    // Output file contents
    {
        // Input/output chains
        InputStream in_chain = source;
        OutputStream out_chain = new WrapSeekOutputStream(output);

        // Count number of bytes coming in from the source file
        scope in_counter = new CounterInput(in_chain);
        in_chain = in_counter;
        // NOTE(review): this assert runs before any data has flowed, so
        // count() is still 0 here; a >4 GB input is not actually caught
        // before the scope(success) cast below truncates it -- TODO confirm
        // and move the check after the copy.
        assert( in_counter.count() <= typeof(uncompressed_size).max );
        scope(success) uncompressed_size = cast(uint) in_counter.count();

        // Count the number of bytes going out to the archive
        scope out_counter = new CounterOutput(out_chain);
        out_chain = out_counter;
        // NOTE(review): same pre-copy assert issue as above.
        assert( out_counter.count() <= typeof(compressed_size).max );
        scope(success) compressed_size = cast(uint) out_counter.count();

        // Add crc
        scope crc_d = new Crc32(/*CRC_MAGIC*/);
        scope crc_s = new DigestInput(in_chain, crc_d);
        in_chain = crc_s;
        scope(success)
        {
            debug(Zip) Stderr.formatln(" . Success: storing CRC.");
            crc = crc_d.crc32Digest();
        }

        // Add compression
        ZlibOutput compress;
        scope(exit) if( compress !is null ) destroy(compress);

        switch( _method )
        {
            case Method.Store:
                break;

            case Method.Deflate:
                compress = new ZlibOutput(out_chain,
                        ZlibOutput.Level.init, ZlibOutput.Encoding.None);
                out_chain = compress;
                break;

            default:
                assert(false);
        }

        // All done.  Scope guards fire in reverse order of declaration:
        // flush the input side on success, then close both chains.
        scope(exit) in_chain.close();
        scope(success) in_chain.flush();
        scope(exit) out_chain.close();

        out_chain.copy(in_chain).flush();

        debug(Zip) if( compress !is null )
        {
            Stderr.formatln(" . compressed to {} bytes", compress.written);
        }

        debug(Zip) Stderr.formatln(" . wrote {} bytes", out_counter.count);
        debug(Zip) Stderr.formatln(" . contents written");
    }

    debug(Zip) Stderr.formatln(" . CRC for \"{}\": 0x{:x8}", info.name, crc);

    // Rewind, and patch the header
    auto final_pos = seeker.seek(0, seeker.Anchor.Current);
    seeker.seek(header_pos);
    patch_local_header(crc, compressed_size, uncompressed_size);

    // Seek back to the end of the file, and we're done!
    seeker.seek(final_pos);
}

/*
 * Patches the local file header starting at the current output location
 * with updated crc and size information.  Also updates the current last
 * Entry.
 */
void patch_local_header(uint crc_32, uint compressed_size,
        uint uncompressed_size)
{
    /* BUG: For some reason, this code won't compile.  No idea why... if
     * you instantiate LFHD, it says that there is no "offsetof" property.
     */
    /+
    alias LocalFileHeaderData LFHD;
    static assert( LFHD.compressed_size.offsetof
            == LFHD.crc_32.offsetof + 4 );
    static assert( LFHD.uncompressed_size.offsetof
            == LFHD.compressed_size.offsetof + 4 );
    +/

    // Don't forget we have to seek past the signature, too
    // BUG: .offsetof is broken here
    /+seeker.seek(LFHD.crc_32.offsetof+4, seeker.Anchor.Current);+/
    // 10 == LocalFileHeaderData.crc_32.offsetof (see static header layout),
    // 4 == size of the signature that precedes the header data.
    seeker.seek(10+4, seeker.Anchor.Current);
    write(output, crc_32);
    write(output, compressed_size);
    write(output, uncompressed_size);

    with( entries[$-1] )
    {
        data.crc_32 = crc_32;
        data.compressed_size = compressed_size;
        data.uncompressed_size = uncompressed_size;
    }
}

/*
 * Generates and outputs a local file header from the given info block and
 * compression method.  Note that the crc_32, compressed_size and
 * uncompressed_size header fields will be set to zero, and must be
 * patched.
 */
void put_local_header(ZipEntryInfo info, Method method)
{
    LocalFileHeader.Data data;

    data.compression_method = fromMethod(method);
    timeToDos(info.modified, data.modification_file_time,
                             data.modification_file_date);

    put_local_header(data, info.name);
}

/*
 * Writes the given local file header data and filename out to the output
 * stream.  It also appends a new Entry with the data and filename.
 */
void put_local_header(LocalFileHeaderData data,
        const(char)[] file_name)
{
    auto f_name = Path.normalize(file_name);
    auto p = Path.parse(f_name);

    // Compute Zip version
    if( data.extract_version == data.extract_version.max )
    {
        ushort zipver = 10;
        void minver(ushort v) { zipver = v>zipver ? v : zipver; }

        {
            // Compression method
            switch( data.compression_method )
            {
                case 0: minver(10); break;
                case 8: minver(20); break;
                default:
                    assert(false);
            }

            // File is a folder
            if( f_name.length > 0 && f_name[$-1] == '/' )
                // Is a directory, not a real file
                minver(20);
        }
        data.extract_version = zipver;
    }

    /+// Encode filename
    auto file_name_437 = utf8_to_cp437(file_name);
    if( file_name_437 is null )
        ZipException.fnencode;+/

    /+// Set up file name length
    if( file_name_437.length > ushort.max )
        ZipException.fntoolong;

    data.file_name_length = file_name_437.length;+/

    LocalFileHeader header;
    header.data = data;
    // Strip the root from absolute paths; archives store relative names.
    if (p.isAbsolute)
        f_name = f_name[p.root.length+1..$];
    header.file_name = f_name;

    // Write out the header and the filename
    auto header_pos = seeker.seek(0, seeker.Anchor.Current);

    write(output, LocalFileHeader.signature);
    header.put(output);

    // Save the header
    assert( header_pos <= int.max );
    Entry entry;
    entry.data.fromLocal(header.data);
    entry.filename = header.file_name;
    entry.header_position = header_pos;
    entry.data.relative_offset_of_local_header = cast(int) header_pos;
    entries ~= entry;
}
}

//////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////
//
// ZipEntry

/**
 * This class is used to represent a single entry in an archive.
 * Specifically, it combines meta-data about the file (see the info field)
 * along with the two basic operations on an entry: open and verify.
 */
class ZipEntry
{
    /**
     * Header information on the file.  See the ZipEntryInfo structure for
     * more information.
     */
    ZipEntryInfo info;

    /**
     * Size (in bytes) of the file's uncompressed contents.
     */
    uint size()
    {
        return header.data.uncompressed_size;
    }

    /**
     * Opens a stream for reading from the file.  The contents of this stream
     * represent the decompressed contents of the file stored in the archive.
     *
     * You should not assume that the returned stream is seekable.
     *
     * Note that the returned stream may be safely closed without affecting
     * the underlying archive stream.
     *
     * If the file has not yet been verified, then the stream will be checked
     * as you read from it.  When the stream is either exhausted or closed,
     * then the integrity of the file's data will be checked.  This means that
     * if the file is corrupt, an exception will be thrown only after you have
     * finished reading from the stream.  If you wish to make sure the data is
     * valid before you read from the file, call the verify method.
     */
    InputStream open()
    {
        // If we haven't verified yet, wrap the stream in the appropriate
        // decorators.
        if( !verified )
            return new ZipEntryVerifier(this, open_dg(header, false));

        else
            return open_dg(header, false);
    }

    /**
     * Verifies the contents of this file by computing the CRC32 checksum,
     * and comparing it against the stored one.  Throws an exception if the
     * checksums do not match.
     *
     * Not valid on streamed Zip archives.
     */
    void verify()
    {
        // If we haven't verified the contents yet, just read everything in
        // to trigger it.
        auto s = open();
        auto buffer = new ubyte[s.conduit.bufferSize];
        while( s.read(buffer) != s.Eof )
            {/*Do nothing*/}
        s.close();
    }

    /**
     * Creates a new, independent copy of this instance.
     */
    ZipEntry dup()
    {
        return new ZipEntry(header, open_dg);
    }

private:
    /*
     * Callback used to open the file.
     */
    alias InputStream delegate(FileHeader, bool raw) open_dg_t;
    open_dg_t open_dg;

    /*
     * Raw ZIP header.
     */
    FileHeader header;

    /*
     * The flag used to keep track of whether the file's contents have been
     * verified.
     */
    bool verified = false;

    /*
     * Opens a stream that does not perform any decompression or
     * transformation of the file contents.  This is used internally by
     * ZipWriter to perform fast zip to zip transfers without having to
     * decompress and then recompress the contents.
     *
     * Note that because zip stores CRCs for the *uncompressed* data, this
     * method currently does not do any verification.
     */
    InputStream open_raw()
    {
        return open_dg(header, true);
    }

    /*
     * Creates a new ZipEntry from the FileHeader.
     */
    this(FileHeader header, open_dg_t open_dg)
    {
        this.reset(header, open_dg);
    }

    /*
     * Resets the current instance with new values.
     */
    ZipEntry reset(FileHeader header, open_dg_t open_dg)
    {
        this.header = header;
        this.open_dg = open_dg;
        with( info )
        {
            // dup name/comment: the creating reader may reuse its buffers.
            name = Path.standard(header.file_name.dup);
            dosToTime(header.data.modification_file_time,
                      header.data.modification_file_date,
                      modified);
            comment = header.file_comment.dup;
        }

        this.verified = false;

        return this;
    }
}

/**
 * This structure contains various pieces of meta-data on a file.  The
 * contents of this structure may be safely mutated.
 *
 * This structure is also used to specify meta-data about a file when adding
 * it to an archive.
 */
struct ZipEntryInfo
{
    /// Full path and file name of this file.
    const(char)[] name;
    /// Modification timestamp.  If this is left uninitialised when passed to
    /// a ZipWriter, it will be reset to the current system time.
    Time modified = Time.min;
    /// Comment on the file.
    const(char)[] comment;
}

//////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////
//
// Exceptions
//

/**
 * This is the base class from which all exceptions generated by this module
 * derive from.
 */
class ZipException : Exception
{
    this(immutable(char)[] msg) { super(msg); }

private:
    alias typeof(this) thisT;
    // Each named helper below throws a pre-worded ZipException.
    static void opCall(immutable(char)[] msg) { throw new ZipException(msg); }

    @property static void badsig()
    {
        thisT("corrupt signature or unexpected section found");
    }

    @property static void badsig(const(char)[] type)
    {
        thisT("corrupt "~type.idup~" signature or unexpected section found");
    }

    @property static void incons(const(char)[] name)
    {
        thisT("inconsistent headers for file \""~name.idup~"\"; " ~
                "archive is likely corrupted");
    }

    @property static void missingdir()
    {
        thisT("could not locate central archive directory; " ~
                "file is corrupt or possibly not a Zip archive");
    }

    @property static void toomanyentries()
    {
        thisT("too many archive entries");
    }

    @property static void toolong()
    {
        thisT("archive is too long; limited to 4GB total");
    }

    @property static void cdtoolong()
    {
        thisT("central directory is too long; limited to 4GB total");
    }

    @property static void fntoolong()
    {
        thisT("file name too long; limited to 65,535 characters");
    }

    @property
// ZipException (continued)
    static void eftoolong()
    {
        thisT("extra field too long; limited to 65,535 characters");
    }

    @property static void cotoolong()
    {
        // FIXED: previously repeated eftoolong's "extra field" wording
        // verbatim; "co" here is the comment-related failure (cf. coencode).
        thisT("comment too long; limited to 65,535 characters");
    }

    @property static void fnencode()
    {
        thisT("could not encode filename into codepage 437");
    }

    @property static void coencode()
    {
        thisT("could not encode comment into codepage 437");
    }

    @property static void tooold()
    {
        thisT("cannot represent dates before January 1, 1980");
    }
}

/**
 * This exception is thrown if a ZipReader detects that a file's contents do
 * not match the stored checksum.
 */
class ZipChecksumException : ZipException
{
    this(const(char)[] name)
    {
        super("checksum failed on zip entry \""~name.idup~"\"");
    }

private:
    static void opCall(const(char)[] name) { throw new ZipChecksumException(name); }
}

/**
 * This exception is thrown if you call a get-reader method when there are no
 * more files in the archive.
 */
class ZipExhaustedException : ZipException
{
    this() { super("no more entries in archive"); }

private:
    static void opCall() { throw new ZipExhaustedException; }
}

/**
 * This exception is thrown if you attempt to read an archive that uses
 * features not supported by the reader.
 */
class ZipNotSupportedException : ZipException
{
    this(immutable(char)[] msg) { super(msg); }

private:
    alias ZipNotSupportedException thisT;

    static void opCall(const(char)[] msg)
    {
        throw new thisT(msg.idup ~ " not supported");
    }

    /// Thrown for split and multi-disk archives.
    static void spanned()
    {
        thisT("split and multi-disk archives");
    }

    /// Thrown when the archive needs a newer format version than we support.
    static void zipver(ushort ver)
    {
        throw new thisT("zip format version "
                ~Integer.toString(ver / 10).idup
                ~"."
                ~Integer.toString(ver % 10).idup
                ~" not supported; maximum of version "
                ~Integer.toString(MAX_EXTRACT_VERSION / 10).idup
                ~"."
                ~Integer.toString(MAX_EXTRACT_VERSION % 10).idup
                ~" supported.");
    }

    /// Thrown for general-purpose flag bits we do not understand.
    static void flags()
    {
        throw new thisT("unknown or unsupported file flags enabled");
    }

    /// Thrown for unsupported compression methods, naming the method
    /// where it is known.
    static void method(ushort m)
    {
        // Cheat here and work out what the method *actually* is
        immutable(char)[] ms;
        switch( m )
        {
            case 0:
            case 8:  assert(false); // supported

            case 1:  ms = "Shrink"; break;
            case 2:  ms = "Reduce (factor 1)"; break;
            case 3:  ms = "Reduce (factor 2)"; break;
            case 4:  ms = "Reduce (factor 3)"; break;
            case 5:  ms = "Reduce (factor 4)"; break;
            case 6:  ms = "Implode"; break;

            case 9:  ms = "Deflate64"; break;
            case 10: ms = "TERSE (old)"; break;

            case 12: ms = "Bzip2"; break;
            case 14: ms = "LZMA"; break;

            case 18: ms = "TERSE (new)"; break;
            case 19: ms = "LZ77"; break;

            case 97: ms = "WavPack"; break;
            case 98: ms = "PPMd"; break;

            default: ms = "unknown";
        }

        thisT(ms ~ " compression method");
    }
}

//////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////
//
// Convenience methods

/// Creates a Zip archive at `archive`, adding each of `files` with the
/// given compression method and its on-disk modification time.
void createArchive(const(char)[] archive, Method method, const(char[])[] files...)
{
    scope zw = new ZipBlockWriter(archive);
    zw.method = method;

    foreach( file ; files )
    {
        ZipEntryInfo zi;
        zi.name = file;
        zi.modified = Path.modified(file);

        zw.putFile(zi, file);
    }

    zw.finish();
}

/// Extracts every regular file in `archive` beneath directory `dest`,
/// creating parent directories as needed and restoring modification times.
void extractArchive(const(char)[] archive, const(char)[] dest)
{
    scope zr = new ZipBlockReader(archive);

    foreach( entry ; zr )
    {
        // Skip directories
        // NOTE(review): assumes entry names are non-empty; an empty name
        // would make [$-1] fail -- TODO confirm the reader guarantees this.
        if( entry.info.name[$-1] == '/' ||
            entry.info.name[$-1] == '\\') continue;

        // SECURITY(review): entry names containing ".." are joined and then
        // normalized relative to dest, so extraction of an untrusted
        // archive may escape `dest` ("zip slip").  Flagged, not changed.
        auto path = Path.join(dest, entry.info.name);
        path = Path.normalize(path);

        // Create the parent directory if necessary.
        auto parent = Path.parse(path).parent;
        if( !Path.exists(parent) )
        {
            Path.createPath(parent);
        }

        path = Path.native(path);

        // Write out the file
        scope fout = new File(path, File.WriteCreate);
        fout.copy(entry.open());
        fout.close();

        // Update timestamps
        auto oldTS = Path.timeStamps(path);
        Path.timeStamps(path, oldTS.accessed, entry.info.modified);
    }
}

//////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////
//
// Private implementation stuff
//

private:

//////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////
//
// Verification stuff

/*
 * This class wraps an input stream, and computes the CRC as it passes
 * through.  On the event of either a close or EOF, it checks the CRC against
 * the one in the provided ZipEntry.  If they don't match, it throws an
 * exception.
1777 */ 1778 1779 class ZipEntryVerifier : InputStream 1780 { 1781 this(ZipEntry entry, InputStream source) 1782 in 1783 { 1784 assert( entry !is null ); 1785 assert( source !is null ); 1786 } 1787 body 1788 { 1789 this.entry = entry; 1790 this.digest = new Crc32; 1791 this.source = new DigestInput(source, digest); 1792 } 1793 1794 IConduit conduit() 1795 { 1796 return source.conduit; 1797 } 1798 1799 InputStream input() 1800 { 1801 return source; 1802 } 1803 1804 long seek (long ofs, Anchor anchor = Anchor.Begin) 1805 { 1806 return source.seek (ofs, anchor); 1807 } 1808 1809 void close() 1810 { 1811 check(); 1812 1813 this.source.close(); 1814 this.entry = null; 1815 this.digest = null; 1816 this.source = null; 1817 } 1818 1819 size_t read(void[] dst) 1820 { 1821 auto bytes = source.read(dst); 1822 if( bytes == IConduit.Eof ) 1823 check(); 1824 return bytes; 1825 } 1826 1827 override void[] load(size_t max=-1) 1828 { 1829 return Conduit.load(this, max); 1830 } 1831 1832 override InputStream flush() 1833 { 1834 this.source.flush(); 1835 return this; 1836 } 1837 1838 private: 1839 Crc32 digest; 1840 InputStream source; 1841 ZipEntry entry; 1842 1843 void check() 1844 { 1845 if( digest is null ) return; 1846 1847 auto crc = digest.crc32Digest(); 1848 destroy(digest); 1849 1850 if( crc != entry.header.data.crc_32 ) 1851 ZipChecksumException(entry.info.name); 1852 1853 else 1854 entry.verified = true; 1855 } 1856 } 1857 1858 ////////////////////////////////////////////////////////////////////////////// 1859 ////////////////////////////////////////////////////////////////////////////// 1860 // 1861 // IO functions 1862 1863 /* 1864 * Really, seriously, read some bytes without having to go through a sodding 1865 * buffer. 1866 */ 1867 void readExact(InputStream s, void[] dst) 1868 { 1869 //Stderr.formatln("readExact(s, [0..{}])", dst.length); 1870 while( dst.length > 0 ) 1871 { 1872 auto octets = s.read(dst); 1873 //Stderr.formatln(" . 
//      octets = {}", octets);
        if( octets == -1 ) // Beware the dangers of MAGICAL THINKING
            throw new Exception("unexpected end of stream");
        dst = dst[octets..$];
    }
}

/*
 * Really, seriously, write some bytes.
 */
void writeExact(OutputStream s, const(void)[] src)
{
    while( src.length > 0 )
    {
        auto octets = s.write(src);
        if( octets == -1 )
            throw new Exception("unexpected end of stream");
        src = src[octets..$];
    }
}

/*
 * Writes `value` to `s` as raw bytes, byte-swapping first on big-endian
 * hosts so the output is always little-endian.
 */
void write(T)(OutputStream s, T value)
{
    version( BigEndian ) swap(value);
    writeExact(s, (&value)[0..1]);
}

//////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////
//
// Endian garbage

/*
 * Byte-swaps every field of the struct, or only the first T.record_fields
 * fields when the type declares that count.
 */
void swapAll(T)(ref T data)
{
    static if( is(typeof(T.record_fields)) )
        const fields = T.record_fields;
    else
        const fields = data.tupleof.length;

    foreach( i,_ ; data.tupleof )
    {
        if( i == fields ) break;
        swap(data.tupleof[i]);
    }
}

/*
 * Byte-swaps a single value in place, dispatching on its size.
 */
void swap(T)(ref T data)
{
    static if( T.sizeof == 1 )
        {}
    else static if( T.sizeof == 2 )
        ByteSwap.swap16(&data, 2);
    else static if( T.sizeof == 4 )
        ByteSwap.swap32(&data, 4);
    else static if( T.sizeof == 8 )
        ByteSwap.swap64(&data, 8);
    else static if( T.sizeof == 10 )
        ByteSwap.swap80(&data, 10);
    else
        static assert(false, "Can't swap "~T.stringof~"s.");
}

//////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////
//
// IBM Code Page 437 stuff
//

// UTF-8 replacements for CP437 control-range glyphs 0x01-0x1f.
const char[][] cp437_to_utf8_map_low = [
    "\u0000"[], "\u263a", "\u263b", "\u2665",
    "\u2666",   "\u2663", "\u2660", "\u2022",
    "\u25d8",   "\u25cb", "\u25d9", "\u2642",
    "\u2640",   "\u266a", "\u266b", "\u263c",

    "\u25b6",   "\u25c0", "\u2195", "\u203c",
    "\u00b6",   "\u00a7", "\u25ac", "\u21a8",
    "\u2191",   "\u2193", "\u2192", "\u2190",
    "\u221f",   "\u2194", "\u25b2", "\u25bc"
];

// UTF-8 replacements for CP437 high-range glyphs 0x80-0xff.
const char[][] cp437_to_utf8_map_high = [
    "\u00c7"[], "\u00fc", "\u00e9", "\u00e2",
    "\u00e4",   "\u00e0", "\u00e5", "\u00e7",
    "\u00ea",   "\u00eb", "\u00e8", "\u00ef",
    "\u00ee",   "\u00ec", "\u00c4", "\u00c5",

    "\u00c9",   "\u00e6", "\u00c6", "\u00f4",
    "\u00f6",   "\u00f2", "\u00fb", "\u00f9",
    "\u00ff",   "\u00d6", "\u00dc", "\u00f8",
    "\u00a3",   "\u00a5", "\u20a7", "\u0192",

    "\u00e1",   "\u00ed", "\u00f3", "\u00fa",
    "\u00f1",   "\u00d1", "\u00aa", "\u00ba",
    "\u00bf",   "\u2310", "\u00ac", "\u00bd",
    "\u00bc",   "\u00a1", "\u00ab", "\u00bb",

    "\u2591",   "\u2592", "\u2593", "\u2502",
    "\u2524",   "\u2561", "\u2562", "\u2556",
    "\u2555",   "\u2563", "\u2551", "\u2557",
    "\u255d",   "\u255c", "\u255b", "\u2510",

    "\u2514",   "\u2534", "\u252c", "\u251c",
    "\u2500",   "\u253c", "\u255e", "\u255f",
    "\u255a",   "\u2554", "\u2569", "\u2566",
    "\u2560",   "\u2550", "\u256c", "\u2567",

    "\u2568",   "\u2564", "\u2565", "\u2559",
    "\u2558",   "\u2552", "\u2553", "\u256b",
    "\u256a",   "\u2518", "\u250c", "\u2588",
    "\u2584",   "\u258c", "\u2590", "\u2580",

    "\u03b1",   "\u00df", "\u0393", "\u03c0",
    "\u03a3",   "\u03c3", "\u00b5", "\u03c4",
    "\u03a6",   "\u0398", "\u03a9", "\u03b4",
    "\u221e",   "\u03c6", "\u03b5", "\u2229",

    "\u2261",   "\u00b1", "\u2265", "\u2264",
    "\u2320",   "\u2321", "\u00f7", "\u2248",
    "\u00b0",   "\u2219", "\u00b7", "\u221a",
    "\u207f",   "\u00b2", "\u25a0", "\u00a0"
];

/*
 * Converts a CP437-encoded byte string to UTF-8.  Pure-ASCII input is
 * returned unmodified (no allocation); otherwise a new buffer is built.
 */
inout(char[]) cp437_to_utf8(inout(ubyte[]) s)
{
    foreach( i,c ; s )
    {
        if( (1 <= c && c <= 31) || c >= 127 )
        {
            /* Damn; we got a character not in ASCII.  Since this is the first
             * non-ASCII character we found, copy everything up to this point
             * into the output verbatim.  We'll allocate twice as much space
             * as there are remaining characters to ensure we don't need to do
             * any further allocations.
             */
            auto r = new char[i+2*(s.length-i)];
            r[0..i] = (cast(char[]) s[0..i])[];
            size_t k=i; // current length

            // We insert new characters at r[i+j+k]

            foreach( d ; s[i..$] )
            {
                if( 32 <= d && d <= 126 || d == 0 )
                {
                    r[k++] = d;
                }
                else if( 1 <= d && d <= 31 )
                {
                    const(char)[] repl = cp437_to_utf8_map_low[d];
                    r[k..k+repl.length] = repl[];
                    k += repl.length;
                }
                else if( d == 127 )
                {
                    const(char)[] repl = "\u2302";
                    r[k..k+repl.length] = repl[];
                    k += repl.length;
                }
                else if( d > 127 )
                {
                    const(char)[] repl = cp437_to_utf8_map_high[d-128];
                    r[k..k+repl.length] = repl[];
                    k += repl.length;
                }
                else
                    assert(false);
            }

            return cast(typeof(return))r[0..k];
        }
    }

    /* If we got here, then all the characters in s are also in ASCII, which
     * means it's also valid UTF-8; return the string unmodified.
     */
    return cast(typeof(return)) s;
}

debug( UnitTest )
{
    unittest
    {
        const(char)[] c(const(char)[] s) { return cp437_to_utf8(cast(const(ubyte)[]) s); }

        auto s = c("Hi there \x01 old \x0c!");
        assert( s == "Hi there \u263a old \u2640!", "\""~s~"\"" );
        s = c("Marker \x7f and divide \xf6.");
        assert( s == "Marker \u2302 and divide \u00f7.", "\""~s~"\"" );
    }
}

// Reverse mapping (UTF-8 code point -> CP437 byte); populated at start-up
// by the shared module constructor below.
__gshared const char[dchar] utf8_to_cp437_map;

shared static this()
{
    utf8_to_cp437_map = [
        '\u0000': '\x00', '\u263a': '\x01', '\u263b': '\x02', '\u2665': '\x03',
        '\u2666': '\x04', '\u2663': '\x05', '\u2660': '\x06', '\u2022': '\x07',
        '\u25d8': '\x08', '\u25cb': '\x09', '\u25d9': '\x0a', '\u2642': '\x0b',
        '\u2640': '\x0c', '\u266a': '\x0d', '\u266b': '\x0e', '\u263c': '\x0f',

        '\u25b6': '\x10', '\u25c0': '\x11', '\u2195': '\x12', '\u203c': '\x13',
        '\u00b6': '\x14', '\u00a7': '\x15', '\u25ac': '\x16', '\u21a8': '\x17',
        '\u2191': '\x18', '\u2193': '\x19', '\u2192': '\x1a', '\u2190': '\x1b',
        '\u221f': '\x1c', '\u2194': '\x1d', '\u25b2': '\x1e', '\u25bc': '\x1f',

        /*
         * Printable ASCII range (well, most of it) is handled specially.
         */

        '\u00c7': '\x80', '\u00fc': '\x81', '\u00e9': '\x82', '\u00e2': '\x83',
        '\u00e4': '\x84', '\u00e0': '\x85', '\u00e5': '\x86', '\u00e7': '\x87',
        '\u00ea': '\x88', '\u00eb': '\x89', '\u00e8': '\x8a', '\u00ef': '\x8b',
        '\u00ee': '\x8c', '\u00ec': '\x8d', '\u00c4': '\x8e', '\u00c5': '\x8f',

        '\u00c9': '\x90', '\u00e6': '\x91', '\u00c6': '\x92', '\u00f4': '\x93',
        '\u00f6': '\x94', '\u00f2': '\x95', '\u00fb': '\x96', '\u00f9': '\x97',
        '\u00ff': '\x98', '\u00d6': '\x99', '\u00dc': '\x9a', '\u00f8': '\x9b',
        '\u00a3': '\x9c', '\u00a5': '\x9d', '\u20a7': '\x9e', '\u0192': '\x9f',

        '\u00e1': '\xa0', '\u00ed': '\xa1', '\u00f3': '\xa2', '\u00fa': '\xa3',
        '\u00f1': '\xa4', '\u00d1': '\xa5', '\u00aa': '\xa6', '\u00ba': '\xa7',
        '\u00bf': '\xa8', '\u2310': '\xa9', '\u00ac': '\xaa', '\u00bd': '\xab',
        '\u00bc': '\xac', '\u00a1': '\xad', '\u00ab': '\xae', '\u00bb': '\xaf',

        '\u2591': '\xb0', '\u2592': '\xb1', '\u2593': '\xb2', '\u2502': '\xb3',
        '\u2524': '\xb4', '\u2561': '\xb5', '\u2562': '\xb6', '\u2556': '\xb7',
        '\u2555': '\xb8', '\u2563': '\xb9', '\u2551': '\xba', '\u2557': '\xbb',
        '\u255d': '\xbc', '\u255c': '\xbd', '\u255b': '\xbe', '\u2510': '\xbf',

        '\u2514': '\xc0', '\u2534': '\xc1', '\u252c': '\xc2', '\u251c': '\xc3',
        '\u2500': '\xc4', '\u253c': '\xc5', '\u255e': '\xc6', '\u255f': '\xc7',
        '\u255a': '\xc8', '\u2554': '\xc9', '\u2569': '\xca', '\u2566': '\xcb',
        '\u2560': '\xcc', '\u2550': '\xcd', '\u256c': '\xce', '\u2567': '\xcf',

        '\u2568': '\xd0', '\u2564': '\xd1', '\u2565': '\xd2', '\u2559': '\xd3',
        '\u2558': '\xd4', '\u2552': '\xd5', '\u2553': '\xd6', '\u256b': '\xd7',
        '\u256a': '\xd8', '\u2518': '\xd9', '\u250c': '\xda', '\u2588': '\xdb',
        '\u2584': '\xdc', '\u258c': '\xdd', '\u2590': '\xde', '\u2580': '\xdf',

        '\u03b1': '\xe0', '\u00df': '\xe1', '\u0393': '\xe2', '\u03c0': '\xe3',
        '\u03a3': '\xe4', '\u03c3': '\xe5', '\u00b5': '\xe6', '\u03c4': '\xe7',
        '\u03a6': '\xe8', '\u0398': '\xe9', '\u03a9': '\xea', '\u03b4': '\xeb',
        '\u221e': '\xec', '\u03c6': '\xed', '\u03b5': '\xee', '\u2229': '\xef',

        '\u2261': '\xf0', '\u00b1': '\xf1', '\u2265': '\xf2', '\u2264': '\xf3',
        '\u2320': '\xf4', '\u2321': '\xf5', '\u00f7': '\xf6', '\u2248': '\xf7',
        '\u00b0': '\xf8', '\u2219': '\xf9', '\u00b7': '\xfa', '\u221a': '\xfb',
        '\u207f': '\xfc', '\u00b2': '\xfd', '\u25a0': '\xfe', '\u00a0': '\xff'
    ];
}

/*
 * Converts a UTF-8 string to CP437 bytes.  Pure printable-ASCII input is
 * returned unmodified; characters with no CP437 equivalent raise an
 * Exception.
 */
inout(ubyte)[] utf8_to_cp437(inout(char)[] s)
{
    alias typeof(return) ret_type; /* Some sort of strange bug here */
    ubyte[] bug_6867(const(char)[] cs)
    {
        foreach( i,dchar c ; cs )
        {
            if( !((32 <= c && c <= 126) || c == 0) )
            {
                /* We got a character not in CP 437: we need to create a buffer to
                 * hold the new string.  Since UTF-8 is *always* larger than CP
                 * 437, we need, at most, an array of the same number of elements.
                 */
                auto r = new ubyte[cs.length];
                r[0..i] = (cast(ubyte[]) cs[0..i])[];
                size_t k=i;

                foreach( dchar d ; cs[i..$] )
                {
                    if( 32 <= d && d <= 126 || d == 0 )
                        r[k++] = cast(ubyte)d;

                    else if( d == '\u2302' )
                        r[k++] = '\x7f';

                    else if( auto e_ptr = d in utf8_to_cp437_map )
                        r[k++] = *e_ptr;

                    else
                    {
                        throw new Exception("cannot encode character \""
                                ~ Integer.toString(cast(uint)d).idup
                                ~ "\" in codepage 437.");
                    }
                }

                return r[0..k];
            }
        }

        return null;
    }

    auto ret = bug_6867(s);
    if (ret !is null)
        return cast(ret_type)ret;

    // If we got here, then the entire string is printable ASCII, which just
    // happens to *also* be valid CP 437!  Huzzah!
    return cast(typeof(return)) s;
}

debug( UnitTest )
{
    unittest
    {
        alias cp437_to_utf8 x;
        alias utf8_to_cp437 y;

        // Round-trip all 256 CP437 code points through UTF-8 and back.
        ubyte[256] s;
        foreach( i,ref c ; s )
            c = cast(ubyte)i;

        auto a = x(s);
        auto b = y(a);
        if(!( b == s ))
        {
            // Display list of characters that failed to convert as expected,
            // and what value we got.
            auto hex = "0123456789abcdef";
            auto msg = "".dup;
            foreach( i,ch ; b )
            {
                if( ch != i )
                {
                    msg ~= hex[i>>4];
                    msg ~= hex[i&15];
                    msg ~= " (";
                    msg ~= hex[ch>>4];
                    msg ~= hex[ch&15];
                    msg ~= "), ";
                }
            }
            msg ~= "failed.";

            assert( false, msg );
        }
    }
}

/*
 * This is here to simplify the code elsewhere.
 */
inout(char[]) utf8_to_utf8(inout(ubyte[]) s) { return cast(typeof(return)) s; }
ubyte[] utf8_to_utf8(char[] s) { return cast(ubyte[]) s; }

//////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////
//
// Date/time stuff

/*
 * Unpacks MS-DOS 16-bit time and date bitfields into a Time.  Seconds are
 * stored in 2-second units; years are offset from 1980.
 */
void dosToTime(ushort dostime, ushort dosdate, out Time time)
{
    uint sec, min, hour, day, mon, year;
    sec = (dostime & 0b00000_000000_11111) * 2;
    min = (dostime & 0b00000_111111_00000) >> 5;
    hour= (dostime & 0b11111_000000_00000) >> 11;
    day = (dosdate & 0b0000000_0000_11111);
    mon = (dosdate & 0b0000000_1111_00000) >> 5;
    year=((dosdate & 0b1111111_0000_00000) >> 9) + 1980;

    // This code rules!
    time = Gregorian.generic.toTime(year, mon, day, hour, min, sec);
}

/*
 * Packs a Time into MS-DOS time/date bitfields.  Time.min is treated as
 * "now"; dates before 1980 cannot be represented and raise via
 * ZipException.tooold.
 */
void timeToDos(Time time, out ushort dostime, out ushort dosdate)
{
    // Treat Time.min specially
    if( time == Time.min )
        time = WallClock.now;

    // *muttering happily*
    auto date = Gregorian.generic.toDate(time);
    if( date.year < 1980 )
        ZipException.tooold;

    auto tod = time.time();
    dostime = cast(ushort) (
        (tod.seconds / 2)
      | (tod.minutes << 5)
      | (tod.hours   << 11));

    dosdate = cast(ushort) (
        (date.day)
      | (date.month << 5)
      | ((date.year - 1980) << 9));
}

// ************************************************************************** //
// ************************************************************************** //
// ************************************************************************** //

// Dependencies
private:

import tango.io.device.Conduit : Conduit;

/*******************************************************************************

    copyright:  Copyright © 2007 Daniel Keep.  All rights reserved.

    license:    BSD style: $(LICENSE)

    version:    Prerelease

    author:     Daniel Keep

*******************************************************************************/

//module tangox.io.stream.CounterStream;

//import tango.io.device.Conduit : Conduit;
//import tango.io.model.IConduit : IConduit, InputStream, OutputStream;

/**
 * The counter stream classes are used to keep track of how many bytes flow
 * through a stream.
 *
 * To use them, simply wrap it around an existing stream.  The number of bytes
 * that have flowed through the wrapped stream may be accessed using the
 * count member.
 */
class CounterInput : InputStream
{
    ///
    this(InputStream input)
    in
    {
        assert( input !is null );
    }
    body
    {
        this.source = input;
    }

    override IConduit conduit()
    {
        return source.conduit;
    }

    InputStream input()
    {
        return source;
    }

    long seek (long ofs, Anchor anchor = Anchor.Begin)
    {
        return source.seek (ofs, anchor);
    }

    override void close()
    {
        source.close();
        source = null;
    }

    override size_t read(void[] dst)
    {
        auto read = source.read(dst);
        // Only successful reads contribute to the running total.
        if( read != IConduit.Eof )
            _count += read;
        return read;
    }

    override void[] load(size_t max=-1)
    {
        return Conduit.load(this, max);
    }

    override InputStream flush()
    {
        source.flush();
        return this;
    }

    /// Number of bytes read through this stream so far.
    long count() { return _count; }

private:
    InputStream source;
    long _count;
}

/// ditto
class CounterOutput : OutputStream
{
    ///
    this(OutputStream output)
    in
    {
        assert( output !is null );
    }
    body
    {
        this.sink = output;
    }

    override IConduit conduit()
    {
        return sink.conduit;
    }

    OutputStream output()
    {
        return sink;
    }

    long seek (long ofs, Anchor anchor = Anchor.Begin)
    {
        return sink.seek (ofs, anchor);
    }

    override void close()
    {
        sink.close();
        sink = null;
    }

    override size_t write(const(void)[] dst)
    {
        auto wrote = sink.write(dst);
        // Only successful writes contribute to the running total.
        if( wrote != IConduit.Eof )
            _count += wrote;
        return wrote;
    }

    override OutputStream copy(InputStream src, size_t max=-1)
    {
        Conduit.transfer(src, this, max);
        return this;
    }

    override OutputStream flush()
    {
        sink.flush();
        return this;
    }

    /// Number of bytes written through this stream so far.
    long count() { return _count; }

private:
    OutputStream sink;
    long _count;
}

/*******************************************************************************

    copyright:  Copyright © 2007 Daniel Keep.  All rights reserved.

    license:    BSD style: $(LICENSE)

    version:    Prerelease

    author:     Daniel Keep

*******************************************************************************/

//module tangox.io.stream.SliceStream;

//import tango.io.device.Conduit : Conduit;
//import tango.io.model.IConduit : IConduit, InputStream, OutputStream;

/**
 * This stream can be used to provide stream-based access to a subset of
 * another stream.  It is akin to slicing an array.
 *
 * This stream fully supports seeking, and as such requires that the
 * underlying stream also support seeking.
 */
class SliceSeekInputStream : InputStream
{
    //alias IConduit.Seek.Anchor Anchor;

    /**
     * Create a new slice stream from the given source, covering the content
     * starting at position begin, for length bytes.
2451 */ 2452 this(InputStream source, long begin, long length) 2453 in 2454 { 2455 assert( source !is null ); 2456 assert( (cast(IConduit.Seek) source.conduit) !is null ); 2457 assert( begin >= 0 ); 2458 assert( length >= 0 ); 2459 } 2460 body 2461 { 2462 this.source = source; 2463 this.seeker = source; //cast(IConduit.Seek) source; 2464 this.begin = begin; 2465 this.length = length; 2466 } 2467 2468 override IConduit conduit() 2469 { 2470 return source.conduit; 2471 } 2472 2473 override void close() 2474 { 2475 source = null; 2476 seeker = null; 2477 } 2478 2479 override size_t read(void[] dst) 2480 { 2481 // If we're at the end of the slice, return eof 2482 if( _position >= length ) 2483 return IConduit.Eof; 2484 2485 // Otherwise, make sure we don't try to read past the end of the slice 2486 if( _position+dst.length > length ) 2487 dst.length = cast(size_t) (length-_position); 2488 2489 // Seek source stream to the appropriate location. 2490 if( seeker.seek(0, Anchor.Current) != begin+_position ) 2491 seeker.seek(begin+_position, Anchor.Begin); 2492 2493 // Do the read 2494 auto read = source.read(dst); 2495 if( read == IConduit.Eof ) 2496 // If we got an Eof, we'll consider that a bug for the moment. 
2497 // TODO: proper exception 2498 throw new Exception("unexpected end-of-stream"); 2499 2500 _position += read; 2501 return read; 2502 } 2503 2504 override void[] load(size_t max=-1) 2505 { 2506 return Conduit.load(this, max); 2507 } 2508 2509 override InputStream flush() 2510 { 2511 source.flush(); 2512 return this; 2513 } 2514 2515 InputStream input() 2516 { 2517 return source; 2518 } 2519 2520 override long seek(long offset, Anchor anchor = cast(Anchor)0) 2521 { 2522 switch( anchor ) 2523 { 2524 case Anchor.Begin: 2525 _position = offset; 2526 break; 2527 2528 case Anchor.Current: 2529 _position += offset; 2530 if( _position < 0 ) _position = 0; 2531 break; 2532 2533 case Anchor.End: 2534 _position = length+offset; 2535 if( _position < 0 ) _position = 0; 2536 break; 2537 2538 default: 2539 assert(false); 2540 } 2541 2542 return _position; 2543 } 2544 2545 private: 2546 InputStream source; 2547 InputStream seeker; 2548 2549 long _position, begin, length; 2550 2551 invariant() 2552 { 2553 assert( cast(Object) source is cast(Object) seeker ); 2554 assert( begin >= 0 ); 2555 assert( length >= 0 ); 2556 assert( _position >= 0 ); 2557 } 2558 } 2559 2560 /** 2561 * This stream can be used to provide stream-based access to a subset of 2562 * another stream. It is akin to slicing an array. 2563 */ 2564 class SliceInputStream : InputStream 2565 { 2566 /** 2567 * Create a new slice stream from the given source, covering the content 2568 * starting at the current seek position for length bytes. 
2569 */ 2570 this(InputStream source, long length) 2571 in 2572 { 2573 assert( source !is null ); 2574 assert( length >= 0 ); 2575 } 2576 body 2577 { 2578 this.source = source; 2579 this._length = length; 2580 } 2581 2582 override IConduit conduit() 2583 { 2584 return source.conduit; 2585 } 2586 2587 override void close() 2588 { 2589 source = null; 2590 } 2591 2592 InputStream input() 2593 { 2594 return source; 2595 } 2596 2597 long seek (long ofs, Anchor anchor = Anchor.Begin) 2598 { 2599 return source.seek (ofs, anchor); 2600 } 2601 2602 override size_t read(void[] dst) 2603 { 2604 // If we're at the end of the slice, return eof 2605 if( _length <= 0 ) 2606 return IConduit.Eof; 2607 2608 // Otherwise, make sure we don't try to read past the end of the slice 2609 if( dst.length > _length ) 2610 dst.length = cast(size_t) _length; 2611 2612 // Do the read 2613 auto read = source.read(dst); 2614 if( read == IConduit.Eof ) 2615 // If we got an Eof, we'll consider that a bug for the moment. 2616 // TODO: proper exception 2617 throw new Exception("unexpected end-of-stream"); 2618 2619 _length -= read; 2620 return read; 2621 } 2622 2623 override void[] load(size_t max=-1) 2624 { 2625 return Conduit.load(this, max); 2626 } 2627 2628 override InputStream flush() 2629 { 2630 source.flush(); 2631 return this; 2632 } 2633 2634 private: 2635 InputStream source; 2636 long _length; 2637 2638 invariant() 2639 { 2640 if( _length > 0 ) assert( source !is null ); 2641 } 2642 } 2643 2644 /** 2645 * This stream can be used to provide stream-based access to a subset of 2646 * another stream. It is akin to slicing an array. 2647 * 2648 * This stream fully supports seeking, and as such requires that the 2649 * underlying stream also support seeking. 
2650 */ 2651 class SliceSeekOutputStream : OutputStream 2652 { 2653 //alias IConduit.Seek.Anchor Anchor; 2654 2655 /** 2656 * Create a new slice stream from the given source, covering the content 2657 * starting at position begin, for length bytes. 2658 */ 2659 this(OutputStream source, long begin, long length) 2660 in 2661 { 2662 assert( (cast(IConduit.Seek) source.conduit) !is null ); 2663 assert( begin >= 0 ); 2664 assert( length >= 0 ); 2665 } 2666 body 2667 { 2668 this.source = source; 2669 this.seeker = source; //cast(IConduit.Seek) source; 2670 this.begin = begin; 2671 this.length = length; 2672 } 2673 2674 override IConduit conduit() 2675 { 2676 return source.conduit; 2677 } 2678 2679 override void close() 2680 { 2681 source = null; 2682 seeker = null; 2683 } 2684 2685 size_t write(const(void)[] src) 2686 { 2687 // If we're at the end of the slice, return eof 2688 if( _position >= length ) 2689 return IConduit.Eof; 2690 2691 // Otherwise, make sure we don't try to write past the end of the 2692 // slice 2693 if( _position+src.length > length ) 2694 src.length = cast(size_t) (length-_position); 2695 2696 // Seek source stream to the appropriate location. 2697 if( seeker.seek(0, Anchor.Current) != begin+_position ) 2698 seeker.seek(begin+_position, Anchor.Begin); 2699 2700 // Do the write 2701 auto wrote = source.write(src); 2702 if( wrote == IConduit.Eof ) 2703 // If we got an Eof, we'll consider that a bug for the moment. 
2704 // TODO: proper exception 2705 throw new Exception("unexpected end-of-stream"); 2706 2707 _position += wrote; 2708 return wrote; 2709 } 2710 2711 override OutputStream copy(InputStream src, size_t max=-1) 2712 { 2713 Conduit.transfer(src, this, max); 2714 return this; 2715 } 2716 2717 override OutputStream flush() 2718 { 2719 source.flush(); 2720 return this; 2721 } 2722 2723 override OutputStream output() 2724 { 2725 return source; 2726 } 2727 2728 override long seek(long offset, Anchor anchor = cast(Anchor)0) 2729 { 2730 switch( anchor ) 2731 { 2732 case Anchor.Begin: 2733 _position = offset; 2734 break; 2735 2736 case Anchor.Current: 2737 _position += offset; 2738 if( _position < 0 ) _position = 0; 2739 break; 2740 2741 case Anchor.End: 2742 _position = length+offset; 2743 if( _position < 0 ) _position = 0; 2744 break; 2745 2746 default: 2747 assert(false); 2748 } 2749 2750 return _position; 2751 } 2752 2753 private: 2754 OutputStream source; 2755 OutputStream seeker; 2756 2757 long _position, begin, length; 2758 2759 invariant() 2760 { 2761 assert( cast(Object) source is cast(Object) seeker ); 2762 assert( begin >= 0 ); 2763 assert( length >= 0 ); 2764 assert( _position >= 0 ); 2765 } 2766 } 2767 2768 /******************************************************************************* 2769 2770 copyright: Copyright © 2007 Daniel Keep. All rights reserved. 2771 2772 license: BSD style: $(LICENSE) 2773 2774 version: Prerelease 2775 2776 author: Daniel Keep 2777 2778 *******************************************************************************/ 2779 2780 //module tangox.io.stream.WrapStream; 2781 2782 //import tango.io.device.Conduit : Conduit; 2783 //import tango.io.model.IConduit : IConduit, InputStream, OutputStream; 2784 2785 /** 2786 * This stream can be used to provide access to another stream. 2787 * Its distinguishing feature is that users cannot close the underlying 2788 * stream. 
 *
 * This stream fully supports seeking, and as such requires that the
 * underlying stream also support seeking.
 */
class WrapSeekInputStream : InputStream
{
    //alias IConduit.Seek.Anchor Anchor;

    /**
     * Create a new wrap stream from the given source.
     */
    this(InputStream source)
    in
    {
        assert( source !is null );
        assert( (cast(IConduit.Seek) source.conduit) !is null );
    }
    body
    {
        this.source = source;
        this.seeker = source; //cast(IConduit.Seek) source;
        // Start tracking from the source's current seek position.
        this._position = seeker.seek(0, Anchor.Current);
    }

    /// ditto
    this(InputStream source, long position)
    in
    {
        assert( position >= 0 );
    }
    body
    {
        this(source);
        this._position = position;
    }

    override IConduit conduit()
    {
        return source.conduit;
    }

    // Detaches from the source but deliberately does NOT close it; that is
    // the whole point of this wrapper (see the class comment above).
    override void close()
    {
        source = null;
        seeker = null;
    }

    override size_t read(void[] dst)
    {
        // Re-sync the underlying stream with our saved position, in case
        // another user of the source moved it since our last operation.
        if( seeker.seek(0, Anchor.Current) != _position )
            seeker.seek(_position, Anchor.Begin);

        auto read = source.read(dst);
        if( read != IConduit.Eof )
            _position += read;

        return read;
    }

    override void[] load(size_t max=-1)
    {
        return Conduit.load(this, max);
    }

    override InputStream flush()
    {
        source.flush();
        return this;
    }

    /// The wrapped source stream.
    InputStream input()
    {
        return source;
    }

    override long seek(long offset, Anchor anchor = cast(Anchor)0)
    {
        // Restore our saved position first so that relative anchors are
        // applied to this wrapper's position, not wherever the source
        // currently happens to be.
        seeker.seek(_position, Anchor.Begin);
        return (_position = seeker.seek(offset, anchor));
    }

private:
    InputStream source;
    InputStream seeker;   // same object as source; kept for seek calls
    long _position;       // our view of the stream position, in bytes

    invariant()
    {
        assert( cast(Object) source is cast(Object) seeker );
        assert( _position >= 0 );
    }
}

/**
 * This stream can be used to provide access to another stream.
 * Its distinguishing feature is that the users cannot close the underlying
 * stream.
 *
 * This stream fully supports seeking, and as such requires that the
 * underlying stream also support seeking.
 */
class WrapSeekOutputStream : OutputStream
{
    //alias IConduit.Seek.Anchor Anchor;

    /**
     * Create a new wrap stream from the given source.
     */
    this(OutputStream source)
    in
    {
        assert( (cast(IConduit.Seek) source.conduit) !is null );
    }
    body
    {
        this.source = source;
        this.seeker = source; //cast(IConduit.Seek) source;
        // Start tracking from the source's current seek position.
        this._position = seeker.seek(0, Anchor.Current);
    }

    /// ditto
    this(OutputStream source, long position)
    in
    {
        assert( position >= 0 );
    }
    body
    {
        this(source);
        this._position = position;
    }

    override IConduit conduit()
    {
        return source.conduit;
    }

    // Detaches from the source but deliberately does NOT close it; that is
    // the whole point of this wrapper (see the class comment above).
    override void close()
    {
        source = null;
        seeker = null;
    }

    size_t write(const(void)[] src)
    {
        // Re-sync the underlying stream with our saved position, in case
        // another user of the source moved it since our last operation.
        if( seeker.seek(0, Anchor.Current) != _position )
            seeker.seek(_position, Anchor.Begin);

        auto wrote = source.write(src);
        if( wrote != IConduit.Eof )
            _position += wrote;
        return wrote;
    }

    override OutputStream copy(InputStream src, size_t max=-1)
    {
        Conduit.transfer(src, this, max);
        return this;
    }

    override OutputStream flush()
    {
        source.flush();
        return this;
    }

    /// The wrapped source stream.
    override OutputStream output()
    {
        return source;
    }

    override long seek(long offset, Anchor anchor = cast(Anchor)0)
    {
        // Restore our saved position first so that relative anchors are
        // applied to this wrapper's position, not wherever the source
        // currently happens to be.
        seeker.seek(_position, Anchor.Begin);
        return (_position = seeker.seek(offset, anchor));
    }

private:
    OutputStream source;
    OutputStream seeker;  // same object as source; kept for seek calls
    long _position;       // our view of the stream position, in bytes

    invariant()
    {
        assert( cast(Object) source is cast(Object) seeker );
        assert( _position >= 0 );
    }
}