zip.cc 13 KB


  1. // Copyright (C) 2011 Michael McMaster <michael@codesrc.com>
  2. //
  3. // This file is part of libzipper.
  4. //
  5. // libzipper is free software: you can redistribute it and/or modify
  6. // it under the terms of the GNU General Public License as published by
  7. // the Free Software Foundation, either version 3 of the License, or
  8. // (at your option) any later version.
  9. //
  10. // libzipper is distributed in the hope that it will be useful,
  11. // but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. // GNU General Public License for more details.
  14. //
  15. // You should have received a copy of the GNU General Public License
  16. // along with libzipper. If not, see <http://www.gnu.org/licenses/>.
  17. #include "zipper.hh"
  18. #include "zip.hh"
  19. #include "util.hh"
  20. #include "deflate.hh"
  21. #include <algorithm>
  22. #include <cassert>
  23. #include <iostream>
  24. #include <time.h>
  25. #include <string.h>
  26. #include "config.h"
  27. using namespace zipper;
  28. namespace
  29. {
  // Convert an MS-DOS packed date/time pair, as stored in ZIP records, into
  // a time_t. The fields are interpreted as local time.
  //
  // DOS time: bits 0-4 = seconds / 2, bits 5-10 = minutes, bits 11-15 = hours.
  // DOS date: bits 0-4 = day of month, bits 5-8 = month (1-12),
  //           bits 9-15 = years since 1980.
  //
  // Returns the value produced by mktime() for the decoded fields.
  time_t convertDosDateTime(uint16_t date, uint16_t time)
  {
      struct tm parts;
      memset(&parts, 0, sizeof(parts));

      // DOS only has 5 bits for seconds, so it stores them in 2 second units.
      parts.tm_sec = (time & 0x1F) * 2;
      parts.tm_min = (time >> 5) & 0x3F;
      parts.tm_hour = (time >> 11) & 0x1F;

      parts.tm_mday = date & 0x1F;
      parts.tm_mon = ((date >> 5) & 0x0F) - 1; // struct tm months are 0-11
      parts.tm_year = ((date >> 9) & 0x7F) + 80; // struct tm years are from 1900

      // Let mktime work out whether daylight saving applies. Forcing 0 here
      // would shift the result by an hour for dates inside a DST period.
      parts.tm_isdst = -1;

      return mktime(&parts);
  }
  // Convert a time_t into an MS-DOS packed date/time pair, as stored in ZIP
  // records. The time is broken down as local time.
  //
  // DOS time: bits 0-4 = seconds / 2, bits 5-10 = minutes, bits 11-15 = hours.
  // DOS date: bits 0-4 = day of month, bits 5-8 = month (1-12),
  //           bits 9-15 = years since 1980.
  //
  // The DOS format can only represent 1980-01-01 .. 2107-12-31. Values
  // outside that range (or that localtime_r cannot break down) are clamped
  // to the nearest representable value instead of producing garbage bits.
  void convertDosDateTime(time_t in, uint16_t& date, uint16_t& time)
  {
      enum
      {
          MinTmYear = 80, // 1980, expressed as years since 1900
          MaxTmYear = 80 + 127 // 2107, the largest 7-bit DOS year
      };

      struct tm buf;
      memset(&buf, 0, sizeof(buf));
      const struct tm* parts(localtime_r(&in, &buf));

      if (!parts || (parts->tm_year < MinTmYear))
      {
          // 1980-01-01 00:00:00
          date = static_cast<uint16_t>((1 << 5) | 1);
          time = 0;
          return;
      }
      if (parts->tm_year > MaxTmYear)
      {
          // 2107-12-31 23:59:58
          date = static_cast<uint16_t>((127 << 9) | (12 << 5) | 31);
          time = static_cast<uint16_t>((23 << 11) | (59 << 5) | 29);
          return;
      }

      // Seconds are stored in 2 second units. tm_sec may be 60 for a leap
      // second; keep the field within 0-29 so it decodes to a valid time.
      int dosSeconds(parts->tm_sec / 2);
      if (dosSeconds > 29)
      {
          dosSeconds = 29;
      }

      time = static_cast<uint16_t>(
          dosSeconds |
          (parts->tm_min << 5) |
          (parts->tm_hour << 11)
          );
      date = static_cast<uint16_t>(
          parts->tm_mday |
          ((parts->tm_mon + 1) << 5) |
          ((parts->tm_year - MinTmYear) << 9)
          );
  }
  55. class FileEntry : public CompressedFile
  56. {
  57. public:
  58. FileEntry(
  59. const ReaderPtr& reader,
  60. uint16_t versionNeeded,
  61. uint16_t gpFlag,
  62. uint16_t compressionMethod,
  63. uint32_t crc,
  64. zsize_t compressedSize,
  65. zsize_t uncompressedSize,
  66. time_t modTime,
  67. zsize_t localHeaderOffset,
  68. std::string fileName
  69. ) :
  70. m_reader(reader),
  71. m_versionNeeded(versionNeeded),
  72. m_gpFlag(gpFlag),
  73. m_compressionMethod(compressionMethod),
  74. m_crc(crc),
  75. m_compressedSize(compressedSize),
  76. m_uncompressedSize(uncompressedSize),
  77. m_localHeaderOffset(localHeaderOffset),
  78. m_fileName(fileName)
  79. {
  80. m_modTime.tv_sec = modTime;
  81. m_modTime.tv_usec = 0;
  82. }
  83. virtual bool isDecompressSupported() const
  84. {
  85. return ((m_versionNeeded & 0xf) <= 20) &&
  86. ((m_gpFlag & 0x1) == 0) && // Not encrypted
  87. ((m_compressionMethod == 0) || (m_compressionMethod == 8));
  88. }
  89. virtual const std::string& getPath() const
  90. {
  91. return m_fileName;
  92. }
  93. virtual zsize_t getCompressedSize() const { return m_compressedSize; }
  94. virtual zsize_t getUncompressedSize() const
  95. {
  96. return m_uncompressedSize;
  97. }
  98. virtual const timeval& getModificationTime() const { return m_modTime; }
  99. virtual void decompress(Writer& writer)
  100. {
  101. enum
  102. {
  103. Signature = 0x04034b50,
  104. MinRecordBytes = 30,
  105. ChunkSize = 64*1024
  106. };
  107. std::vector<uint8_t> localRecord(MinRecordBytes);
  108. m_reader->readData(
  109. m_localHeaderOffset, MinRecordBytes, &localRecord[0]
  110. );
  111. if (read32_le(localRecord, 0) != Signature)
  112. {
  113. throw FormatException("Invalid local ZIP record");
  114. }
  115. // Don't trust the lengths for filename and extra content read from
  116. // the central records. At least for extra, these DO differ for
  117. // unknown reasons
  118. zsize_t filenameLength(read16_le(localRecord, 26));
  119. zsize_t extraLength(read16_le(localRecord, 28));
  120. zsize_t startCompressedBytes(
  121. m_localHeaderOffset +
  122. MinRecordBytes +
  123. filenameLength +
  124. extraLength
  125. );
  126. zsize_t endCompressedBytes(
  127. startCompressedBytes + m_compressedSize
  128. );
  129. if (endCompressedBytes > m_reader->getSize())
  130. {
  131. throw FormatException("Compressed file size is too long");
  132. }
  133. switch (m_compressionMethod)
  134. {
  135. case 0: // No compression
  136. {
  137. for (zsize_t pos(startCompressedBytes);
  138. pos < endCompressedBytes;
  139. pos += ChunkSize
  140. )
  141. {
  142. uint8_t buf[ChunkSize];
  143. zsize_t bytes(
  144. std::min(zsize_t(ChunkSize), endCompressedBytes - pos)
  145. );
  146. m_reader->readData(pos, bytes, &buf[0]);
  147. writer.writeData(pos, bytes, &buf[0]);
  148. }
  149. }; break;
  150. case 8: // Deflate
  151. {
  152. uint32_t crc(0);
  153. zsize_t inPos(startCompressedBytes);
  154. zsize_t outPos(0);
  155. inflate(
  156. m_reader,
  157. writer,
  158. inPos,
  159. endCompressedBytes,
  160. outPos,
  161. crc);
  162. if (m_gpFlag & 0x4) // CRC is after compressed data
  163. {
  164. uint8_t dataDescriptor[12];
  165. m_reader->readData(
  166. inPos, sizeof(dataDescriptor), &dataDescriptor[0]);
  167. m_crc = read32_le(dataDescriptor, 0);
  168. m_compressedSize = read32_le(dataDescriptor, 4);
  169. m_uncompressedSize = read32_le(dataDescriptor, 8);
  170. }
  171. if (crc != m_crc)
  172. {
  173. throw FormatException("Corrupt Data (CRC failure)");
  174. }
  175. }; break;
  176. default:
  177. throw UnsupportedException("Unsupported compression scheme");
  178. };
  179. }
  180. private:
  181. ReaderPtr m_reader;
  182. uint16_t m_versionNeeded;
  183. uint16_t m_gpFlag;
  184. uint16_t m_compressionMethod;
  185. uint32_t m_crc;
  186. zsize_t m_compressedSize;
  187. zsize_t m_uncompressedSize;
  188. timeval m_modTime;
  189. zsize_t m_localHeaderOffset;
  190. std::string m_fileName;
  191. };
  192. bool readEndCentralDirectory(
  193. const ReaderPtr& reader,
  194. zsize_t& centralDirectoryBytes,
  195. zsize_t& centralDirectoryOffset,
  196. zsize_t& centralDirectoryEntries
  197. )
  198. {
  199. // Read the end of central directory record. This
  200. // record enables us to find the remainding
  201. // records without searching for record signatures.
  202. // TODO does not consider the Zip64 entries.
  203. enum
  204. {
  205. MinRecordBytes = 22, // Minimum size with no comment
  206. MaxCommentBytes = 65535, // 2 bytes to store comment length
  207. Signature = 0x06054b50
  208. };
  209. zsize_t providerSize(reader->getSize());
  210. if (providerSize < MinRecordBytes)
  211. {
  212. throw FormatException("Too small");
  213. }
  214. size_t bufSize(
  215. std::min(zsize_t(MinRecordBytes + MaxCommentBytes), providerSize)
  216. );
  217. std::vector<uint8_t> buffer(bufSize);
  218. reader->readData(providerSize - bufSize, bufSize, &buffer[0]);
  219. // Need to search for this record, as it ends in a variable-length
  220. // comment field. Search backwards, with the assumption that the
  221. // comment doesn't exist, or is much smaller than the maximum
  222. // length
  223. bool recordFound(false);
  224. ssize_t pos(bufSize - MinRecordBytes);
  225. for (; pos >= 0; --pos)
  226. {
  227. recordFound = (read32_le(buffer, pos) == Signature);
  228. break;
  229. }
  230. if (recordFound)
  231. {
  232. if (read16_le(buffer, pos + 4) != 0)
  233. {
  234. throw UnsupportedException("Spanned disks not supported");
  235. }
  236. centralDirectoryBytes = read32_le(buffer, pos + 12);
  237. centralDirectoryOffset = read32_le(buffer, pos + 16);
  238. centralDirectoryEntries = read16_le(buffer, pos + 10);
  239. }
  240. return recordFound;
  241. }
  242. std::vector<CompressedFilePtr>
  243. readCentralDirectory(const ReaderPtr& reader)
  244. {
  245. enum Constants
  246. {
  247. MinRecordBytes = 46,
  248. Signature = 0x02014b50
  249. };
  250. zsize_t centralDirectoryBytes(0);
  251. zsize_t centralDirectoryOffset(0);
  252. zsize_t centralDirectoryEntries(0);
  253. bool isZip(
  254. readEndCentralDirectory(
  255. reader,
  256. centralDirectoryBytes,
  257. centralDirectoryOffset,
  258. centralDirectoryEntries
  259. )
  260. );
  261. (void) isZip; // Avoid unused warning.
  262. assert(isZip);
  263. std::vector<uint8_t> buffer(centralDirectoryBytes);
  264. reader->readData(
  265. centralDirectoryOffset,
  266. centralDirectoryBytes,
  267. &buffer[0]
  268. );
  269. zsize_t pos(0);
  270. std::vector<CompressedFilePtr> entries;
  271. while ((pos + MinRecordBytes) < buffer.size())
  272. {
  273. if (read32_le(buffer, pos) != Signature)
  274. {
  275. // Unknown record type.
  276. pos += 1;
  277. continue;
  278. }
  279. uint16_t versionNeeded(read16_le(buffer, pos + 6));
  280. uint16_t gpFlag(read16_le(buffer, pos + 8));
  281. uint16_t compressionMethod(read16_le(buffer, pos + 10));
  282. uint16_t modTime(read16_le(buffer, pos + 12));
  283. uint16_t modDate(read16_le(buffer, pos + 14));
  284. uint32_t crc(read32_le(buffer, pos + 16));
  285. uint32_t compressedSize(read32_le(buffer, pos + 20));
  286. uint32_t uncompressedSize(read32_le(buffer, pos + 24));
  287. size_t fileNameLen(read16_le(buffer, pos + 28));
  288. size_t extraLen(read16_le(buffer, pos + 30));
  289. size_t commentLen(read16_le(buffer, pos + 32));
  290. uint32_t localHeaderOffset(read32_le(buffer, pos + 42));
  291. if ((fileNameLen + extraLen + commentLen + MinRecordBytes + pos) >
  292. buffer.size()
  293. )
  294. {
  295. throw FormatException("File comments are too long");
  296. }
  297. std::string fileName(
  298. &buffer[pos + MinRecordBytes],
  299. &buffer[pos + MinRecordBytes + fileNameLen]
  300. );
  301. entries.push_back(
  302. CompressedFilePtr(
  303. new FileEntry(
  304. reader,
  305. versionNeeded,
  306. gpFlag,
  307. compressionMethod,
  308. crc,
  309. compressedSize,
  310. uncompressedSize,
  311. convertDosDateTime(modDate, modTime),
  312. localHeaderOffset,
  313. fileName
  314. )
  315. )
  316. );
  317. pos += MinRecordBytes + fileNameLen + extraLen + commentLen;
  318. }
  319. return entries;
  320. }
  321. }
  322. void
  323. zipper::zip(
  324. const std::string& filename,
  325. const Reader& reader,
  326. const WriterPtr& writer,
  327. ZipFileRecord& outRecord)
  328. {
  329. enum Constants
  330. {
  331. ChunkSize = 64*1024,
  332. WindowBits = 15,
  333. TimePos = 10
  334. };
  335. static uint8_t Header[] =
  336. {
  337. 0x50, 0x4b, 0x03, 0x04, // Header
  338. 20, // Version (2.0)
  339. 0, // File attributes
  340. 0,0, // gp flag.
  341. 8,0, // deflate method
  342. 0,0, // file time
  343. 0,0, // file date
  344. 0,0,0,0, // CRC32
  345. 0,0,0,0, // Compressed size
  346. 0,0,0,0 // Uncompressed size
  347. };
  348. zsize_t outPos(writer->getSize());
  349. outRecord.localHeaderOffset = outPos;
  350. outRecord.filename = filename;
  351. // Write header
  352. {
  353. uint8_t buffer[ChunkSize];
  354. memcpy(buffer, Header, sizeof(Header));
  355. zsize_t pos(sizeof(Header));
  356. std::string::size_type filenameSize(filename.size());
  357. if (filenameSize > (ChunkSize - pos))
  358. {
  359. filenameSize = ChunkSize - pos;
  360. }
  361. buffer[pos++] = filenameSize & 0xff;
  362. buffer[pos++] = (filenameSize >> 8);
  363. buffer[pos++] = 0; // extra field len
  364. buffer[pos++] = 0; // extra field len
  365. memcpy(buffer + pos, filename.data(), filenameSize);
  366. pos += filenameSize;
  367. writer->writeData(outPos, pos, &buffer[0]);
  368. outPos += pos;
  369. }
  370. // Write compressed data
  371. deflate(
  372. reader,
  373. writer,
  374. outPos,
  375. outRecord.uncompressedSize,
  376. outRecord.compressedSize,
  377. outRecord.crc32);
  378. // Go back and complete the header.
  379. convertDosDateTime(
  380. reader.getModTime().tv_sec, outRecord.dosDate, outRecord.dosTime);
  381. uint8_t trailer[16];
  382. write16_le(outRecord.dosTime, &trailer[0]);
  383. write16_le(outRecord.dosDate, &trailer[2]);
  384. write32_le(outRecord.crc32, &trailer[4]);
  385. write32_le(outRecord.compressedSize, &trailer[8]);
  386. write32_le(outRecord.uncompressedSize, &trailer[12]);
  387. writer->writeData(
  388. outRecord.localHeaderOffset + TimePos, sizeof(trailer), &trailer[0]);
  389. }
  390. void
  391. zipper::zipFinalise(
  392. const std::vector<ZipFileRecord>& records,
  393. const WriterPtr& writer)
  394. {
  395. enum Constants
  396. {
  397. ChunkSize = 64*1024
  398. };
  399. static uint8_t FileHeader[] =
  400. {
  401. 0x50, 0x4b, 0x01, 0x02, // Header
  402. 20, 0x00, // Version (2.0)
  403. 20, 0x00, // Version Needed to extract (2.0)
  404. 0,0, // gp flag.
  405. 8,0 // deflate method
  406. };
  407. zsize_t outPos(writer->getSize());
  408. uint32_t centralDirOffset(outPos);
  409. for (size_t i = 0; i < records.size(); ++i)
  410. {
  411. uint8_t buffer[ChunkSize];
  412. memcpy(buffer, FileHeader, sizeof(FileHeader));
  413. zsize_t pos(sizeof(FileHeader));
  414. write16_le(records[i].dosTime, &buffer[pos]);
  415. pos += 2;
  416. write16_le(records[i].dosDate, &buffer[pos]);
  417. pos += 2;
  418. write32_le(records[i].crc32, &buffer[pos]);
  419. pos += 4;
  420. write32_le(records[i].compressedSize, &buffer[pos]);
  421. pos += 4;
  422. write32_le(records[i].uncompressedSize, &buffer[pos]);
  423. pos += 4;
  424. std::string::size_type filenameSize(records[i].filename.size());
  425. if (filenameSize > (ChunkSize - pos))
  426. {
  427. filenameSize = ChunkSize - pos;
  428. }
  429. write16_le(filenameSize, &buffer[pos]);
  430. pos += 2;
  431. write16_le(0, &buffer[pos]); // extra field len
  432. pos += 2;
  433. write16_le(0, &buffer[pos]); // file comment len
  434. pos += 2;
  435. write16_le(0, &buffer[pos]); // disk number
  436. pos += 2;
  437. write16_le(0, &buffer[pos]); // internal file attributes
  438. pos += 2;
  439. write32_le(0, &buffer[pos]); // external file attributes
  440. pos += 4;
  441. write32_le(records[i].localHeaderOffset, &buffer[pos]);
  442. pos += 4;
  443. memcpy(buffer + pos, records[i].filename.data(), filenameSize);
  444. pos += filenameSize;
  445. writer->writeData(outPos, pos, &buffer[0]);
  446. outPos += pos;
  447. }
  448. uint32_t centralDirSize(writer->getSize() - centralDirOffset);
  449. {
  450. // End-of-directory record.
  451. static uint8_t EndDirectory[] =
  452. {
  453. 0x50, 0x4b, 0x05, 0x06, // Header
  454. 0x00, 0x00, // Disk num
  455. 0x00, 0x00 // Disk with central dir
  456. };
  457. uint8_t buffer[ChunkSize];
  458. memcpy(buffer, EndDirectory, sizeof(EndDirectory));
  459. zsize_t pos(sizeof(EndDirectory));
  460. write16_le(records.size(), &buffer[pos]); // Entries on this disk
  461. pos += 2;
  462. write16_le(records.size(), &buffer[pos]); // Total entries
  463. pos += 2;
  464. write32_le(centralDirSize, &buffer[pos]);
  465. pos += 4;
  466. write32_le(centralDirOffset, &buffer[pos]);
  467. pos += 4;
  468. write16_le(0, &buffer[pos]); // Zip comment length
  469. pos += 2;
  470. writer->writeData(outPos, pos, &buffer[0]);
  471. outPos += pos;
  472. }
  473. }
  474. std::vector<CompressedFilePtr>
  475. zipper::unzip(const ReaderPtr& reader)
  476. {
  477. return readCentralDirectory(reader);
  478. }
  479. bool
  480. zipper::isZip(const ReaderPtr& reader)
  481. {
  482. zsize_t centralDirectoryBytes(0);
  483. zsize_t centralDirectoryOffset(0);
  484. zsize_t centralDirectoryEntries(0);
  485. bool result(
  486. readEndCentralDirectory(
  487. reader,
  488. centralDirectoryBytes,
  489. centralDirectoryOffset,
  490. centralDirectoryEntries
  491. )
  492. );
  493. return result;
  494. }