zipper.hh 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591
  1. // Copyright (C) 2011 Michael McMaster <michael@codesrc.com>
  2. //
  3. // This file is part of libzipper.
  4. //
  5. // libzipper is free software: you can redistribute it and/or modify
  6. // it under the terms of the GNU General Public License as published by
  7. // the Free Software Foundation, either version 3 of the License, or
  8. // (at your option) any later version.
  9. //
  10. // libzipper is distributed in the hope that it will be useful,
  11. // but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. // GNU General Public License for more details.
  14. //
  15. // You should have received a copy of the GNU General Public License
  16. // along with libzipper. If not, see <http://www.gnu.org/licenses/>.
  17. #ifndef zipper_hh
  18. #define zipper_hh
  19. #include <stdexcept>
  20. #include <memory>
  21. #include <string>
  22. #include <vector>
  23. #include <cstdint>
  24. #include <sys/stat.h> // For mode_t
  25. #include <sys/time.h> // For timeval
  26. /**
  27. \mainpage libzipper C++ (de)compression library
  28. \section intro Introduction
  29. libzipper offers a flexible C++ interface for reading compressed files
  30. in multiple formats.
  31. <a href="http://www.codesrc.com/src/libzipper">Homepage</a>
  32. libzipper aims to provide applications a transparent method of accessing
  33. compressed data. eg. libzipper is suited to reading XML config files that
  34. are compressed to save space.
  35. libzipper is not a general-purpose archive management library, as it
  36. does not provide access to the filesystem attributes of each file
  37. (ie. libzipper does not support the concepts of file owner, group or
  38. permissions).
  39. \section formats Supported Formats
  40. <ul>
  41. <li>gzip</li>
  42. <li>zip</li>
  43. </ul>
  44. \section example_read Reading a compressed file into memory
  45. \code
  46. #include <zipper.hh>
  47. #include <algorithm>
  48. #include <vector>
  49. class MemWriter : public zipper::Writer
  50. {
  51. public:
  52. std::vector<uint8_t> data;
  53. virtual void writeData(
  54. zipper::zsize_t offset, zipper::zsize_t bytes, const uint8_t* inData)
  55. {
  56. data.resize(std::max<zipper::zsize_t>(offset + bytes, data.size()));
  57. std::copy(inData, inData + bytes, &data[offset]);
  58. }
  59. virtual zipper::zsize_t getSize() const { return data.size(); }
  60. };
  61. std::vector<uint8_t> readSavedGame(const std::string& filename)
  62. {
  63. // open the compressed input file. FileReader will throw an
  64. // exception if an IO error occurs.
  65. zipper::FileReader reader(filename);
  66. MemWriter writer;
  67. zipper::Decompressor decomp(reader);
  68. std::vector<zipper::CompressedFilePtr> entries(decomp.getEntries());
  69. if (!entries.empty())
  70. {
  71. // Uncompress the first file. Will pass-though data as-is if the
  72. // file is not compressed.
  73. entries.front()->decompress(writer);
  74. }
  75. return writer.data;
  76. }
  77. \endcode
  78. \section example_write Writing compressed files.
  79. \code
  80. #include <zipper.hh>
  81. #include <algorithm>
  82. #include <vector>
  83. class MemReader : public zipper::Reader
  84. {
  85. public:
  86. MemReader(const std::vector<uint8_t>& data) : m_data(data) {}
  87. virtual const std::string& getSourceName() const
  88. {
  89. static std::string Name("savedGame.dat");
  90. return Name;
  91. }
  92. virtual const timeval& getModTime() const
  93. {
  94. return zipper::s_now;
  95. }
  96. virtual zipper::zsize_t getSize() const { return m_data.size(); }
  97. virtual void readData(zipper::zsize_t offset, zipper::zsize_t bytes, uint8_t* dest) const
  98. {
  99. std::copy(&m_data[offset], &m_data[offset + bytes], dest);
  100. }
  101. private:
  102. std::vector<uint8_t> m_data;
  103. };
  104. void writeSavedGame(
  105. const std::string& filename, const std::vector<uint8_t>& gameData)
  106. {
  107. zipper::FileWriter writer(filename);
  108. zipper::Compressor comp(zipper::Container_zip, writer);
  109. comp.addFile(MemReader(gameData));
  110. }
  111. \endcode
  112. */
  113. /// \namespace zipper
  114. /// \brief The zipper namespace contains the libzipper public API.
  115. namespace zipper
  116. {
  /// \typedef zsize_t
  /// zsize_t should be used exclusively when dealing with file offsets
  /// and sizes, so that large files (>4GB) are supported.
  ///
  /// Unlike size_t on some systems, zsize_t is 64 bits wide even when
  /// compiling for a 32-bit target.
  typedef uint64_t zsize_t;
  /// \enum ContainerFormat
  /// ContainerFormat enumerates the compressed archive formats supported
  /// by libzipper.
  ///
  /// An application can determine the supported formats by iterating
  /// over the Container_begin to Container_end range. eg.
  /// \code
  /// for (int i = Container_begin; i < Container_end; ++i)
  /// {
  ///   const Container& container(getContainer(ContainerFormat(i)));
  /// }
  /// \endcode
  enum ContainerFormat
  {
    /// Iteration marker: the first value of the range. It has the same
    /// numeric value as Container_none.
    Container_begin = 0,

    /// No container (eg. plain text)
    Container_none = 0,

    /// ZIP
    Container_zip,

    /// gzip.
    Container_gzip,

    /// Iteration marker: one past the last format value.
    Container_end
  };
  149. /// \struct Container
  150. /// Provides libzipper capability details for a compressed archive
  151. /// format.
  152. /// \see getContainer
  153. struct Container
  154. {
  155. /// \enum CapabilityBits allows a bitmask to be specified with a
  156. /// combination of boolean flags.
  157. enum CapabilityBits
  158. {
  159. /// Compression bit is set if the format is usable with Compressor
  160. Compression = 1,
  161. /// Decompression bit is set if the format is usable with
  162. /// Decompressor
  163. Decompression = 2,
  164. /// EmbeddedFilenames bit is set if CompressedFile::getPath() is
  165. /// supported
  166. EmbeddedFilenames = 4,
  167. /// Archive bit is set if multiple compressed files may exist in
  168. /// a single container.
  169. Archive = 8,
  170. /// FileSize bit is set if the uncompressed size for each
  171. /// compressed file is recorded in the container.
  172. FileSize = 16
  173. };
  174. /// %Container Type
  175. ContainerFormat format;
  176. /// %Container Internet Media Type (aka MIME type).
  177. /// eg. "application/zip"
  178. std::string mediaType;
  179. /// Bitmask comprised of CapabilityBits enum values.
  180. uint32_t capabilities;
  181. };
  /// \brief Special timestamp value meaning "use the current time".
  ///
  /// When passed as a method parameter (or returned from
  /// Reader::getModTime()), it requests that the current time be used
  /// instead of a specific timestamp.
  extern const timeval s_now;
  185. /// \brief Returns the capability details of the given format.
  186. const Container& getContainer(ContainerFormat format);
  /// \brief Base class for all exceptions thrown by libzipper
  ///
  /// Derives from std::runtime_error, so callers may also catch it as a
  /// standard exception.
  class Exception : public std::runtime_error
  {
  public:
    /// Exception ctor
    /// \param what A description of the error encountered.
    Exception(const std::string& what);
  };
  195. /// \brief Exception thrown when the input data does not match
  196. /// the expected Container format.
  197. class FormatException : public Exception
  198. {
  199. public:
  200. /// FormatException ctor
  201. /// \param what A description of the error encountered.
  202. FormatException(const std::string& what);
  203. };
  204. /// \brief Exception thrown when a Reader or Writer instance is unable
  205. /// to satisfy an IO request due to an external error.
  206. class IOException : public Exception
  207. {
  208. public:
  209. /// IOException ctor
  210. /// \param what A description of the error encountered.
  211. IOException(const std::string& what);
  212. };
  213. /// \brief Exception thrown when an operation is requested on a compressed
  214. /// archive that libzipper does not implement.
  215. ///
  216. /// This exception may be thrown even if libzipper advertises general
  217. /// support for the Container format. eg. libzipper supports most
  218. /// ZIP files, but an UnsupportedException will be thrown if given an
  219. /// encrypted ZIP file.
  220. class UnsupportedException : public Exception
  221. {
  222. public:
  223. /// UnsupportedException ctor
  224. /// \param what A description of the error encountered.
  225. UnsupportedException(const std::string& what);
  226. };
  227. /// \brief Reader supplies input data to the compression/decompression
  228. /// functions.
  229. ///
  230. /// Normally, an application using libzipper provides the Reader
  231. /// implementation. The implementation could supply data from files,
  232. /// in-memory buffers, or it could be generated on-the-fly.
  233. ///
  234. /// The Reader implementation must support random access, and must
  235. /// determine at creation time the number of bytes available. The
  236. /// Reader interface is not suitable for use with streaming data.
  237. class Reader
  238. {
  239. public:
  240. /// Reader dtor
  241. virtual ~Reader();
  242. /// Returns a name for this source of the data.
  243. ///
  244. /// For file-based Reader implementations, this would normally be
  245. /// the input filename.
  246. virtual const std::string& getSourceName() const = 0;
  247. /// Return the last-modified timestamp of the data.
  248. /// If the special s_now value is returned, the current time should be
  249. /// used instead.
  250. virtual const timeval& getModTime() const = 0;
  251. /// Returns the number of bytes available via readData()
  252. ///
  253. /// \invariant getSize() is stable throughout the lifetime
  254. /// of the Reader instance.
  255. virtual zsize_t getSize() const = 0;
  256. /// Copies data into the dest buffer
  257. ///
  258. /// An exception must be thrown if it is not possible to copy the
  259. /// requested data into the supplied buffer (eg. file IO error).
  260. ///
  261. /// \pre offset + bytes <= getSize()
  262. ///
  263. /// \param offset Number of bytes to skip at the front of the data
  264. /// source.
  265. /// \param bytes Number of bytes to copy
  266. /// \param dest Destination buffer.
  267. ///
  268. virtual void readData(
  269. zsize_t offset, zsize_t bytes, uint8_t* dest
  270. ) const = 0;
  271. };
  272. /// \brief FileReader is a file-based implementation of the Reader
  273. /// interface.
  274. class FileReader : public Reader
  275. {
  276. public:
  277. /// Read data from the supplied file.
  278. FileReader(const std::string& filename);
  279. /// Read data from the supplied file.
  280. ///
  281. /// \param filename The value used by getSourceName(). This name
  282. /// is arbitary, and does not need to be related to fd.
  283. ///
  284. /// \param fd The descriptor to source data from. The descriptor
  285. /// must be open for reading, blocking, and seekable (ie. lseek(2)).
  286. ///
  287. /// \param closeFd If true, fd will be closed by this object
  288. /// when it is no longer needed.
  289. FileReader(const std::string& filename, int fd, bool closeFd);
  290. /// FileReader dtor
  291. virtual ~FileReader();
  292. /// Inherited from Reader
  293. virtual const std::string& getSourceName() const;
  294. /// Inherited from Reader
  295. virtual const timeval& getModTime() const;
  296. /// Inherited from Reader
  297. virtual zsize_t getSize() const;
  298. /// Inherited from Reader
  299. virtual void readData(
  300. zsize_t offset, zsize_t bytes, uint8_t* dest
  301. ) const;
  302. private:
  303. FileReader(const FileReader&);
  304. FileReader& operator=(const FileReader&);
  305. class FileReaderImpl;
  306. FileReaderImpl* m_impl;
  307. };
  308. /// \typedef ReaderPtr
  309. /// A shared pointer to a Reader
  310. typedef std::shared_ptr<Reader> ReaderPtr;
  311. /// \brief Writer accepts output data from the compression/decompression
  312. /// functions.
  313. ///
  314. /// Normally, an application using libzipper provides the Writer
  315. /// implementation. The implementation could write data to files,
  316. /// in-memory buffers, or it could be simply discarded.
  317. ///
  318. /// The Writer implementation needs only to support sequential access.
  319. class Writer
  320. {
  321. public:
  322. /// Writer dtor
  323. virtual ~Writer();
  324. /// Returns the size of the written data.
  325. virtual zsize_t getSize() const = 0;
  326. /// Accepts output from libzipper
  327. ///
  328. /// An exception must be thrown if it is not possible to accept
  329. /// given data. (eg. file IO error).
  330. ///
  331. /// \param offset Number of bytes to skip at the front of the data
  332. /// source. Skipped bytes will contain null characters if not already
  333. /// assigned a value.
  334. /// \param bytes Number of bytes in data
  335. /// \param data Output from libzipper.
  336. ///
  337. virtual void writeData(
  338. zsize_t offset, zsize_t bytes, const uint8_t* data
  339. ) = 0;
  340. };
  341. /// \typedef WriterPtr
  342. /// A shared pointer to a Writer
  343. typedef std::shared_ptr<Writer> WriterPtr;
  344. /// \brief FileWrter is a file-based implementation of the Writer
  345. /// interface.
  346. class FileWriter : public Writer
  347. {
  348. public:
  349. /// Write data to the supplied file.
  350. /// If the file already exists, it will be truncated.
  351. /// If the file does not exist, it will be created with the
  352. /// given permissions.
  353. ///
  354. /// \param filename The file to open for writing.
  355. ///
  356. /// \param createPermissions The permissions set on the file if it is to
  357. /// be created.
  358. ///
  359. /// \param modTime Set a specific modification time on the created file.
  360. /// If the special s_now value is provided, the current time will be
  361. /// used.
  362. ///
  363. FileWriter(
  364. const std::string& filename,
  365. mode_t createPermissions = 0664,
  366. const timeval& modTime = s_now);
  367. /// Write data to the supplied file.
  368. ///
  369. /// \param filename The filename reported in any exception error
  370. /// messages. This name is arbitary, and does not need to be
  371. /// related to fd.
  372. ///
  373. /// \param fd The descriptor to write data to. The descriptor
  374. /// must be open for writing in blocking mode.
  375. ///
  376. /// \param closeFd If true, fd will be closed by this object
  377. /// when it is no longer needed.
  378. FileWriter(const std::string& filename, int fd, bool closeFd);
  379. /// FileWriter dtor
  380. virtual ~FileWriter();
  381. /// Inherited from Writer
  382. virtual zsize_t getSize() const;
  383. /// Inherited from Writer
  384. virtual void writeData(
  385. zsize_t offset, zsize_t bytes, const uint8_t* data
  386. );
  387. private:
  388. FileWriter(const FileWriter&);
  389. FileWriter& operator=(const FileWriter&);
  390. class FileWriterImpl;
  391. FileWriterImpl* m_impl;
  392. };
  393. /// \brief CompressedFile represents an entry within a compressed archive.
  394. ///
  395. /// CompressedFile instances are created by Decompressor, and allow
  396. /// selectively extracting the contents of an archive.
  397. class CompressedFile
  398. {
  399. public:
  400. /// CompressedFile dtor
  401. virtual ~CompressedFile();
  402. /// Return true if decompress is likely to succeed.
  403. ///
  404. /// isDecompressSupported may return false if libzipper doesn't know
  405. /// how to deal with the compressed data. eg. encrypted files,
  406. /// or ZIP files compressed with non-standard schemes.
  407. virtual bool isDecompressSupported() const = 0;
  408. /// Decompress the file, and store the results via the given
  409. /// writer object.
  410. virtual void decompress(Writer& writer) = 0;
  411. /// Return the file path of the compressed file.
  412. ///
  413. /// Unix-style path separaters ('/') are returned, even if the
  414. /// archive was created under an alternative OS.
  415. virtual const std::string& getPath() const = 0;
  416. /// Return the compressed size of the file
  417. ///
  418. /// getCompressedSize() will return -1 of the FileSize capability
  419. /// bit of the container is false.
  420. virtual zsize_t getCompressedSize() const = 0;
  421. /// Return the uncompressed size of the file
  422. ///
  423. /// The decompress method will pass exactly this number of bytes
  424. /// to the Writer.
  425. ///
  426. /// getUncompressedSize() will return -1 of the FileSize capability
  427. /// bit of the container is false.
  428. virtual zsize_t getUncompressedSize() const = 0;
  429. /// Return the modification time of the original file
  430. virtual const timeval& getModificationTime() const = 0;
  431. };
  432. /// \typedef CompressedFilePtr
  433. /// A shared pointer to a CompressedFile
  434. typedef std::shared_ptr<CompressedFile> CompressedFilePtr;
  435. /// \brief Decompressor detects the compressed archive type of the data,
  436. /// and creates suitable CompressedFile instances to access the compressed
  437. /// data.
  438. class Decompressor
  439. {
  440. public:
  441. /// Create a decompressor from the data made available by reader.
  442. Decompressor(const ReaderPtr& reader);
  443. /// Create a decompressor from the data made available by reader.
  444. ///
  445. /// \param reader must remain in scope for the lifetime of the
  446. /// Decompressor, and lifetime of any CompressedFile objects returned
  447. /// from getEntries()
  448. Decompressor(Reader& reader);
  449. /// Decompressor dtor
  450. ~Decompressor();
  451. /// Return the detected Container type of the compressed archive.
  452. ContainerFormat getContainerFormat() const;
  453. /// Return CompressedFile entries to represent the file entries within
  454. /// a compressed archive.
  455. std::vector<CompressedFilePtr> getEntries() const;
  456. private:
  457. Decompressor(const Decompressor&);
  458. Decompressor& operator=(const Decompressor&);
  459. class DecompressorImpl;
  460. DecompressorImpl* m_decompressor;
  461. };
  462. /// \brief Compressor creates a compressed archive from the supplied
  463. /// Reader objects.
  464. /// data.
  465. class Compressor
  466. {
  467. public:
  468. /// Create a Compressor to output the given compressed archived format
  469. /// to writer.
  470. /// \param writer destination of the compressed data
  471. /// \param format determines the output archive file type to
  472. /// create.
  473. Compressor(ContainerFormat format, const WriterPtr& writer);
  474. /// Create a Compressor to output the given compressed archived format
  475. /// to writer.
  476. ///
  477. /// \param writer is the destination of the compressed data. writer
  478. /// must remain in scope for the lifetime of the Compressor.
  479. /// \param format determines the output archive file type to
  480. /// create.
  481. Compressor(ContainerFormat format, Writer& writer);
  482. /// \brief Compressor dtor
  483. ///
  484. /// Additional data may be passed to writer (given in ctor) to close
  485. /// the compressed archive.
  486. ~Compressor();
  487. /// Compress the data given by reader, and add it to the compressed
  488. /// archive.
  489. void addFile(const Reader& reader);
  490. class CompressorImpl;
  491. private:
  492. Compressor(const Compressor&);
  493. Compressor& operator=(const Compressor&);
  494. CompressorImpl* m_compressor;
  495. };
  496. }
  497. #endif