archive.h 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438
  1. /////////////////////////////////////////////////////////////////////////////
  2. // Name: archive.h
  3. // Purpose: topic overview
  4. // Author: wxWidgets team
  5. // Licence: wxWindows licence
  6. /////////////////////////////////////////////////////////////////////////////
  7. /**
  8. @page overview_archive Archive Formats
  9. @tableofcontents
  10. The archive classes handle archive formats such as zip, tar, rar and cab.
  11. Currently wxZip, wxTar and wxZlib classes are included.
  12. For each archive type, there are the following classes (using zip here as an
  13. example):
  14. @li wxZipInputStream: Input stream
  15. @li wxZipOutputStream: Output stream
  16. @li wxZipEntry: Holds meta-data for an entry (e.g. filename, timestamp, etc.)
  17. There are also abstract wxArchive classes that can be used to write code that
  18. can handle any of the archive types, see @ref overview_archive_generic.
  19. Also see wxFileSystem for a higher level interface that can handle archive
  20. files in a generic way.
  21. The classes are designed to handle archives on both seekable streams such as
  22. disk files, and non-seekable streams such as pipes and sockets (see
  23. @ref overview_archive_noseek).
  24. @section overview_archive_create Creating an Archive
  25. Call wxArchiveOutputStream::PutNextEntry() to create each new entry in the
  26. archive, then write the entry's data. Another call to PutNextEntry() closes the
  27. current entry and begins the next. For example:
  28. @code
  29. wxFFileOutputStream out(wxT("test.zip"));
  30. wxZipOutputStream zip(out);
  31. wxTextOutputStream txt(zip);
  32. wxString sep(wxFileName::GetPathSeparator());
  33. zip.PutNextEntry(wxT("entry1.txt"));
  34. txt << wxT("Some text for entry1.txt\n");
  35. zip.PutNextEntry(wxT("subdir") + sep + wxT("entry2.txt"));
  36. txt << wxT("Some text for subdir/entry2.txt\n");
  37. @endcode
  38. The name of each entry can be a full path, which makes it possible to store
  39. entries in subdirectories.
  40. @section overview_archive_extract Extracting an Archive
  41. wxArchiveInputStream::GetNextEntry() returns a pointer to an entry object
  42. containing the meta-data for the next entry in the archive (and gives away
  43. ownership).
  44. Reading from the input stream then returns the entry's data. Eof() becomes
  45. @true after an attempt has been made to read past the end of the entry's data.
  46. When there are no more entries, GetNextEntry() returns @NULL and sets Eof().
  47. @code
  48. auto_ptr<wxZipEntry> entry;
  49. wxFFileInputStream in(wxT("test.zip"));
  50. wxZipInputStream zip(in);
  51. while (entry.reset(zip.GetNextEntry()), entry.get() != NULL)
  52. {
  53. // access meta-data
  54. wxString name = entry->GetName();
  55. // read 'zip' to access the entry's data
  56. }
  57. @endcode
  58. @section overview_archive_modify Modifying an Archive
  59. To modify an existing archive, write a new copy of the archive to a new file,
  60. making any necessary changes along the way and transferring any unchanged
  61. entries using wxArchiveOutputStream::CopyEntry().
  62. For archive types which compress entry data, CopyEntry() is likely to be much
  63. more efficient than transferring the data using Read() and Write() since it
  64. will copy them without decompressing and recompressing them.
  65. In general modifications are not possible without rewriting the archive, though
  66. it may be possible in some limited cases. Even then, rewriting the archive is
  67. usually a better choice since a failure can be handled without losing the whole
  68. archive. wxTempFileOutputStream can be helpful to do this.
  69. For example, to delete all entries matching the pattern "*.txt":
  70. @code
  71. auto_ptr<wxFFileInputStream> in(new wxFFileInputStream(wxT("test.zip")));
  72. wxTempFileOutputStream out(wxT("test.zip"));
  73. wxZipInputStream inzip(*in);
  74. wxZipOutputStream outzip(out);
  75. auto_ptr<wxZipEntry> entry;
  76. // transfer any meta-data for the archive as a whole (the zip comment
  77. // in the case of zip)
  78. outzip.CopyArchiveMetaData(inzip);
  79. // call CopyEntry for each entry except those matching the pattern
  80. while (entry.reset(inzip.GetNextEntry()), entry.get() != NULL)
  81. if (!entry->GetName().Matches(wxT("*.txt")))
  82. if (!outzip.CopyEntry(entry.release(), inzip))
  83. break;
  84. // close the input stream by releasing the pointer to it, do this
  85. // before closing the output stream so that the file can be replaced
  86. in.reset();
  87. // you can check for success as follows
  88. bool success = inzip.Eof() && outzip.Close() && out.Commit();
  89. @endcode
  90. @section overview_archive_byname Looking Up an Archive Entry by Name
  91. Also see wxFileSystem for a higher level interface that is more convenient for
  92. accessing archive entries by name.
  93. To open just one entry in an archive, the most efficient way is to simply
  94. search for it linearly by calling wxArchiveInputStream::GetNextEntry() until
  95. the required entry is found. This works both for archives on seekable and
  96. non-seekable streams.
  97. The format of filenames in the archive is likely to be different from the local
  98. filename format. For example zips and tars use unix style names, with forward
  99. slashes as the path separator, and absolute paths are not allowed. So if on
  100. Windows the file "C:\MYDIR\MYFILE.TXT" is stored, then when reading the entry
  101. back wxArchiveEntry::GetName() will return "MYDIR\MYFILE.TXT". The conversion
  102. into the internal format and back has lost some information.
  103. So to avoid ambiguity when searching for an entry matching a local name, it is
  104. better to convert the local name to the archive's internal format and search
  105. for that:
  106. @code
  107. auto_ptr<wxZipEntry> entry;
  108. // convert the local name we are looking for into the internal format
  109. wxString name = wxZipEntry::GetInternalName(localname);
  110. // open the zip
  111. wxFFileInputStream in(wxT("test.zip"));
  112. wxZipInputStream zip(in);
  113. // call GetNextEntry() until the required internal name is found
  114. do
  115. {
  116. entry.reset(zip.GetNextEntry());
  117. }
  118. while (entry.get() != NULL && entry->GetInternalName() != name);
  119. if (entry.get() != NULL)
  120. {
  121. // read the entry's data...
  122. }
  123. @endcode
  124. To access several entries randomly, it is most efficient to transfer the entire
  125. catalogue of entries to a container such as a std::map or a wxHashMap;
  126. entries looked up by name can then be opened using the
  127. wxArchiveInputStream::OpenEntry() method.
  128. @code
  129. WX_DECLARE_STRING_HASH_MAP(wxZipEntry*, ZipCatalog);
  130. ZipCatalog::iterator it;
  131. wxZipEntry *entry;
  132. ZipCatalog cat;
  133. // open the zip
  134. wxFFileInputStream in(wxT("test.zip"));
  135. wxZipInputStream zip(in);
  136. // load the zip catalog
  137. while ((entry = zip.GetNextEntry()) != NULL)
  138. {
  139. wxZipEntry*& current = cat[entry->GetInternalName()];
  140. // some archive formats can have multiple entries with the same name
  141. // (e.g. tar) though it is an error in the case of zip
  142. delete current;
  143. current = entry;
  144. }
  145. // open an entry by name
  146. if ((it = cat.find(wxZipEntry::GetInternalName(localname))) != cat.end())
  147. {
  148. zip.OpenEntry(*it->second);
  149. // ... now read entry's data
  150. }
  151. @endcode
  152. To open more than one entry simultaneously you need more than one underlying
  153. stream on the same archive:
  154. @code
  155. // opening another entry without closing the first requires another
  156. // input stream for the same file
  157. wxFFileInputStream in2(wxT("test.zip"));
  158. wxZipInputStream zip2(in2);
  159. if ((it = cat.find(wxZipEntry::GetInternalName(local2))) != cat.end())
  160. zip2.OpenEntry(*it->second);
  161. @endcode
  162. @section overview_archive_generic Generic Archive Programming
  163. Also see wxFileSystem for a higher level interface that can handle archive
  164. files in a generic way.
  165. The specific archive classes, such as the wxZip classes, inherit from the
  166. following abstract classes which can be used to write code that can handle any
  167. of the archive types:
  168. @li wxArchiveInputStream: Input stream
  169. @li wxArchiveOutputStream: Output stream
  170. @li wxArchiveEntry: Holds the meta-data for an entry (e.g. filename)
  171. In order to be able to write generic code it's necessary to be able to create
  172. instances of the classes without knowing which archive type is being used.
  173. To allow this there is a class factory for each archive type, derived from
  174. wxArchiveClassFactory, that can create the other classes.
  175. For example, given wxArchiveClassFactory* factory, streams and entries can be
  176. created like this:
  177. @code
  178. // create streams without knowing their type
  179. auto_ptr<wxArchiveInputStream> inarc(factory->NewStream(in));
  180. auto_ptr<wxArchiveOutputStream> outarc(factory->NewStream(out));
  181. // create an empty entry object
  182. auto_ptr<wxArchiveEntry> entry(factory->NewEntry());
  183. @endcode
  184. For the factory itself, the static member wxArchiveClassFactory::Find() can be
  185. used to find a class factory that can handle a given file extension or mime
  186. type. For example, given @e filename:
  187. @code
  188. const wxArchiveClassFactory *factory;
  189. factory = wxArchiveClassFactory::Find(filename, wxSTREAM_FILEEXT);
  190. if (factory)
  191. stream = factory->NewStream(new wxFFileInputStream(filename));
  192. @endcode
  193. @e Find() does not give away ownership of the returned pointer, so it does not
  194. need to be deleted.
  195. There are similar class factories for the filter streams that handle the
  196. compression and decompression of a single stream, such as wxGzipInputStream.
  197. These can be found using wxFilterClassFactory::Find().
  198. For example, to list the contents of archive @e filename:
  199. @code
  200. auto_ptr<wxInputStream> in(new wxFFileInputStream(filename));
  201. if (in->IsOk())
  202. {
  203. // look for a filter handler, e.g. for '.gz'
  204. const wxFilterClassFactory *fcf;
  205. fcf = wxFilterClassFactory::Find(filename, wxSTREAM_FILEEXT);
  206. if (fcf)
  207. {
  208. in.reset(fcf->NewStream(in.release()));
  209. // pop the extension, so if it was '.tar.gz' it is now just '.tar'
  210. filename = fcf->PopExtension(filename);
  211. }
  212. // look for an archive handler, e.g. for '.zip' or '.tar'
  213. const wxArchiveClassFactory *acf;
  214. acf = wxArchiveClassFactory::Find(filename, wxSTREAM_FILEEXT);
  215. if (acf)
  216. {
  217. auto_ptr<wxArchiveInputStream> arc(acf->NewStream(in.release()));
  218. auto_ptr<wxArchiveEntry> entry;
  219. // list the contents of the archive
  220. while ((entry.reset(arc->GetNextEntry())), entry.get() != NULL)
  221. std::wcout << entry->GetName().c_str() << "\n";
  222. }
  223. else
  224. {
  225. wxLogError(wxT("can't handle '%s'"), filename.c_str());
  226. }
  227. }
  228. @endcode
  229. @section overview_archive_noseek Archives on Non-Seekable Streams
  230. In general, handling archives on non-seekable streams is done in the same way
  231. as for seekable streams, with a few caveats.
  232. The main limitation is that accessing entries randomly using
  233. wxArchiveInputStream::OpenEntry() is not possible; the entries can only be
  234. accessed sequentially in the order they are stored within the archive.
  235. For each archive type, there will also be other limitations which will depend
  236. on the order the entries' meta-data is stored within the archive. These are not
  237. too difficult to deal with, and are outlined below.
  238. @subsection overview_archive_noseek_entrysize PutNextEntry and the Entry Size
  239. When writing archives, some archive formats store the entry size before the
  240. entry's data (tar has this limitation, zip doesn't). In this case the entry's
  241. size must be passed to wxArchiveOutputStream::PutNextEntry() or an error
  242. occurs.
  243. This is only an issue on non-seekable streams, since otherwise the archive
  244. output stream can seek back and fix up the header once the size of the entry is
  245. known.
  246. For generic programming, one way to handle this is to supply the size whenever
  247. it is known, and rely on the error message from the output stream when the
  248. operation is not supported.
  249. @subsection overview_archive_noseek_weak GetNextEntry and the Weak Reference Mechanism
  250. Some archive formats do not store all of an entry's meta-data before the entry's
  251. data (zip is an example). In this case, when reading from a non-seekable
  252. stream, wxArchiveInputStream::GetNextEntry() can only return a partially
  253. populated wxArchiveEntry object - not all the fields are set.
  254. The input stream then keeps a weak reference to the entry object and updates it
  255. when more meta-data becomes available. A weak reference is one that does not
  256. prevent you from deleting the wxArchiveEntry object - the input stream only
  257. attempts to update it if it is still around.
  258. The documentation for each archive entry type gives the details of what
  259. meta-data becomes available and when. For generic programming, when the worst
  260. case must be assumed, you can rely on all the fields of wxArchiveEntry being
  261. fully populated when GetNextEntry() returns, with the following exceptions:
  262. @li wxArchiveEntry::GetSize(): Guaranteed to be available after the entry has
  263. been read to wxInputStream::Eof(), or wxArchiveInputStream::CloseEntry()
  264. has been called.
  265. @li wxArchiveEntry::IsReadOnly(): Guaranteed to be available after the end of
  266. the archive has been reached, i.e. after GetNextEntry() returns @NULL and
  267. Eof() is @true.
  268. This mechanism allows wxArchiveOutputStream::CopyEntry() to always fully
  269. preserve entries' meta-data. No matter what order the meta-data occurs
  270. within the archive, the input stream will always have read it before the output
  271. stream must write it.
  272. @subsection overview_archive_noseek_notifier wxArchiveNotifier
  273. Notifier objects can be used to get a notification whenever an input stream
  274. updates a wxArchiveEntry object's data via the weak reference mechanism.
  275. Consider the following code which renames an entry in an archive. This is the
  276. usual way to modify an entry's meta-data: simply set the required field before
  277. writing it with wxArchiveOutputStream::CopyEntry():
  278. @code
  279. auto_ptr<wxArchiveInputStream> arc(factory->NewStream(in));
  280. auto_ptr<wxArchiveOutputStream> outarc(factory->NewStream(out));
  281. auto_ptr<wxArchiveEntry> entry;
  282. outarc->CopyArchiveMetaData(*arc);
  283. while (entry.reset(arc->GetNextEntry()), entry.get() != NULL)
  284. {
  285. if (entry->GetName() == from)
  286. entry->SetName(to);
  287. if (!outarc->CopyEntry(entry.release(), *arc))
  288. break;
  289. }
  290. bool success = arc->Eof() && outarc->Close();
  291. @endcode
  292. However, for non-seekable streams, this technique cannot be used for fields
  293. such as wxArchiveEntry::IsReadOnly(), which are not necessarily set when
  294. wxArchiveInputStream::GetNextEntry() returns.
  295. In this case a wxArchiveNotifier can be used:
  296. @code
  297. class MyNotifier : public wxArchiveNotifier
  298. {
  299. public:
  300. void OnEntryUpdated(wxArchiveEntry& entry) { entry.SetIsReadOnly(false); }
  301. };
  302. @endcode
  303. The meta-data changes are done in your notifier's
  304. wxArchiveNotifier::OnEntryUpdated() method, then wxArchiveEntry::SetNotifier()
  305. is called before CopyEntry():
  306. @code
  307. auto_ptr<wxArchiveInputStream> arc(factory->NewStream(in));
  308. auto_ptr<wxArchiveOutputStream> outarc(factory->NewStream(out));
  309. auto_ptr<wxArchiveEntry> entry;
  310. MyNotifier notifier;
  311. outarc->CopyArchiveMetaData(*arc);
  312. while (entry.reset(arc->GetNextEntry()), entry.get() != NULL)
  313. {
  314. entry->SetNotifier(notifier);
  315. if (!outarc->CopyEntry(entry.release(), *arc))
  316. break;
  317. }
  318. bool success = arc->Eof() && outarc->Close();
  319. @endcode
  320. SetNotifier() calls OnEntryUpdated() immediately, then the input stream calls
  321. it again whenever it sets more fields in the entry. Since OnEntryUpdated() will
  322. be called at least once, this technique always works even when it is not
  323. strictly necessary to use it. For example, changing the entry name can be done
  324. this way too and it works on seekable streams as well as non-seekable.
  325. */