tokenzr.h 5.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148
  1. /////////////////////////////////////////////////////////////////////////////
  2. // Name: wx/tokenzr.h
  3. // Purpose: String tokenizer - a C++ replacement for strtok(3)
  4. // Author: Guilhem Lavaux
  5. // Modified by: (or rather rewritten by) Vadim Zeitlin
  6. // Created: 04/22/98
  7. // Copyright: (c) Guilhem Lavaux
  8. // Licence: wxWindows licence
  9. /////////////////////////////////////////////////////////////////////////////
  10. #ifndef _WX_TOKENZRH
  11. #define _WX_TOKENZRH
  12. #include "wx/object.h"
  13. #include "wx/string.h"
  14. #include "wx/arrstr.h"
  15. // ----------------------------------------------------------------------------
  16. // constants
  17. // ----------------------------------------------------------------------------
// default delimiters: the usual white space characters, i.e. space, tab,
// carriage return and line feed; this is the default value of the "delims"
// parameter of the tokenizer functions declared in this header
#define wxDEFAULT_DELIMITERS (wxT(" \t\r\n"))
  20. // wxStringTokenizer mode flags which determine its behaviour
  21. enum wxStringTokenizerMode
  22. {
  23. wxTOKEN_INVALID = -1, // set by def ctor until SetString() is called
  24. wxTOKEN_DEFAULT, // strtok() for whitespace delims, RET_EMPTY else
  25. wxTOKEN_RET_EMPTY, // return empty token in the middle of the string
  26. wxTOKEN_RET_EMPTY_ALL, // return trailing empty tokens too
  27. wxTOKEN_RET_DELIMS, // return the delim with token (implies RET_EMPTY)
  28. wxTOKEN_STRTOK // behave exactly like strtok(3)
  29. };
  30. // ----------------------------------------------------------------------------
  31. // wxStringTokenizer: replaces infamous strtok() and has some other features
  32. // ----------------------------------------------------------------------------
  33. class WXDLLIMPEXP_BASE wxStringTokenizer : public wxObject
  34. {
  35. public:
  36. // ctors and initializers
  37. // default ctor, call SetString() later
  38. wxStringTokenizer() { m_mode = wxTOKEN_INVALID; }
  39. // ctor which gives us the string
  40. wxStringTokenizer(const wxString& str,
  41. const wxString& delims = wxDEFAULT_DELIMITERS,
  42. wxStringTokenizerMode mode = wxTOKEN_DEFAULT);
  43. // args are same as for the non default ctor above
  44. void SetString(const wxString& str,
  45. const wxString& delims = wxDEFAULT_DELIMITERS,
  46. wxStringTokenizerMode mode = wxTOKEN_DEFAULT);
  47. // reinitialize the tokenizer with the same delimiters/mode
  48. void Reinit(const wxString& str);
  49. // tokens access
  50. // return the number of remaining tokens
  51. size_t CountTokens() const;
  52. // did we reach the end of the string?
  53. bool HasMoreTokens() const;
  54. // get the next token, will return empty string if !HasMoreTokens()
  55. wxString GetNextToken();
  56. // get the delimiter which terminated the token last retrieved by
  57. // GetNextToken() or NUL if there had been no tokens yet or the last
  58. // one wasn't terminated (but ran to the end of the string)
  59. wxChar GetLastDelimiter() const { return m_lastDelim; }
  60. // get current tokenizer state
  61. // returns the part of the string which remains to tokenize (*not* the
  62. // initial string)
  63. wxString GetString() const { return wxString(m_pos, m_string.end()); }
  64. // returns the current position (i.e. one index after the last
  65. // returned token or 0 if GetNextToken() has never been called) in the
  66. // original string
  67. size_t GetPosition() const { return m_pos - m_string.begin(); }
  68. // misc
  69. // get the current mode - can be different from the one passed to the
  70. // ctor if it was wxTOKEN_DEFAULT
  71. wxStringTokenizerMode GetMode() const { return m_mode; }
  72. // do we return empty tokens?
  73. bool AllowEmpty() const { return m_mode != wxTOKEN_STRTOK; }
  74. // backwards compatibility section from now on
  75. // -------------------------------------------
  76. // for compatibility only, use GetNextToken() instead
  77. wxString NextToken() { return GetNextToken(); }
  78. // compatibility only, don't use
  79. void SetString(const wxString& to_tokenize,
  80. const wxString& delims,
  81. bool WXUNUSED(ret_delim))
  82. {
  83. SetString(to_tokenize, delims, wxTOKEN_RET_DELIMS);
  84. }
  85. wxStringTokenizer(const wxString& to_tokenize,
  86. const wxString& delims,
  87. bool ret_delim)
  88. {
  89. SetString(to_tokenize, delims, ret_delim);
  90. }
  91. protected:
  92. bool IsOk() const { return m_mode != wxTOKEN_INVALID; }
  93. bool DoHasMoreTokens() const;
  94. enum MoreTokensState
  95. {
  96. MoreTokens_Unknown,
  97. MoreTokens_Yes,
  98. MoreTokens_No
  99. };
  100. MoreTokensState m_hasMoreTokens;
  101. wxString m_string; // the string we tokenize
  102. wxString::const_iterator m_stringEnd;
  103. // FIXME-UTF8: use wxWcharBuffer
  104. wxWxCharBuffer m_delims; // all possible delimiters
  105. size_t m_delimsLen;
  106. wxString::const_iterator m_pos; // the current position in m_string
  107. wxStringTokenizerMode m_mode; // see wxTOKEN_XXX values
  108. wxChar m_lastDelim; // delimiter after last token or '\0'
  109. };
  110. // ----------------------------------------------------------------------------
  111. // convenience function which returns all tokens at once
  112. // ----------------------------------------------------------------------------
// split the given string into tokens and return all of them at once
//
// the function takes the same parameters as wxStringTokenizer ctor: str is
// the string to tokenize, delims contains the delimiter characters (white
// space by default) and mode is one of the wxStringTokenizerMode values
// (wxTOKEN_DEFAULT by default)
//
// returns the array containing all tokens
wxArrayString WXDLLIMPEXP_BASE
wxStringTokenize(const wxString& str,
                 const wxString& delims = wxDEFAULT_DELIMITERS,
                 wxStringTokenizerMode mode = wxTOKEN_DEFAULT);
  119. #endif // _WX_TOKENZRH