  // convauto.h (4.4 KB)
  ///////////////////////////////////////////////////////////////////////////////
  // Name: wx/convauto.h
  // Purpose: wxConvAuto class declaration
  // Author: Vadim Zeitlin
  // Created: 2006-04-03
  // Copyright: (c) 2006 Vadim Zeitlin
  // Licence: wxWindows licence
  ///////////////////////////////////////////////////////////////////////////////
  9. #ifndef _WX_CONVAUTO_H_
  10. #define _WX_CONVAUTO_H_
  11. #include "wx/strconv.h"
  12. #include "wx/fontenc.h"
  // ----------------------------------------------------------------------------
  // wxConvAuto: uses BOM to automatically detect input encoding
  // ----------------------------------------------------------------------------
  // All currently recognized BOM values.
  // Byte order mark kinds, as returned by wxConvAuto::DetectBOM() and
  // wxConvAuto::GetBOM().
  //
  // The numeric values are kept exactly as before: wxBOM_Unknown is -1 and the
  // remaining elements count up from 0 in declaration order.
  enum wxBOM
  {
      // BOM not determined (yet): this is the value a wxConvAuto object
      // starts with
      wxBOM_Unknown = -1,

      // presumably: the input was examined and has no BOM -- confirm against
      // the DetectBOM() implementation
      wxBOM_None,

      // UTF-32, big and little endian
      wxBOM_UTF32BE,
      wxBOM_UTF32LE,

      // UTF-16, big and little endian
      wxBOM_UTF16BE,
      wxBOM_UTF16LE,

      // UTF-8
      wxBOM_UTF8
  };
  27. class WXDLLIMPEXP_BASE wxConvAuto : public wxMBConv
  28. {
  29. public:
  30. // default ctor, the real conversion will be created on demand
  31. wxConvAuto(wxFontEncoding enc = wxFONTENCODING_DEFAULT)
  32. {
  33. Init();
  34. m_encDefault = enc;
  35. }
  36. // copy ctor doesn't initialize anything neither as conversion can only be
  37. // deduced on first use
  38. wxConvAuto(const wxConvAuto& other) : wxMBConv()
  39. {
  40. Init();
  41. m_encDefault = other.m_encDefault;
  42. }
  43. virtual ~wxConvAuto()
  44. {
  45. if ( m_ownsConv )
  46. delete m_conv;
  47. }
  48. // get/set the fall-back encoding used when the input text doesn't have BOM
  49. // and isn't UTF-8
  50. //
  51. // special values are wxFONTENCODING_MAX meaning not to use any fall back
  52. // at all (but just fail to convert in this case) and wxFONTENCODING_SYSTEM
  53. // meaning to use the encoding of the system locale
  54. static wxFontEncoding GetFallbackEncoding() { return ms_defaultMBEncoding; }
  55. static void SetFallbackEncoding(wxFontEncoding enc);
  56. static void DisableFallbackEncoding()
  57. {
  58. SetFallbackEncoding(wxFONTENCODING_MAX);
  59. }
  60. // override the base class virtual function(s) to use our m_conv
  61. virtual size_t ToWChar(wchar_t *dst, size_t dstLen,
  62. const char *src, size_t srcLen = wxNO_LEN) const;
  63. virtual size_t FromWChar(char *dst, size_t dstLen,
  64. const wchar_t *src, size_t srcLen = wxNO_LEN) const;
  65. virtual size_t GetMBNulLen() const { return m_conv->GetMBNulLen(); }
  66. virtual wxMBConv *Clone() const { return new wxConvAuto(*this); }
  67. // return the BOM type of this buffer
  68. static wxBOM DetectBOM(const char *src, size_t srcLen);
  69. // return the characters composing the given BOM.
  70. static const char* GetBOMChars(wxBOM bomType, size_t* count);
  71. wxBOM GetBOM() const
  72. {
  73. return m_bomType;
  74. }
  75. private:
  76. // common part of all ctors
  77. void Init()
  78. {
  79. // We don't initialize m_encDefault here as different ctors do it
  80. // differently.
  81. m_conv = NULL;
  82. m_bomType = wxBOM_Unknown;
  83. m_ownsConv = false;
  84. m_consumedBOM = false;
  85. }
  86. // initialize m_conv with the UTF-8 conversion
  87. void InitWithUTF8()
  88. {
  89. m_conv = &wxConvUTF8;
  90. m_ownsConv = false;
  91. }
  92. // create the correct conversion object for the given BOM type
  93. void InitFromBOM(wxBOM bomType);
  94. // create the correct conversion object for the BOM present in the
  95. // beginning of the buffer
  96. //
  97. // return false if the buffer is too short to allow us to determine if we
  98. // have BOM or not
  99. bool InitFromInput(const char *src, size_t len);
  100. // adjust src and len to skip over the BOM (identified by m_bomType) at the
  101. // start of the buffer
  102. void SkipBOM(const char **src, size_t *len) const;
  103. // fall-back multibyte encoding to use, may be wxFONTENCODING_SYSTEM or
  104. // wxFONTENCODING_MAX but not wxFONTENCODING_DEFAULT
  105. static wxFontEncoding ms_defaultMBEncoding;
  106. // conversion object which we really use, NULL until the first call to
  107. // either ToWChar() or FromWChar()
  108. wxMBConv *m_conv;
  109. // the multibyte encoding to use by default if input isn't Unicode
  110. wxFontEncoding m_encDefault;
  111. // our BOM type
  112. wxBOM m_bomType;
  113. // true if we allocated m_conv ourselves, false if we just use an existing
  114. // global conversion
  115. bool m_ownsConv;
  116. // true if we already skipped BOM when converting (and not just calculating
  117. // the size)
  118. bool m_consumedBOM;
  119. wxDECLARE_NO_ASSIGN_CLASS(wxConvAuto);
  120. };
  121. #endif // _WX_CONVAUTO_H_