| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207 | 
							- /////////////////////////////////////////////////////////////////////////////
 
- // Name:        convauto.h
 
- // Purpose:     interface of wxConvAuto
 
- // Author:      wxWidgets team
 
- // Licence:     wxWindows licence
 
- /////////////////////////////////////////////////////////////////////////////
 
- /**
 
-     Constants representing various BOM types.
 
-     BOM is an abbreviation for "Byte Order Mark", a special Unicode character
 
-     which may be inserted into the beginning of a text stream to indicate its
 
-     encoding.
 
-     @since 2.9.3
 
-  */
 
- enum wxBOM
 
- {
 
-     /**
 
-         Unknown BOM.
 
-         This is returned if BOM presence couldn't be determined and normally
 
-         happens because not enough bytes of input have been analysed.
 
-      */
 
-     wxBOM_Unknown = -1,
 
-     /**
 
-         No BOM.
 
-         The stream doesn't contain BOM character at all.
 
-      */
 
-     wxBOM_None,
 
-     /**
 
-         UTF-32 big endian BOM.
 
-         The stream is encoded in big endian variant of UTF-32.
 
-      */
 
-     wxBOM_UTF32BE,
 
-     /**
 
-         UTF-32 little endian BOM.
 
-         The stream is encoded in little endian variant of UTF-32.
 
-      */
 
-     wxBOM_UTF32LE,
 
-     /**
 
-         UTF-16 big endian BOM.
 
-         The stream is encoded in big endian variant of UTF-16.
 
-      */
 
-     wxBOM_UTF16BE,
 
-     /**
 
-         UTF-16 little endian BOM.
 
-         The stream is encoded in little endian variant of UTF-16.
 
-      */
 
-     wxBOM_UTF16LE,
 
-     /**
 
-         UTF-8 BOM.
 
-         The stream is encoded in UTF-8.
 
-         Notice that contrary to a popular belief, it's perfectly possible and,
 
-         n fact, common under Microsoft Windows systems, to have a BOM in an
 
-         UTF-8 stream: while it's not used to indicate the endianness of UTF-8
 
-         stream (as it's byte-oriented), the BOM can still be useful just as an
 
-         unambiguous indicator of UTF-8 being used.
 
-      */
 
-     wxBOM_UTF8
 
- };
 
- /**
 
-     @class wxConvAuto
 
-     This class implements a Unicode to/from multibyte converter capable of
 
-     automatically recognizing the encoding of the multibyte text on input. The
 
-     logic used is very simple: the class uses the BOM (byte order mark) if it's
 
-     present and tries to interpret the input as UTF-8 otherwise. If this fails,
 
-     the input is interpreted as being in the default multibyte encoding which
 
-     can be specified in the constructor of a wxConvAuto instance and, in turn,
 
-     defaults to the value of GetFallbackEncoding() if not explicitly given.
 
-     For the conversion from Unicode to multibyte, the same encoding as was
 
-     previously used for multibyte to Unicode conversion is reused. If there had
 
-     been no previous multibyte to Unicode conversion, UTF-8 is used by default.
 
-     Notice that once the multibyte encoding is automatically detected, it
 
-     doesn't change any more, i.e. it is entirely determined by the first use of
 
-     wxConvAuto object in the multibyte-to-Unicode direction. However creating a
 
-     copy of wxConvAuto object, either via the usual copy constructor or
 
-     assignment operator, or using wxMBConv::Clone(), resets the automatically
 
-     detected encoding so that the new copy will try to detect the encoding of
 
-     the input on first use.
 
-     This class is used by default in wxWidgets classes and functions reading
 
-     text from files such as wxFile, wxFFile, wxTextFile, wxFileConfig and
 
-     various stream classes so the encoding set with its SetFallbackEncoding()
 
-     method will affect how these classes treat input files. In particular, use
 
-     this method to change the fall-back multibyte encoding used to interpret
 
-     the contents of the files whose contents isn't valid UTF-8 or to disallow
 
-     it completely.
 
-     @library{wxbase}
 
-     @category{data}
 
-     @see @ref overview_mbconv
 
- */
 
- class wxConvAuto : public wxMBConv
 
- {
 
- public:
 
-     /**
 
-         Constructs a new wxConvAuto instance. The object will try to detect the
 
-         input of the multibyte text given to its wxMBConv::ToWChar() method
 
-         automatically but if the automatic detection of Unicode encodings
 
-         fails, the fall-back encoding @a enc will be used to interpret it as
 
-         multibyte text.
 
-         The default value of @a enc, @c wxFONTENCODING_DEFAULT, means that the
 
-         global default value (which can be set using SetFallbackEncoding())
 
-         should be used. As with that method, passing @c wxFONTENCODING_MAX
 
-         inhibits using this encoding completely so the input multibyte text
 
-         will always be interpreted as UTF-8 in the absence of BOM and the
 
-         conversion will fail if the input doesn't form valid UTF-8 sequence.
 
-         Another special value is @c wxFONTENCODING_SYSTEM which means to use
 
-         the encoding currently used on the user system, i.e. the encoding
 
-         returned by wxLocale::GetSystemEncoding(). Any other encoding will be
 
-         used as is, e.g. passing @c wxFONTENCODING_ISO8859_1 ensures that
 
-         non-UTF-8 input will be treated as latin1.
 
-     */
 
-     wxConvAuto(wxFontEncoding enc = wxFONTENCODING_DEFAULT);
 
-     /**
 
-         Return the detected BOM type.
 
-         The BOM type is detected after sufficiently many initial bytes have
 
-         passed through this conversion object so it will always return
 
-         wxBOM_Unknown immediately after the object creation but may return a
 
-         different value later.
 
-         @since 2.9.3
 
-     */
 
-     wxBOM GetBOM() const;
 
-     /**
 
-         Return a pointer to the characters that makes up this BOM.
 
-         The returned character count is 2, 3 or 4, or undefined if the return
 
-         value is NULL.
 
-         @param bom
 
-             A valid BOM type, i.e. not wxBOM_Unknown or wxBOM_None.
 
-         @param count
 
-             A non-@NULL pointer receiving the number of characters in this BOM.
 
-         @return
 
-             Pointer to characters composing the BOM or @NULL if BOM is unknown
 
-             or invalid. Notice that the returned string is not NUL-terminated
 
-             and may contain embedded NULs so @a count must be used to handle it
 
-             correctly.
 
-         @since 2.9.3
 
-     */
 
-     const char* GetBOMChars(wxBOM bom, size_t* count);
 
-     /**
 
-         Disable the use of the fall back encoding: if the input doesn't have a
 
-         BOM and is not valid UTF-8, the conversion will fail.
 
-     */
 
-     static void DisableFallbackEncoding();
 
-     /**
 
-         Returns the encoding used by default by wxConvAuto if no other encoding
 
-         is explicitly specified in constructor. By default, returns
 
-         @c wxFONTENCODING_ISO8859_1 but can be changed using
 
-         SetFallbackEncoding().
 
-     */
 
-     static wxFontEncoding GetFallbackEncoding();
 
-     /**
 
-         Changes the encoding used by default by wxConvAuto if no other encoding
 
-         is explicitly specified in constructor. The default value, which can be
 
-         retrieved using GetFallbackEncoding(), is @c wxFONTENCODING_ISO8859_1.
 
-         Special values of @c wxFONTENCODING_SYSTEM or @c wxFONTENCODING_MAX can
 
-         be used for the @a enc parameter to use the encoding of the current
 
-         user locale as fall back or not use any encoding for fall back at all,
 
-         respectively (just as with the similar constructor parameter). However,
 
-         @c wxFONTENCODING_DEFAULT can't be used here.
 
-     */
 
-     static void SetFallbackEncoding(wxFontEncoding enc);
 
-     /**
 
-         Return the BOM type of this buffer.
 
-         This is a helper function which is normally only used internally by
 
-         wxConvAuto but provided for convenience of the code that wants to
 
-         detect the encoding of a stream by checking it for BOM presence on its
 
-         own.
 
-         @since 2.9.3
 
-     */
 
-     static wxBOM DetectBOM(const char *src, size_t srcLen);
 
- };
 
 
  |