encoding.h

00001 /*
00002 
00003  * Summary: interface for the encoding conversion functions
00004 
00005  * Description: interface for the encoding conversion functions needed for
00006 
00007  *              XML basic encoding and iconv() support.
00008 
00009  *
00010 
00011  * Related specs are
00012 
00013  * rfc2044        (UTF-8 and UTF-16) F. Yergeau Alis Technologies
00014 
00015  * [ISO-10646]    UTF-8 and UTF-16 in Annexes
00016 
00017  * [ISO-8859-1]   ISO Latin-1 character codes.
00018 
00019  * [UNICODE]      The Unicode Consortium, "The Unicode Standard --
00020 
00021  *                Worldwide Character Encoding -- Version 1.0", Addison-
00022 
00023  *                Wesley, Volume 1, 1991, Volume 2, 1992.  UTF-8 is
00024 
00025  *                described in Unicode Technical Report #4.
00026 
00027  * [US-ASCII]     Coded Character Set--7-bit American Standard Code for
00028 
00029  *                Information Interchange, ANSI X3.4-1986.
00030 
00031  *
00032 
00033  * Copy: See Copyright for the status of this software.
00034 
00035  *
00036 
00037  * Author: Daniel Veillard
00038 
00039  */
00040 
00041 
00042 
00043 #ifndef __XML_CHAR_ENCODING_H__
00044 
00045 #define __XML_CHAR_ENCODING_H__
00046 
00047 
00048 
00049 #include <libxml/xmlversion.h>
00050 
00051 
00052 
00053 #ifdef LIBXML_ICONV_ENABLED
00054 
00055 #include <iconv.h>
00056 
00057 #endif
00058 
00059 #ifdef __cplusplus
00060 
00061 extern "C" {
00062 
00063 #endif
00064 
00065 
00066 
00067 /*
00068 
00069  * xmlCharEncoding:
00070 
00071  *
00072 
00073  * Predefined values for some standard encodings.
00074 
00075  * Libxml does not translate UTF8 and ISOLatinX input beforehand.
00076 
00077  * It also supports ASCII, ISO-8859-1, and UTF16 (LE and BE) by default.
00078 
00079  *
00080 
00081  * Anything else would have to be translated to UTF8 before being
00082 
00083  * given to the parser itself. The BOM for UTF16 and the encoding
00084 
00085  * declaration are looked at and a converter is looked for at that
00086 
00087  * point. If not found the parser stops here as asked by the XML REC. A
00088 
00089  * converter can be registered by the user using xmlRegisterCharEncodingHandler
00090 
00091  * but the current form doesn't allow stateful transcoding (a serious
00092 
00093  * problem, agreed!). If iconv has been found it will be used

00095  * automatically and will allow stateful transcoding; the simplest is then
00096 
00097  * to be sure to enable iconv and to provide iconv libs for the encoding
00098 
00099  * support needed.
00100 
00101  *
00102 
00103  * Note that the generic "UTF-16" is not a predefined value.  Instead, only
00104 
00105  * the specific UTF-16LE and UTF-16BE are present.
00106 
00107  */
00108 
/*
 * Every enumerator carries an explicit value, from -1 through 22.
 * As the comment preceding this enum notes, there is no generic "UTF-16"
 * entry, only the specific LE and BE variants.
 */
00109 typedef enum {

00111     XML_CHAR_ENCODING_ERROR=   -1, /* Error value; the original comment repeated NONE's text -- TODO confirm exact meaning */

00113     XML_CHAR_ENCODING_NONE=     0, /* No char encoding detected */

00115     XML_CHAR_ENCODING_UTF8=     1, /* UTF-8 */

00117     XML_CHAR_ENCODING_UTF16LE=  2, /* UTF-16 little endian */

00119     XML_CHAR_ENCODING_UTF16BE=  3, /* UTF-16 big endian */

00121     XML_CHAR_ENCODING_UCS4LE=   4, /* UCS-4 little endian */

00123     XML_CHAR_ENCODING_UCS4BE=   5, /* UCS-4 big endian */

00125     XML_CHAR_ENCODING_EBCDIC=   6, /* EBCDIC */

00127     XML_CHAR_ENCODING_UCS4_2143=7, /* UCS-4 unusual ordering (2143) */

00129     XML_CHAR_ENCODING_UCS4_3412=8, /* UCS-4 unusual ordering (3412) */

00131     XML_CHAR_ENCODING_UCS2=     9, /* UCS-2 */

00133     XML_CHAR_ENCODING_8859_1=   10,/* ISO-8859-1 ISO Latin 1 */

00135     XML_CHAR_ENCODING_8859_2=   11,/* ISO-8859-2 ISO Latin 2 */

00137     XML_CHAR_ENCODING_8859_3=   12,/* ISO-8859-3 */

00139     XML_CHAR_ENCODING_8859_4=   13,/* ISO-8859-4 */

00141     XML_CHAR_ENCODING_8859_5=   14,/* ISO-8859-5 */

00143     XML_CHAR_ENCODING_8859_6=   15,/* ISO-8859-6 */

00145     XML_CHAR_ENCODING_8859_7=   16,/* ISO-8859-7 */

00147     XML_CHAR_ENCODING_8859_8=   17,/* ISO-8859-8 */

00149     XML_CHAR_ENCODING_8859_9=   18,/* ISO-8859-9 */

00151     XML_CHAR_ENCODING_2022_JP=  19,/* ISO-2022-JP */

00153     XML_CHAR_ENCODING_SHIFT_JIS=20,/* Shift_JIS */

00155     XML_CHAR_ENCODING_EUC_JP=   21,/* EUC-JP */

00157     XML_CHAR_ENCODING_ASCII=    22 /* pure ASCII */

00159 } xmlCharEncoding;
00160 
00161 
00162 
/*
 * xmlCharEncodingInputFunc:
 * @out:     writable byte buffer
 * @outlen:  pointer to an int paired with @out
 * @in:      read-only byte buffer
 * @inlen:   pointer to an int paired with @in
 *
 * Pointer-to-function type used for the "input" member of
 * struct _xmlCharEncodingHandler. Returns an int.
 *
 * NOTE(review): both lengths are passed by pointer, so presumably the
 * callee updates them (bytes produced / bytes consumed); the return-code
 * convention is not visible in this header -- confirm against the
 * implementation.
 */
00195 typedef int (* xmlCharEncodingInputFunc)(unsigned char *out, int *outlen,

00197                                          const unsigned char *in, int *inlen);
00198 
00199 
00200 
00201 
00202 
/*
 * xmlCharEncodingOutputFunc:
 * @out:     writable byte buffer
 * @outlen:  pointer to an int paired with @out
 * @in:      read-only byte buffer
 * @inlen:   pointer to an int paired with @in
 *
 * Pointer-to-function type used for the "output" member of
 * struct _xmlCharEncodingHandler. It has the same parameter list and
 * return type as xmlCharEncodingInputFunc.
 *
 * NOTE(review): both lengths are passed by pointer, so presumably the
 * callee updates them (bytes produced / bytes consumed); the return-code
 * convention is not visible in this header -- confirm against the
 * implementation.
 */
00239 typedef int (* xmlCharEncodingOutputFunc)(unsigned char *out, int *outlen,

00241                                           const unsigned char *in, int *inlen);
00242 
00243 
00244 
00245 
00246 
00247 /*
00248 
00249  * Block defining the handlers for non UTF-8 encodings.
00250 
00251  * If iconv is supported, there are two extra fields.
00252 
00253  */
00254 
00255 
00256 
/*
 * xmlCharEncodingHandler is the struct type declared below and
 * xmlCharEncodingHandlerPtr is a pointer to it.
 *
 * The struct gains two iconv_t members when LIBXML_ICONV_ENABLED is
 * defined, so its size depends on that macro: every translation unit that
 * shares handler objects must be compiled with the same setting.
 */
00257 typedef struct _xmlCharEncodingHandler xmlCharEncodingHandler;

00259 typedef xmlCharEncodingHandler *xmlCharEncodingHandlerPtr;

00261 struct _xmlCharEncodingHandler {

00263     char                       *name;      /* handler name string; presumably the encoding name -- TODO confirm */

00265     xmlCharEncodingInputFunc   input;      /* input-side conversion callback */

00267     xmlCharEncodingOutputFunc  output;     /* output-side conversion callback */

00269 #ifdef LIBXML_ICONV_ENABLED

00271     iconv_t                    iconv_in;   /* iconv descriptor, input side (iconv builds only) */

00273     iconv_t                    iconv_out;  /* iconv descriptor, output side (iconv builds only) */

00275 #endif /* LIBXML_ICONV_ENABLED */

00277 };
00278 
00279 
00280 
00281 #ifdef __cplusplus
00282 
00283 }
00284 
00285 #endif
00286 
00287 #include <libxml/tree.h>
00288 
00289 #ifdef __cplusplus
00290 
00291 extern "C" {
00292 
00293 #endif
00294 
00295 
00296 
00297 /*
00298 
00299  * Interfaces for encoding handlers.
00300 
00301  */
00302 
/*
 * Takes no arguments and returns nothing.
 * NOTE(review): by its name this sets up the library's encoding handlers;
 * the implementation is not visible in this header -- confirm there.
 */
00303 XMLPUBFUN void XMLCALL  

00305         xmlInitCharEncodingHandlers     (void);
00306 
/*
 * Takes no arguments and returns nothing.
 * NOTE(review): by its name this is the counterpart of
 * xmlInitCharEncodingHandlers and releases the handlers; what exactly is
 * freed is not visible in this header -- confirm in the implementation.
 */
00307 XMLPUBFUN void XMLCALL  

00309         xmlCleanupCharEncodingHandlers  (void);
00310 
/*
 * @handler: the handler to register
 *
 * Entry point by which a user registers a converter (see the
 * xmlCharEncoding comment earlier in this header). Returns nothing, so a
 * failure cannot be reported to the caller through the return value.
 * NOTE(review): ownership of @handler after the call is not visible
 * here -- confirm in the implementation.
 */
00311 XMLPUBFUN void XMLCALL  

00313         xmlRegisterCharEncodingHandler  (xmlCharEncodingHandlerPtr handler);
00314 
/*
 * @enc: an xmlCharEncoding value
 *
 * Returns a handler pointer for the given predefined encoding value.
 * NOTE(review): presumably NULL when no handler is available -- confirm.
 */
00315 XMLPUBFUN xmlCharEncodingHandlerPtr XMLCALL

00317         xmlGetCharEncodingHandler       (xmlCharEncoding enc);
00318 
/*
 * @name: a C string (not modified through this pointer)
 *
 * Returns a handler pointer looked up by name rather than by
 * xmlCharEncoding value.
 * NOTE(review): presumably NULL when nothing matches; name matching rules
 * (case, aliases) are not visible here -- confirm.
 */
00319 XMLPUBFUN xmlCharEncodingHandlerPtr XMLCALL

00321         xmlFindCharEncodingHandler      (const char *name);
00322 
/*
 * @name:    a C string (not modified through this pointer)
 * @input:   input-side conversion callback
 * @output:  output-side conversion callback
 *
 * The three parameters correspond to the three unconditional members of
 * struct _xmlCharEncodingHandler (name, input, output). Returns a handler
 * pointer.
 * NOTE(review): whether @name is copied, whether the new handler is also
 * registered, and the failure return are not visible here -- confirm.
 */
00323 XMLPUBFUN xmlCharEncodingHandlerPtr XMLCALL

00325         xmlNewCharEncodingHandler       (const char *name, 

00327                                          xmlCharEncodingInputFunc input,

00329                                          xmlCharEncodingOutputFunc output);
00330 
00331 
00332 
00333 /*
00334 
00335  * Interfaces for encoding names and aliases.
00336 
00337  */
00338 
/*
 * @name:   a C string (not modified through this pointer)
 * @alias:  a C string (not modified through this pointer)
 *
 * Returns an int.
 * NOTE(review): by its name this records @alias as another name for
 * encoding @name; the meaning of the return value is not visible
 * here -- confirm.
 */
00339 XMLPUBFUN int XMLCALL   

00341         xmlAddEncodingAlias             (const char *name,

00343                                          const char *alias);
00344 
/*
 * @alias: a C string (not modified through this pointer)
 *
 * Returns an int.
 * NOTE(review): by its name this removes a previously added alias; the
 * meaning of the return value is not visible here -- confirm.
 */
00345 XMLPUBFUN int XMLCALL   

00347         xmlDelEncodingAlias             (const char *alias);
00348 
/*
 * @alias: a C string (not modified through this pointer)
 *
 * Returns a pointer to const char, so the caller must not modify the
 * result through it.
 * NOTE(review): presumably the encoding name the alias maps to, or NULL
 * when unknown; lifetime of the returned string is not visible
 * here -- confirm.
 */
00349 XMLPUBFUN const char * XMLCALL

00351         xmlGetEncodingAlias             (const char *alias);
00352 
/*
 * Takes no arguments and returns nothing.
 * NOTE(review): by its name this discards all registered aliases -- confirm
 * in the implementation.
 */
00353 XMLPUBFUN void XMLCALL  

00355         xmlCleanupEncodingAliases       (void);
00356 
/*
 * @name: a C string (not modified through this pointer)
 *
 * Maps a name string to an xmlCharEncoding value.
 * NOTE(review): which enumerator is returned for an unrecognized or NULL
 * name (ERROR vs. NONE) is not visible here -- confirm.
 */
00357 XMLPUBFUN xmlCharEncoding XMLCALL

00359         xmlParseCharEncoding            (const char *name);
00360 
/*
 * @enc: an xmlCharEncoding value
 *
 * Maps an xmlCharEncoding value to a string; the result is a pointer to
 * const char, so the caller must not modify it through this pointer.
 * NOTE(review): presumably NULL for values without a name -- confirm.
 */
00361 XMLPUBFUN const char * XMLCALL

00363         xmlGetCharEncodingName          (xmlCharEncoding enc);
00364 
00365 
00366 
00367 /*
00368 
00369  * Interfaces directly used by the parsers.
00370 
00371  */
00372 
/*
 * @in:   read-only byte buffer
 * @len:  an int paired with @in
 *
 * Returns an xmlCharEncoding value for the given bytes.
 * NOTE(review): presumably inspects the first bytes of @in (BOM / start of
 * the XML declaration) and @len is the number of bytes available; how many
 * bytes are required is not visible here -- confirm.
 */
00373 XMLPUBFUN xmlCharEncoding XMLCALL

00375         xmlDetectCharEncoding           (const unsigned char *in,

00377                                          int len);
00378 
00379 
00380 
/*
 * @handler:  encoding handler
 * @out:      buffer object
 * @in:       buffer object
 *
 * Returns an int. xmlBufferPtr is not declared in this header; presumably
 * it comes from <libxml/tree.h>, which is included just before these
 * prototypes.
 * NOTE(review): by its name this runs the handler's output-side conversion
 * from @in into @out; return-value convention not visible here -- confirm.
 */
00381 XMLPUBFUN int XMLCALL   

00383         xmlCharEncOutFunc               (xmlCharEncodingHandler *handler,

00385                                          xmlBufferPtr out,

00387                                          xmlBufferPtr in);
00388 
00389 
00390 
/*
 * @handler:  encoding handler
 * @out:      buffer object
 * @in:       buffer object
 *
 * Returns an int. Same parameter list as xmlCharEncOutFunc.
 * NOTE(review): by its name this runs the handler's input-side conversion
 * from @in into @out; return-value convention not visible here -- confirm.
 */
00391 XMLPUBFUN int XMLCALL   

00393         xmlCharEncInFunc                (xmlCharEncodingHandler *handler,

00395                                          xmlBufferPtr out,

00397                                          xmlBufferPtr in);
00398 
/*
 * @handler:  encoding handler
 * @out:      buffer object
 * @in:       buffer object
 *
 * Returns an int. Same parameter list as xmlCharEncInFunc.
 * NOTE(review): by its name this converts only the first line of @in;
 * exact limits and return-value convention are not visible
 * here -- confirm.
 */
00399 XMLPUBFUN int XMLCALL

00401         xmlCharEncFirstLine             (xmlCharEncodingHandler *handler,

00403                                          xmlBufferPtr out,

00405                                          xmlBufferPtr in);
00406 
/*
 * @handler: encoding handler
 *
 * Returns an int.
 * NOTE(review): by its name this releases resources held by @handler;
 * whether the handler itself is freed and what the return value means are
 * not visible here -- confirm.
 */
00407 XMLPUBFUN int XMLCALL   

00409         xmlCharEncCloseFunc             (xmlCharEncodingHandler *handler);
00410 
00411 
00412 
00413 /*
00414 
00415  * Export a few useful functions
00416 
00417  */
00418 
/*
 * UTF8Toisolat1 is declared only when LIBXML_OUTPUT_ENABLED is defined.
 * Its parameter list and return type match xmlCharEncodingOutputFunc.
 *
 * @out:     writable byte buffer
 * @outlen:  pointer to an int paired with @out
 * @in:      read-only byte buffer
 * @inlen:   pointer to an int paired with @in
 *
 * NOTE(review): by its name this converts UTF-8 to ISO Latin-1; the
 * in/out meaning of the lengths and the return convention are not visible
 * here -- confirm.
 */
00419 #ifdef LIBXML_OUTPUT_ENABLED

00421 XMLPUBFUN int XMLCALL   

00423         UTF8Toisolat1                   (unsigned char *out,

00425                                          int *outlen,

00427                                          const unsigned char *in,

00429                                          int *inlen);

00431 #endif /* LIBXML_OUTPUT_ENABLED */
00432 
/*
 * Declared unconditionally (unlike UTF8Toisolat1, which is guarded by
 * LIBXML_OUTPUT_ENABLED). Its parameter list and return type match
 * xmlCharEncodingInputFunc.
 *
 * @out:     writable byte buffer
 * @outlen:  pointer to an int paired with @out
 * @in:      read-only byte buffer
 * @inlen:   pointer to an int paired with @in
 *
 * NOTE(review): by its name this converts ISO Latin-1 to UTF-8; the
 * in/out meaning of the lengths and the return convention are not visible
 * here -- confirm.
 */
00433 XMLPUBFUN int XMLCALL   

00435         isolat1ToUTF8                   (unsigned char *out,

00437                                          int *outlen,

00439                                          const unsigned char *in,

00441                                          int *inlen);
00442 
00443 #ifdef __cplusplus
00444 
00445 }
00446 
00447 #endif
00448 
00449 
00450 
00451 #endif /* __XML_CHAR_ENCODING_H__ */
00452 

Generated on Wed Jul 26 13:30:46 2006 for XSB by  doxygen 1.4.5