xml/expat/lib/xmltok.h

00001 /*
00002 Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
00003 See the file COPYING for copying permission.
00004 */
00005 
00006 #ifndef XmlTok_INCLUDED
00007 #define XmlTok_INCLUDED 1
00008 
00009 #ifdef __cplusplus
00010 extern "C" {
00011 #endif
00012 
00013 /* The following token may be returned by XmlContentTok */
00014 #define XML_TOK_TRAILING_RSQB -5 /* ] or ]] at the end of the scan; might be start of
00015                                     illegal ]]> sequence */
00016 /* The following tokens may be returned by both XmlPrologTok and XmlContentTok.
         All of them are <= 0, whereas every ordinary token code in this header
         is a positive integer. */
00017 #define XML_TOK_NONE -4    /* The string to be scanned is empty (ptr == end);
                                    nextTokPtr is not set */
00018 #define XML_TOK_TRAILING_CR -3 /* A CR at the end of the scan;
00019                                   might be part of CRLF sequence */ 
00020 #define XML_TOK_PARTIAL_CHAR -2 /* only part of a multibyte sequence */
00021 #define XML_TOK_PARTIAL -1 /* only part of a token; nextTokPtr is not set */
00022 #define XML_TOK_INVALID 0 /* the string does not start a valid token; nextTokPtr
                                   points to the character that made it invalid */
00023 
00024 /* The following tokens are returned by XmlContentTok; some are also
00025   returned by XmlAttributeValueTok, XmlEntityValueTok, XmlCdataSectionTok */
00026 
00027 #define XML_TOK_START_TAG_WITH_ATTS 1 /* start tag that has attributes */
00028 #define XML_TOK_START_TAG_NO_ATTS 2 /* start tag without attributes */
00029 #define XML_TOK_EMPTY_ELEMENT_WITH_ATTS 3 /* empty element tag <e/> */
00030 #define XML_TOK_EMPTY_ELEMENT_NO_ATTS 4 /* empty element tag without attributes */
00031 #define XML_TOK_END_TAG 5
00032 #define XML_TOK_DATA_CHARS 6 /* one or more adjacent data characters */
00033 #define XML_TOK_DATA_NEWLINE 7
00034 #define XML_TOK_CDATA_SECT_OPEN 8
00035 #define XML_TOK_ENTITY_REF 9
00036 #define XML_TOK_CHAR_REF 10     /* numeric character reference */
00037 
00038 /* The following tokens may be returned by both XmlPrologTok and XmlContentTok */
00039 #define XML_TOK_PI 11      /* processing instruction */
00040 #define XML_TOK_XML_DECL 12 /* XML decl or text decl */
00041 #define XML_TOK_COMMENT 13
00042 #define XML_TOK_BOM 14     /* Byte order mark */
00043 
00044 /* The following tokens are returned only by XmlPrologTok */
00045 #define XML_TOK_PROLOG_S 15
00046 #define XML_TOK_DECL_OPEN 16 /* <!foo */
00047 #define XML_TOK_DECL_CLOSE 17 /* > */
00048 #define XML_TOK_NAME 18
00049 #define XML_TOK_NMTOKEN 19
00050 #define XML_TOK_POUND_NAME 20 /* #name */
00051 #define XML_TOK_OR 21 /* | */
00052 #define XML_TOK_PERCENT 22 /* % */
00053 #define XML_TOK_OPEN_PAREN 23 /* ( */
00054 #define XML_TOK_CLOSE_PAREN 24 /* ) */
00055 #define XML_TOK_OPEN_BRACKET 25 /* [ */
00056 #define XML_TOK_CLOSE_BRACKET 26 /* ] */
00057 #define XML_TOK_LITERAL 27 /* its content can be re-scanned with XmlLiteralTok */
00058 #define XML_TOK_PARAM_ENTITY_REF 28
00059 #define XML_TOK_INSTANCE_START 29
00060 
00061 /* The following occur only in element type declarations */
00062 #define XML_TOK_NAME_QUESTION 30 /* name? */
00063 #define XML_TOK_NAME_ASTERISK 31 /* name* */
00064 #define XML_TOK_NAME_PLUS 32 /* name+ */
00065 #define XML_TOK_COND_SECT_OPEN 33 /* <![ */
00066 #define XML_TOK_COND_SECT_CLOSE 34 /* ]]> */
00067 #define XML_TOK_CLOSE_PAREN_QUESTION 35 /* )? */
00068 #define XML_TOK_CLOSE_PAREN_ASTERISK 36 /* )* */
00069 #define XML_TOK_CLOSE_PAREN_PLUS 37 /* )+ */
00070 #define XML_TOK_COMMA 38 /* , */
00071 
00072 /* The following token is returned only by XmlAttributeValueTok */
00073 #define XML_TOK_ATTRIBUTE_VALUE_S 39
00074 
00075 /* The following token is returned only by XmlCdataSectionTok */
00076 #define XML_TOK_CDATA_SECT_CLOSE 40
00077 
00078 /* With namespace processing this is returned by XmlPrologTok
00079    for a name with a colon. */
00080 #define XML_TOK_PREFIXED_NAME 41
00081 
00082 #ifdef XML_DTD
      /* Only defined when the library is built with XML_DTD.
         NOTE(review): presumably returned by XmlIgnoreSectionTok, which is
         guarded by the same macro -- confirm against the tokenizer. */
00083 #define XML_TOK_IGNORE_SECT 42
00084 #endif /* XML_DTD */
00085 
00086 #ifdef XML_DTD
      /* Number of tokenizer states; this is the length of the scanners[] array
         in struct encoding.  A fourth state exists only with XML_DTD. */
00087 #define XML_N_STATES 4
00088 #else /* not XML_DTD */
00089 #define XML_N_STATES 3
00090 #endif /* not XML_DTD */
00091 
      /* Indices into scanners[]; each one is the `state` argument that the
         corresponding Xml...Tok macro passes to XmlTok. */
00092 #define XML_PROLOG_STATE 0 /* used by XmlPrologTok */
00093 #define XML_CONTENT_STATE 1 /* used by XmlContentTok */
00094 #define XML_CDATA_SECTION_STATE 2 /* used by XmlCdataSectionTok */
00095 #ifdef XML_DTD
00096 #define XML_IGNORE_SECTION_STATE 3 /* used by XmlIgnoreSectionTok */
00097 #endif /* XML_DTD */
00098 
      /* Length of, and indices into, literalScanners[] in struct encoding;
         the indices are the `literalType` argument of XmlLiteralTok. */
00099 #define XML_N_LITERAL_TYPES 2
00100 #define XML_ATTRIBUTE_VALUE_LITERAL 0 /* used by XmlAttributeValueTok */
00101 #define XML_ENTITY_VALUE_LITERAL 1 /* used by XmlEntityValueTok */
00102 
00103 /* The size of the buffer passed to XmlUtf8Encode must be at least this. */
00104 #define XML_UTF8_ENCODE_MAX 4 /* counted in char elements (buf is char *) */
00105 /* The size of the buffer passed to XmlUtf16Encode must be at least this. */
00106 #define XML_UTF16_ENCODE_MAX 2 /* counted in unsigned short elements */
00107 
00108 typedef struct position {
00109   /* Line/column pair; a pointer to this struct is the last argument of
           XmlUpdatePosition.  first line and first column are 0 not 1 */
00110   unsigned long lineNumber; /* zero-based line number */
00111   unsigned long columnNumber; /* zero-based column number */
00112 } POSITION;
00113 
00114 typedef struct { /* one attribute record; XmlGetAttributes takes an array of these */
00115   const char *name; /* NOTE(review): presumably points at the attribute name in the scanned input -- confirm */
00116   const char *valuePtr; /* NOTE(review): presumably the start of the attribute value -- confirm */
00117   const char *valueEnd; /* NOTE(review): presumably the end of the attribute value -- confirm */
00118   char normalized; /* flag; NOTE(review): presumably nonzero when the value needs no
                            further normalization -- confirm against the getAtts implementation */
00119 } ATTRIBUTE;
00120 
00121 struct encoding; /* forward declaration so the members below can take const ENCODING * */
00122 typedef struct encoding ENCODING;
00123 
      /* Per-encoding table of function pointers plus a few properties.
         Callers normally go through the Xml... wrapper macros defined later in
         this header, which pass the ENCODING itself as the first argument. */
00124 struct encoding {
        /* First-level tokenizers, one per XML_*_STATE; invoked via XmlTok as
           (enc, ptr, end, nextTokPtr) and returning an XML_TOK_* code. */
00125   int (*scanners[XML_N_STATES])(const ENCODING *,
00126                                 const char *,
00127                                 const char *,
00128                                 const char **);
        /* Second-level tokenizers for the content of a literal, one per
           XML_*_LITERAL type; invoked via XmlLiteralTok. */
00129   int (*literalScanners[XML_N_LITERAL_TYPES])(const ENCODING *,
00130                                               const char *,
00131                                               const char *,
00132                                               const char **);
        /* Invoked via XmlSameName(enc, ptr1, ptr2). */
00133   int (*sameName)(const ENCODING *,
00134                   const char *, const char *);
        /* Invoked via XmlNameMatchesAscii(enc, ptr1, end1, ptr2). */
00135   int (*nameMatchesAscii)(const ENCODING *,
00136                           const char *, const char *, const char *);
00137   int (*nameLength)(const ENCODING *, const char *); /* via XmlNameLength */
00138   const char *(*skipS)(const ENCODING *, const char *); /* via XmlSkipS */
        /* Invoked via XmlGetAttributes; atts is an ATTRIBUTE array and attsMax
           an int bound.  NOTE(review): presumably attsMax is the capacity of
           atts -- confirm in the implementation. */
00139   int (*getAtts)(const ENCODING *enc, const char *ptr,
00140                  int attsMax, ATTRIBUTE *atts);
00141   int (*charRefNumber)(const ENCODING *enc, const char *ptr); /* via XmlCharRefNumber */
00142   int (*predefinedEntityName)(const ENCODING *, const char *, const char *); /* via XmlPredefinedEntityName */
        /* Invoked via XmlUpdatePosition(enc, ptr, end, pos). */
00143   void (*updatePosition)(const ENCODING *,
00144                          const char *ptr,
00145                          const char *end,
00146                          POSITION *);
        /* Invoked via XmlIsPublicId(enc, ptr, end, badPtr). */
00147   int (*isPublicId)(const ENCODING *enc, const char *ptr, const char *end,
00148                     const char **badPtr);
        /* Invoked via XmlUtf8Convert.  fromP and toP are passed by address.
           NOTE(review): presumably both cursors are advanced by the callee,
           bounded by fromLim and toLim -- confirm. */
00149   void (*utf8Convert)(const ENCODING *enc,
00150                       const char **fromP,
00151                       const char *fromLim,
00152                       char **toP,
00153                       const char *toLim);
        /* Invoked via XmlUtf16Convert; same shape as utf8Convert but the
           output is in unsigned short units. */
00154   void (*utf16Convert)(const ENCODING *enc,
00155                        const char **fromP,
00156                        const char *fromLim,
00157                        unsigned short **toP,
00158                        const unsigned short *toLim);
00159   int minBytesPerChar; /* NOTE(review): presumably the smallest byte count of one character in this encoding -- confirm */
00160   char isUtf8; /* flag; NOTE(review): presumably nonzero when the encoding is UTF-8 -- confirm */
00161   char isUtf16; /* flag; NOTE(review): presumably nonzero when the encoding is UTF-16 -- confirm */
00162 };
00163 
00164 /*
00165 Scan the string starting at ptr until the end of the next complete token,
00166 but do not scan past end.  Return an integer giving the type of token.
00167 
00168 Return XML_TOK_NONE when ptr == end; nextTokPtr will not be set.
00169 
00170 Return XML_TOK_PARTIAL when the string does not contain a complete token;
00171 nextTokPtr will not be set.
00172 
00173 Return XML_TOK_INVALID when the string does not start a valid token; nextTokPtr
00174 will be set to point to the character which made the token invalid.
00175 
00176 Otherwise the string starts with a valid token; nextTokPtr will be set to point
00177 to the character following the end of that token.
00178 
00179 Each data character counts as a single token, but adjacent data characters
00180 may be returned together.  Similarly for characters in the prolog outside
00181 literals, comments and processing instructions.
00182 */
00183 
00184 
      /* Dispatch to the scanner for `state` (one of the XML_*_STATE indices).
         Note that `enc` is expanded twice: once to select the scanner and once
         as its first argument. */
00185 #define XmlTok(enc, state, ptr, end, nextTokPtr) \
00186   (((enc)->scanners[state])(enc, ptr, end, nextTokPtr))
00187 
      /* Convenience wrappers that fix the `state` argument of XmlTok. */
00188 #define XmlPrologTok(enc, ptr, end, nextTokPtr) \
00189    XmlTok(enc, XML_PROLOG_STATE, ptr, end, nextTokPtr)
00190 
00191 #define XmlContentTok(enc, ptr, end, nextTokPtr) \
00192    XmlTok(enc, XML_CONTENT_STATE, ptr, end, nextTokPtr)
00193 
00194 #define XmlCdataSectionTok(enc, ptr, end, nextTokPtr) \
00195    XmlTok(enc, XML_CDATA_SECTION_STATE, ptr, end, nextTokPtr)
00196 
00197 #ifdef XML_DTD
00198 
      /* Available only with XML_DTD, like XML_IGNORE_SECTION_STATE itself. */
00199 #define XmlIgnoreSectionTok(enc, ptr, end, nextTokPtr) \
00200    XmlTok(enc, XML_IGNORE_SECTION_STATE, ptr, end, nextTokPtr)
00201 
00202 #endif /* XML_DTD */
00203 
00204 /* This is used for performing a 2nd-level tokenization on
00205 the content of a literal that has already been returned by XmlTok.
      literalType is an index into literalScanners[] (XML_ATTRIBUTE_VALUE_LITERAL
      or XML_ENTITY_VALUE_LITERAL).  `enc` is expanded twice. */ 
00206 
00207 #define XmlLiteralTok(enc, literalType, ptr, end, nextTokPtr) \
00208   (((enc)->literalScanners[literalType])(enc, ptr, end, nextTokPtr))
00209 
      /* Convenience wrappers that fix the literalType argument of XmlLiteralTok. */
00210 #define XmlAttributeValueTok(enc, ptr, end, nextTokPtr) \
00211    XmlLiteralTok(enc, XML_ATTRIBUTE_VALUE_LITERAL, ptr, end, nextTokPtr)
00212 
00213 #define XmlEntityValueTok(enc, ptr, end, nextTokPtr) \
00214    XmlLiteralTok(enc, XML_ENTITY_VALUE_LITERAL, ptr, end, nextTokPtr)
00215 
00216 #define XmlSameName(enc, ptr1, ptr2) (((enc)->sameName)(enc, ptr1, ptr2)) /* calls enc->sameName */
00217 
      /* The macros below are thin wrappers: each calls the struct encoding
         member of the matching name and passes `enc` as the first argument.
         `enc` is therefore expanded twice in every expansion, so it should be
         an expression without side effects. */
00218 #define XmlNameMatchesAscii(enc, ptr1, end1, ptr2) \
00219   (((enc)->nameMatchesAscii)(enc, ptr1, end1, ptr2))
00220 
00221 #define XmlNameLength(enc, ptr) \
00222   (((enc)->nameLength)(enc, ptr))
00223 
00224 #define XmlSkipS(enc, ptr) \
00225   (((enc)->skipS)(enc, ptr))
00226 
      /* Calls enc->getAtts; atts is an ATTRIBUTE array, attsMax an int. */
00227 #define XmlGetAttributes(enc, ptr, attsMax, atts) \
00228   (((enc)->getAtts)(enc, ptr, attsMax, atts))
00229 
00230 #define XmlCharRefNumber(enc, ptr) \
00231   (((enc)->charRefNumber)(enc, ptr))
00232 
00233 #define XmlPredefinedEntityName(enc, ptr, end) \
00234   (((enc)->predefinedEntityName)(enc, ptr, end))
00235 
      /* Calls enc->updatePosition; pos is a POSITION *. */
00236 #define XmlUpdatePosition(enc, ptr, end, pos) \
00237   (((enc)->updatePosition)(enc, ptr, end, pos))
00238 
00239 #define XmlIsPublicId(enc, ptr, end, badPtr) \
00240   (((enc)->isPublicId)(enc, ptr, end, badPtr))
00241 
      /* Conversion wrappers; fromP and toP are pointers to the caller's cursors. */
00242 #define XmlUtf8Convert(enc, fromP, fromLim, toP, toLim) \
00243   (((enc)->utf8Convert)(enc, fromP, fromLim, toP, toLim))
00244 
00245 #define XmlUtf16Convert(enc, fromP, fromLim, toP, toLim) \
00246   (((enc)->utf16Convert)(enc, fromP, fromLim, toP, toLim))
00247 
00248 typedef struct { /* object passed as the first argument of XmlInitEncoding / XmlInitEncodingNS */
00249   ENCODING initEnc; /* embedded ENCODING, held by value */
00250   const ENCODING **encPtr; /* NOTE(review): presumably the caller's encoding pointer, to be
                                    redirected once the real encoding is known -- confirm in xmltok.c */
00251 } INIT_ENCODING;
00252 
00253 int  XmlParseXmlDecl(int isGeneralTextEntity, /* parses an XML or text declaration lying in [ptr, end) */
00254                               const ENCODING *enc,
00255                               const char *ptr,
00256                               const char *end,
00257                               const char **badPtr, /* out; NOTE(review): presumably the offending position on failure -- confirm */
00258                               const char **versionPtr, /* out */
00259                               const char **versionEndPtr, /* out */
00260                               const char **encodingNamePtr, /* out */
00261                               const ENCODING **namedEncodingPtr, /* out */
00262                               int *standalonePtr); /* out */
00263 
00264 int  XmlInitEncoding(INIT_ENCODING *, const ENCODING **, const char *name); /* name: encoding name string */
00265 const ENCODING  *XmlGetUtf8InternalEncoding(void);
00266 const ENCODING  *XmlGetUtf16InternalEncoding(void);
00267 int  XmlUtf8Encode(int charNumber, char *buf); /* buf must hold at least XML_UTF8_ENCODE_MAX chars */
00268 int  XmlUtf16Encode(int charNumber, unsigned short *buf); /* buf must hold at least XML_UTF16_ENCODE_MAX unsigned shorts */
00269 
00270 int  XmlSizeOfUnknownEncoding(void); /* NOTE(review): presumably the byte size required for `mem` below -- confirm */
00271 ENCODING  *
00272 XmlInitUnknownEncoding(void *mem, /* caller-supplied storage */
00273                        int *table,
00274                        int (*conv)(void *userData, const char *p), /* callback; receives userData unchanged */
00275                        void *userData);
00276 
00277 int  XmlParseXmlDeclNS(int isGeneralTextEntity, /* NS counterpart of XmlParseXmlDecl; identical parameter list */
00278                                 const ENCODING *enc,
00279                                 const char *ptr,
00280                                 const char *end,
00281                                 const char **badPtr,
00282                                 const char **versionPtr,
00283                                 const char **versionEndPtr,
00284                                 const char **encodingNamePtr,
00285                                 const ENCODING **namedEncodingPtr,
00286                                 int *standalonePtr);
00287 int  XmlInitEncodingNS(INIT_ENCODING *, const ENCODING **, const char *name); /* NS counterpart of XmlInitEncoding */
00288 const ENCODING  *XmlGetUtf8InternalEncodingNS(void); /* NS counterpart of XmlGetUtf8InternalEncoding */
00289 const ENCODING  *XmlGetUtf16InternalEncodingNS(void); /* NS counterpart of XmlGetUtf16InternalEncoding */
00290 ENCODING  * /* NS counterpart of XmlInitUnknownEncoding; identical parameter list */
00291 XmlInitUnknownEncodingNS(void *mem,
00292                          int *table,
00293                          int (*conv)(void *userData, const char *p),
00294                          void *userData);
00295 #ifdef __cplusplus
00296 }
00297 #endif
00298 
00299 #endif /* not XmlTok_INCLUDED */

Generated on Mon Nov 26 11:24:10 2007 for Apache Portable Runtime Utility Library by  doxygen 1.5.2