/* Apache Portable Runtime Utility Library */
/* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
   See the file COPYING for copying permission.
*/

/* Tokenizer interface: token type codes, tokenizer state indices, the
   ENCODING dispatch table and the convenience macros that call through
   it, plus prototypes for encoding set-up helpers.
*/

#ifndef XmlTok_INCLUDED
#define XmlTok_INCLUDED 1

#ifdef __cplusplus
extern "C" {
#endif

/* Calling-convention decorators used by the declarations below.  This
   header does not define them itself, so they must come from whatever
   is included ahead of it (NOTE(review): confirm which project header
   is expected to provide them).  The empty fallbacks only apply when
   nothing has defined them yet and select the platform's default
   calling convention, which keeps this header compilable on its own.
*/
#ifndef FASTCALL
#define FASTCALL
#endif
#ifndef PTRCALL
#define PTRCALL
#endif
#ifndef PTRFASTCALL
#define PTRFASTCALL
#endif

/* Negative values are parenthesized so that they expand as a single
   operand wherever the macro is used.
*/

/* The following token may be returned by XmlContentTok */
#define XML_TOK_TRAILING_RSQB (-5) /* ] or ]] at the end of the scan; might be
                                      start of illegal ]]> sequence */
/* The following tokens may be returned by both XmlPrologTok and
   XmlContentTok.
*/
#define XML_TOK_NONE (-4)          /* The string to be scanned is empty */
#define XML_TOK_TRAILING_CR (-3)   /* A CR at the end of the scan;
                                      might be part of CRLF sequence */
#define XML_TOK_PARTIAL_CHAR (-2)  /* only part of a multibyte sequence */
#define XML_TOK_PARTIAL (-1)       /* only part of a token */
#define XML_TOK_INVALID 0

/* The following tokens are returned by XmlContentTok; some are also
   returned by XmlAttributeValueTok, XmlEntityTok, XmlCdataSectionTok.
*/
#define XML_TOK_START_TAG_WITH_ATTS 1
#define XML_TOK_START_TAG_NO_ATTS 2
#define XML_TOK_EMPTY_ELEMENT_WITH_ATTS 3 /* empty element tag <e/> */
#define XML_TOK_EMPTY_ELEMENT_NO_ATTS 4
#define XML_TOK_END_TAG 5
#define XML_TOK_DATA_CHARS 6
#define XML_TOK_DATA_NEWLINE 7
#define XML_TOK_CDATA_SECT_OPEN 8
#define XML_TOK_ENTITY_REF 9
#define XML_TOK_CHAR_REF 10               /* numeric character reference */

/* The following tokens may be returned by both XmlPrologTok and
   XmlContentTok.
*/
#define XML_TOK_PI 11                     /* processing instruction */
#define XML_TOK_XML_DECL 12               /* XML decl or text decl */
#define XML_TOK_COMMENT 13
#define XML_TOK_BOM 14                    /* Byte order mark */

/* The following tokens are returned only by XmlPrologTok */
#define XML_TOK_PROLOG_S 15
#define XML_TOK_DECL_OPEN 16              /* <!foo */
#define XML_TOK_DECL_CLOSE 17             /* > */
#define XML_TOK_NAME 18
#define XML_TOK_NMTOKEN 19
#define XML_TOK_POUND_NAME 20             /* #name */
#define XML_TOK_OR 21                     /* | */
#define XML_TOK_PERCENT 22
#define XML_TOK_OPEN_PAREN 23
#define XML_TOK_CLOSE_PAREN 24
#define XML_TOK_OPEN_BRACKET 25
#define XML_TOK_CLOSE_BRACKET 26
#define XML_TOK_LITERAL 27
#define XML_TOK_PARAM_ENTITY_REF 28
#define XML_TOK_INSTANCE_START 29

/* The following occur only in element type declarations */
#define XML_TOK_NAME_QUESTION 30          /* name? */
#define XML_TOK_NAME_ASTERISK 31          /* name* */
#define XML_TOK_NAME_PLUS 32              /* name+ */
#define XML_TOK_COND_SECT_OPEN 33         /* <![ */
#define XML_TOK_COND_SECT_CLOSE 34        /* ]]> */
#define XML_TOK_CLOSE_PAREN_QUESTION 35   /* )? */
#define XML_TOK_CLOSE_PAREN_ASTERISK 36   /* )* */
#define XML_TOK_CLOSE_PAREN_PLUS 37       /* )+ */
#define XML_TOK_COMMA 38

/* The following token is returned only by XmlAttributeValueTok */
#define XML_TOK_ATTRIBUTE_VALUE_S 39

/* The following token is returned only by XmlCdataSectionTok */
#define XML_TOK_CDATA_SECT_CLOSE 40

/* With namespace processing this is returned by XmlPrologTok for a
   name with a colon.
*/
#define XML_TOK_PREFIXED_NAME 41

#ifdef XML_DTD
#define XML_TOK_IGNORE_SECT 42
#endif /* XML_DTD */

/* Number of tokenizer states; sizes ENCODING.scanners.  One extra state
   (the ignore section) exists when XML_DTD is defined.
*/
#ifdef XML_DTD
#define XML_N_STATES 4
#else /* not XML_DTD */
#define XML_N_STATES 3
#endif /* not XML_DTD */

/* Indices into ENCODING.scanners */
#define XML_PROLOG_STATE 0
#define XML_CONTENT_STATE 1
#define XML_CDATA_SECTION_STATE 2
#ifdef XML_DTD
#define XML_IGNORE_SECTION_STATE 3
#endif /* XML_DTD */

/* Number of literal types; sizes ENCODING.literalScanners, which is
   indexed by the two constants that follow.
*/
#define XML_N_LITERAL_TYPES 2
#define XML_ATTRIBUTE_VALUE_LITERAL 0
#define XML_ENTITY_VALUE_LITERAL 1

/* The size of the buffer passed to XmlUtf8Encode must be at least this. */
#define XML_UTF8_ENCODE_MAX 4
/* The size of the buffer passed to XmlUtf16Encode must be at least this. */
#define XML_UTF16_ENCODE_MAX 2

/* A line/column location, filled in through XmlUpdatePosition. */
typedef struct position {
  /* first line and first column are 0 not 1 */
  unsigned long lineNumber;
  unsigned long columnNumber;
} POSITION;

/* One attribute as reported through XmlGetAttributes. */
typedef struct {
  const char *name;
  const char *valuePtr;
  const char *valueEnd;
  char normalized;
} ATTRIBUTE;

struct encoding;
typedef struct encoding ENCODING;

/* Signature shared by every tokenizer entry in ENCODING.scanners and
   ENCODING.literalScanners; the arguments are, in order, the encoding,
   the start of the input, its end, and the out-parameter receiving the
   next token position (see the XmlTok macro).
*/
typedef int (PTRCALL *SCANNER)(const ENCODING *,
                               const char *,
                               const char *,
                               const char **);

/* Per-encoding dispatch table.  Callers normally go through the Xml*
   macros defined further down rather than using the members directly.
*/
struct encoding {
  SCANNER scanners[XML_N_STATES];
  SCANNER literalScanners[XML_N_LITERAL_TYPES];
  int (PTRCALL *sameName)(const ENCODING *,
                          const char *,
                          const char *);
  int (PTRCALL *nameMatchesAscii)(const ENCODING *,
                                  const char *,
                                  const char *,
                                  const char *);
  int (PTRFASTCALL *nameLength)(const ENCODING *, const char *);
  const char *(PTRFASTCALL *skipS)(const ENCODING *, const char *);
  int (PTRCALL *getAtts)(const ENCODING *enc,
                         const char *ptr,
                         int attsMax,
                         ATTRIBUTE *atts);
  int (PTRFASTCALL *charRefNumber)(const ENCODING *enc, const char *ptr);
  int (PTRCALL *predefinedEntityName)(const ENCODING *,
                                      const char *,
                                      const char *);
  void (PTRCALL *updatePosition)(const ENCODING *,
                                 const char *ptr,
                                 const char *end,
                                 POSITION *);
  int (PTRCALL *isPublicId)(const ENCODING *enc,
                            const char *ptr,
                            const char *end,
                            const char **badPtr);
  void (PTRCALL *utf8Convert)(const ENCODING *enc,
                              const char **fromP,
                              const char *fromLim,
                              char **toP,
                              const char *toLim);
  void (PTRCALL *utf16Convert)(const ENCODING *enc,
                               const char **fromP,
                               const char *fromLim,
                               unsigned short **toP,
                               const unsigned short *toLim);
  int minBytesPerChar;
  char isUtf8;
  char isUtf16;
};

/* Scan the string starting at ptr until the end of the next complete
   token, but do not scan past end.  Return an integer giving the
   type of token.

   Return XML_TOK_NONE when ptr == end; nextTokPtr will not be set.

   Return XML_TOK_PARTIAL when the string does not contain a complete
   token; nextTokPtr will not be set.

   Return XML_TOK_INVALID when the string does not start a valid
   token; nextTokPtr will be set to point to the character which made
   the token invalid.

   Otherwise the string starts with a valid token; nextTokPtr will be
   set to point to the character following the end of that token.

   Each data character counts as a single token, but adjacent data
   characters may be returned together.  Similarly for characters in
   the prolog outside literals, comments and processing instructions.

   Note for all dispatch macros below: enc is expanded twice (once to
   select the function pointer, once as the first argument), so it must
   not be an expression with side effects.
*/

#define XmlTok(enc, state, ptr, end, nextTokPtr) \
  (((enc)->scanners[state])(enc, ptr, end, nextTokPtr))

#define XmlPrologTok(enc, ptr, end, nextTokPtr) \
  XmlTok(enc, XML_PROLOG_STATE, ptr, end, nextTokPtr)

#define XmlContentTok(enc, ptr, end, nextTokPtr) \
  XmlTok(enc, XML_CONTENT_STATE, ptr, end, nextTokPtr)

#define XmlCdataSectionTok(enc, ptr, end, nextTokPtr) \
  XmlTok(enc, XML_CDATA_SECTION_STATE, ptr, end, nextTokPtr)

#ifdef XML_DTD

#define XmlIgnoreSectionTok(enc, ptr, end, nextTokPtr) \
  XmlTok(enc, XML_IGNORE_SECTION_STATE, ptr, end, nextTokPtr)

#endif /* XML_DTD */

/* This is used for performing a 2nd-level tokenization on the content
   of a literal that has already been returned by XmlTok.
*/
#define XmlLiteralTok(enc, literalType, ptr, end, nextTokPtr) \
  (((enc)->literalScanners[literalType])(enc, ptr, end, nextTokPtr))

#define XmlAttributeValueTok(enc, ptr, end, nextTokPtr) \
  XmlLiteralTok(enc, XML_ATTRIBUTE_VALUE_LITERAL, ptr, end, nextTokPtr)

#define XmlEntityValueTok(enc, ptr, end, nextTokPtr) \
  XmlLiteralTok(enc, XML_ENTITY_VALUE_LITERAL, ptr, end, nextTokPtr)

/* Thin wrappers that call the like-named ENCODING member, passing enc
   as the first argument.
*/
#define XmlSameName(enc, ptr1, ptr2) (((enc)->sameName)(enc, ptr1, ptr2))

#define XmlNameMatchesAscii(enc, ptr1, end1, ptr2) \
  (((enc)->nameMatchesAscii)(enc, ptr1, end1, ptr2))

#define XmlNameLength(enc, ptr) \
  (((enc)->nameLength)(enc, ptr))

#define XmlSkipS(enc, ptr) \
  (((enc)->skipS)(enc, ptr))

#define XmlGetAttributes(enc, ptr, attsMax, atts) \
  (((enc)->getAtts)(enc, ptr, attsMax, atts))

#define XmlCharRefNumber(enc, ptr) \
  (((enc)->charRefNumber)(enc, ptr))

#define XmlPredefinedEntityName(enc, ptr, end) \
  (((enc)->predefinedEntityName)(enc, ptr, end))

#define XmlUpdatePosition(enc, ptr, end, pos) \
  (((enc)->updatePosition)(enc, ptr, end, pos))

#define XmlIsPublicId(enc, ptr, end, badPtr) \
  (((enc)->isPublicId)(enc, ptr, end, badPtr))

#define XmlUtf8Convert(enc, fromP, fromLim, toP, toLim) \
  (((enc)->utf8Convert)(enc, fromP, fromLim, toP, toLim))

#define XmlUtf16Convert(enc, fromP, fromLim, toP, toLim) \
  (((enc)->utf16Convert)(enc, fromP, fromLim, toP, toLim))

/* Storage handed to XmlInitEncoding / XmlInitEncodingNS: an embedded
   ENCODING plus a pointer to the caller's encoding pointer.
*/
typedef struct {
  ENCODING initEnc;
  const ENCODING **encPtr;
} INIT_ENCODING;

int XmlParseXmlDecl(int isGeneralTextEntity,
                    const ENCODING *enc,
                    const char *ptr,
                    const char *end,
                    const char **badPtr,
                    const char **versionPtr,
                    const char **versionEndPtr,
                    const char **encodingNamePtr,
                    const ENCODING **namedEncodingPtr,
                    int *standalonePtr);

int XmlInitEncoding(INIT_ENCODING *, const ENCODING **, const char *name);
const ENCODING *XmlGetUtf8InternalEncoding(void);
const ENCODING *XmlGetUtf16InternalEncoding(void);
/* buf must hold at least XML_UTF8_ENCODE_MAX elements. */
int FASTCALL XmlUtf8Encode(int charNumber, char *buf);
/* buf must hold at least XML_UTF16_ENCODE_MAX elements. */
int FASTCALL XmlUtf16Encode(int charNumber, unsigned short *buf);
int XmlSizeOfUnknownEncoding(void);

/* Callback type accepted by XmlInitUnknownEncoding and
   XmlInitUnknownEncodingNS.
*/
typedef int (*CONVERTER)(void *userData, const char *p);

ENCODING *
XmlInitUnknownEncoding(void *mem,
                       int *table,
                       CONVERTER convert,
                       void *userData);

/* The NS variants below mirror the declarations above one for one.
   NOTE(review): presumably these are the namespace-processing
   counterparts -- confirm against the implementation.
*/
int XmlParseXmlDeclNS(int isGeneralTextEntity,
                      const ENCODING *enc,
                      const char *ptr,
                      const char *end,
                      const char **badPtr,
                      const char **versionPtr,
                      const char **versionEndPtr,
                      const char **encodingNamePtr,
                      const ENCODING **namedEncodingPtr,
                      int *standalonePtr);

int XmlInitEncodingNS(INIT_ENCODING *, const ENCODING **, const char *name);
const ENCODING *XmlGetUtf8InternalEncodingNS(void);
const ENCODING *XmlGetUtf16InternalEncodingNS(void);
ENCODING *
XmlInitUnknownEncodingNS(void *mem,
                         int *table,
                         CONVERTER convert,
                         void *userData);
#ifdef __cplusplus
}
#endif

#endif /* not XmlTok_INCLUDED */