Apache Portable Runtime
apr_xml.h
Go to the documentation of this file.
00001 /* Licensed to the Apache Software Foundation (ASF) under one or more
00002  * contributor license agreements.  See the NOTICE file distributed with
00003  * this work for additional information regarding copyright ownership.
00004  * The ASF licenses this file to You under the Apache License, Version 2.0
00005  * (the "License"); you may not use this file except in compliance with
00006  * the License.  You may obtain a copy of the License at
00007  *
00008  *     http://www.apache.org/licenses/LICENSE-2.0
00009  *
00010  * Unless required by applicable law or agreed to in writing, software
00011  * distributed under the License is distributed on an "AS IS" BASIS,
00012  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00013  * See the License for the specific language governing permissions and
00014  * limitations under the License.
00015  */
00016 /**
00017  * @file apr_xml.h
00018  * @brief APR-UTIL XML Library
00019  */
00020 #ifndef APR_XML_H
00021 #define APR_XML_H
00022 
00023 /**
00024  * @defgroup APR_Util_XML XML 
00025  * @ingroup APR
00026  * @{
00027  */
00028 #include "apr_pools.h"
00029 #include "apr_tables.h"
00030 #include "apr_file_io.h"
00031 
00032 #include "apu.h"
00033 #if APR_CHARSET_EBCDIC
00034 #include "apr_xlate.h"
00035 #endif
00036 
00037 #ifdef __cplusplus
00038 extern "C" {
00039 #endif
00040 
00041 /**
00042  * @package Apache XML library
00043  */
00044 
00045 /* -------------------------------------------------------------------- */
00046 
00047 /* ### these will need to move at some point to a more logical spot */
00048 
00049 /** @see apr_text */
00050 typedef struct apr_text apr_text;
00051 
00052 /** One node in a singly linked list of pieces of text */
00053 struct apr_text {
00054     /** The piece of text held by this node */
00055     const char *text;
00056     /** Pointer to the next node (piece of text) in the list */
00057     struct apr_text *next;
00058 };
00059 
00060 /** @see apr_text_header */
00061 typedef struct apr_text_header apr_text_header;
00062 
00063 /** Head and tail pointers for a linked list of apr_text pieces */
00064 struct apr_text_header {
00065     /** The first piece of text in the list */
00066     apr_text *first;
00067     /** The last piece of text in the list */
00068     apr_text *last;
00069 };
00070 
00071 /**
00072  * Append a piece of text to the end of a text list
00073  * @param p The pool to allocate out of
00074  * @param hdr The header of the text list to append to
00075  * @param text The new text to append
00076  */
00077 APR_DECLARE(void) apr_text_append(apr_pool_t *p, apr_text_header *hdr,
00078                                   const char *text);
00079 
00080 
00081 /* --------------------------------------------------------------------
00082 **
00083 ** XML PARSING
00084 */
00085 
00086 /*
00087 ** Qualified namespace values
00088 **
00089 ** APR_XML_NS_DAV_ID
00090 **    We always insert the "DAV:" namespace URI at the head of the
00091 **    namespace array. This means that it will always be at ID==0,
00092 **    making it much easier to test for.
00093 **
00094 ** APR_XML_NS_NONE
00095 **    This special ID is used for two situations:
00096 **
00097 **    1) The namespace prefix begins with "xml" (and we do not know
00098 **       what it means). Namespace prefixes with "xml" (any case) as
00099 **       their first three characters are reserved by the XML Namespaces
00100 **       specification for future use. mod_dav will pass these through
00101 **       unchanged. When this identifier is used, the prefix is LEFT in
00102 **       the element/attribute name. Downstream processing should not
00103 **       prepend another prefix.
00104 **
00105 **    2) The element/attribute does not have a namespace.
00106 **
00107 **       a) No prefix was used, and a default namespace has not been
00108 **          defined.
00109 **       b) No prefix was used, and the default namespace was specified
00110 **          to mean "no namespace". This is done with a namespace
00111 **          declaration of:  xmlns=""
00112 **          (this declaration is typically used to override a previous
00113 **          specification for the default namespace)
00114 **
00115 **       In these cases, we need to record that the elem/attr has no
00116 **       namespace so that we will not attempt to prepend a prefix.
00117 **       All namespaces that are used will have a prefix assigned to
00118 **       them -- mod_dav will never set or use the default namespace
00119 **       when generating XML. This means that "no prefix" will always
00120 **       mean "no namespace".
00121 **
00122 **    In both cases, the XML generation will avoid prepending a prefix.
00123 **    For the first case, this means the original prefix/name will be
00124 **    inserted into the output stream. For the latter case, it means
00125 **    the name will have no prefix, and since we never define a default
00126 **    namespace, this means it will have no namespace.
00127 **
00128 ** Note: currently, mod_dav understands the "xmlns" prefix and the
00129 **     "xml:lang" attribute. These are handled specially (they aren't
00130 **     left within the XML tree), so the APR_XML_NS_NONE value won't ever
00131 **     really apply to these values.
00132 */
00133 #define APR_XML_NS_DAV_ID       0       /**< namespace ID for "DAV:" (head of the namespace array) */
00134 #define APR_XML_NS_NONE         -10     /**< no namespace for this elem/attr */
00135 
00136 #define APR_XML_NS_ERROR_BASE   -100    /**< used only during processing */
00137 /** Is this namespace ID an error value (at or below APR_XML_NS_ERROR_BASE)? */
00138 #define APR_XML_NS_IS_ERROR(e)  ((e) <= APR_XML_NS_ERROR_BASE)
00139 
00140 /** @see apr_xml_attr */
00141 typedef struct apr_xml_attr apr_xml_attr;
00142 /** @see apr_xml_elem */
00143 typedef struct apr_xml_elem apr_xml_elem;
00144 /** @see apr_xml_doc */
00145 typedef struct apr_xml_doc apr_xml_doc;
00146 
00147 /** apr_xml_attr: holds a parsed XML attribute */
00148 struct apr_xml_attr {
00149     /** attribute name */
00150     const char *name;
00151     /** index into namespace array (APR_XML_NS_NONE when there is no namespace) */
00152     int ns;
00153 
00154     /** attribute value */
00155     const char *value;
00156 
00157     /** next attribute in the list */
00158     struct apr_xml_attr *next;
00159 };
00160 
00161 /** apr_xml_elem: holds a parsed XML element */
00162 struct apr_xml_elem {
00163     /** element name */
00164     const char *name;
00165     /** index into namespace array (APR_XML_NS_NONE when there is no namespace) */
00166     int ns;
00167     /** xml:lang for attrs/contents */
00168     const char *lang;
00169 
00170     /** cdata right after this element's start tag */
00171     apr_text_header first_cdata;
00172     /** cdata following this element's own end tag */
00173     apr_text_header following_cdata;
00174 
00175     /** parent element */
00176     struct apr_xml_elem *parent;        
00177     /** next (sibling) element */
00178     struct apr_xml_elem *next;  
00179     /** first child element */
00180     struct apr_xml_elem *first_child;
00181     /** first attribute */
00182     struct apr_xml_attr *attr;          
00183 
00184     /* the following two fields are used only during parsing */
00185     /** last child element */
00186     struct apr_xml_elem *last_child;
00187     /** namespaces scoped by this elem */
00188     struct apr_xml_ns_scope *ns_scope;
00189 
00190     /* used by modules during request processing */
00191     /** Place for modules to store private data */
00192     void *priv;
00193 };
00194 
00195 /** Is this XML element empty (no child element and no cdata after its start tag)? */
00196 #define APR_XML_ELEM_IS_EMPTY(e) ((e)->first_child == NULL && \
00197                                   (e)->first_cdata.first == NULL)
00198 
00199 /** apr_xml_doc: holds a parsed XML document */
00200 struct apr_xml_doc {
00201     /** root element of the document */
00202     apr_xml_elem *root; 
00203     /** array of namespaces used (the array the ns fields index into) */
00204     apr_array_header_t *namespaces;
00205 };
00206 
00207 /** Opaque XML parser structure (only declared, not defined, in this header) */
00208 typedef struct apr_xml_parser apr_xml_parser;
00209 
00210 /**
00211  * Create an XML parser
00212  * @param pool The pool for allocating the parser and the parse results.
00213  * @return The new parser.
00214  */
00215 APR_DECLARE(apr_xml_parser *) apr_xml_parser_create(apr_pool_t *pool);
00216 
00217 /**
00218  * Parse a file, producing an apr_xml_doc
00219  * @param p      The pool for allocating the parse results.
00220  * @param parser A pointer to *parser (needed so calling function can get
00221  *               errors), will be set to NULL on successful completion.
00222  * @param ppdoc  A pointer to *apr_xml_doc (which has the parsed results in it)
00223  * @param xmlfd  A file to read from.
00224  * @param buffer_length Suitable buffer length (presumably for reading xmlfd; TODO confirm)
00225  * @return Any errors found during parsing.
00226  */
00227 APR_DECLARE(apr_status_t) apr_xml_parse_file(apr_pool_t *p,
00228                                              apr_xml_parser **parser,
00229                                              apr_xml_doc **ppdoc,
00230                                              apr_file_t *xmlfd,
00231                                              apr_size_t buffer_length);
00232 
00233 
00234 /**
00235  * Feed input into the parser (finish with apr_xml_parser_done())
00236  * @param parser The XML parser for parsing this data.
00237  * @param data The data to parse.
00238  * @param len The length of the data.
00239  * @return Any errors found during parsing.
00240  * @remark Use apr_xml_parser_geterror() to get more error information.
00241  */
00242 APR_DECLARE(apr_status_t) apr_xml_parser_feed(apr_xml_parser *parser,
00243                                               const char *data,
00244                                               apr_size_t len);
00245 
00246 /**
00247  * Terminate the parsing and return the result
00248  * @param parser The XML parser to terminate.
00249  * @param pdoc The resulting parse information. May be NULL to simply
00250  *             terminate the parsing without fetching the info.
00251  * @return Any errors found during the final stage of parsing.
00252  * @remark Use apr_xml_parser_geterror() to get more error information.
00253  */
00254 APR_DECLARE(apr_status_t) apr_xml_parser_done(apr_xml_parser *parser,
00255                                               apr_xml_doc **pdoc);
00256 
00257 /**
00258  * Fetch additional error information from the parser.
00259  * @param parser The XML parser to query for errors.
00260  * @param errbuf A buffer for storing error text.
00261  * @param errbufsize The size of the errbuf buffer.
00262  * @return The error buffer
00263  */
00264 APR_DECLARE(char *) apr_xml_parser_geterror(apr_xml_parser *parser,
00265                                             char *errbuf,
00266                                             apr_size_t errbufsize);
00267 
00268 
00269 /**
00270  * Converts an XML element tree to flat text 
00271  * @param p The pool to allocate out of
00272  * @param elem The XML element to convert
00273  * @param style How to convert the XML.  One of:
00274  * <PRE>
00275  *     APR_XML_X2T_FULL                start tag, contents, end tag 
00276  *     APR_XML_X2T_INNER               contents only 
00277  *     APR_XML_X2T_LANG_INNER          xml:lang + inner contents 
00278  *     APR_XML_X2T_FULL_NS_LANG        FULL + ns defns + xml:lang 
00279  * </PRE>
00280  * @param namespaces The namespace array for the current XML element
00281  * @param ns_map Namespace mapping
00282  * @param pbuf Buffer to put the converted text into
00283  * @param psize Size of the converted text
00284  */
00285 APR_DECLARE(void) apr_xml_to_text(apr_pool_t *p, const apr_xml_elem *elem,
00286                                   int style, apr_array_header_t *namespaces,
00287                                   int *ns_map, const char **pbuf,
00288                                   apr_size_t *psize);
00289 
00290 /* values for the style argument of apr_xml_to_text(): */
00291 #define APR_XML_X2T_FULL         0      /**< start tag, contents, end tag */
00292 #define APR_XML_X2T_INNER        1      /**< contents only */
00293 #define APR_XML_X2T_LANG_INNER   2      /**< xml:lang + inner contents */
00294 #define APR_XML_X2T_FULL_NS_LANG 3      /**< FULL + ns defns + xml:lang */
00295 
00296 /**
00297  * Empty XML element
00298  * @param p The pool to allocate out of
00299  * @param elem The XML element to empty
00300  * @return The string that was stored in the XML element
00301  */
00302 APR_DECLARE(const char *) apr_xml_empty_elem(apr_pool_t *p,
00303                                              const apr_xml_elem *elem);
00304 
00305 /**
00306  * Quote an XML string
00307  * Replace '<', '>', and '\&' with '\&lt;', '\&gt;', and '\&amp;'.
00308  * @param p The pool to allocate out of
00309  * @param s The string to quote
00310  * @param quotes If quotes is true, then replace '&quot;' with '\&quot;'.
00311  * @return The quoted string
00312  * @note If the string does not contain special characters, it is not
00313  * duplicated into the pool and the original string is returned.
00314  */
00315 APR_DECLARE(const char *) apr_xml_quote_string(apr_pool_t *p, const char *s,
00316                                                int quotes);
00317 
00318 /**
00319  * Quote an XML element
00320  * @param p The pool to allocate out of
00321  * @param elem The element to quote (non-const; presumably modified in place)
00322  */
00323 APR_DECLARE(void) apr_xml_quote_elem(apr_pool_t *p, apr_xml_elem *elem);
00324 
00325 /* manage an array of unique URIs: apr_xml_insert_uri() and APR_XML_GET_URI_ITEM() */
00326 
00327 /**
00328  * Return the URI's (existing) index, or insert it and return a new index 
00329  * @param uri_array array to insert into
00330  * @param uri The uri to insert
00331  * @return int The uri's index
00332  */
00333 APR_DECLARE(int) apr_xml_insert_uri(apr_array_header_t *uri_array,
00334                                     const char *uri);
00335 
00336 /** Get the URI string stored at index i of the URI array ary */
00337 #define APR_XML_GET_URI_ITEM(ary, i) (((const char * const *)(ary)->elts)[i])
00338 
00339 #if APR_CHARSET_EBCDIC
00340 /**
00341  * Convert parsed tree in EBCDIC 
00342  * @param p The pool to allocate out of
00343  * @param pdoc The apr_xml_doc to convert.
00344  * @param convset The translation handle to use.
00345  * @return Any errors found during conversion.
00346  */
00347 APR_DECLARE(apr_status_t) apr_xml_parser_convert_doc(apr_pool_t *p,
00348                                                      apr_xml_doc *pdoc,
00349                                                      apr_xlate_t *convset);
00350 #endif
00351 
00352 #ifdef __cplusplus
00353 }
00354 #endif
00355 /** @} */
00356 #endif /* APR_XML_H */
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Defines