* XML parser conformance to W3 spec

* Names lexically correct (NCName)
  * Syntactically Correct handling of '<?' (processing instructions) and '<?xml' (XML declaration)
  * XML prolog syntax for 'well-formed' XML
  * <!DOCTYPE (ie DTD) is not supported.
This commit is contained in:
Olof hagsand 2018-11-18 13:22:08 +01:00
parent 9bd0dc42c6
commit 9c57902b96
9 changed files with 280 additions and 69 deletions

View file

@ -101,6 +101,9 @@ The standards covered include:
- [Namespaces](https://www.w3.org/TR/2009/REC-xml-names-20091208) - [Namespaces](https://www.w3.org/TR/2009/REC-xml-names-20091208)
- [XPATH](https://www.w3.org/TR/xpath-10) - [XPATH](https://www.w3.org/TR/xpath-10)
Not supported:
- `<!DOCTYPE` (DTD) declarations
Yang Yang
==== ====
YANG and XML is at the heart of Clixon. Yang modules are used as a YANG and XML is at the heart of Clixon. Yang modules are used as a

View file

@ -99,10 +99,26 @@
* <x>a</<x> * <x>a</<x>
* <x>b</<x> * <x>b</<x>
* </c> * </c>
* From https://www.w3.org/TR/2009/REC-xml-names-20091208
* Definitions:
* - XML namespace: is identified by a URI reference [RFC3986]; element and
* attribute names may be placed in an XML namespace using the mechanisms
* described in this specification.
* - Expanded name: is a pair consisting of a namespace name and a local name.
* - Namespace name: For a name N in a namespace identified by a URI I, the
* "namespace name" is I.
* For a name N that is not in a namespace, the "namespace name" has no value.
* - Local name: In either case the "local name" is N.
* It is this combination of the universally managed URI namespace with the
* vocabulary's local names that is effective in avoiding name clashes.
*/ */
struct xml{ struct xml{
char *x_name; /* name of node */ char *x_name; /* name of node */
char *x_namespace; /* namespace, if any */ char *x_namespace; /* namespace, if any */
#ifdef notyet
char *x_namespacename; /* namespace name (or NULL) */
char *x_localname; /* Local name N as defined above */
#endif
struct xml *x_up; /* parent node in hierarchy if any */ struct xml *x_up; /* parent node in hierarchy if any */
struct xml **x_childvec; /* vector of children nodes */ struct xml **x_childvec; /* vector of children nodes */
int x_childvec_len;/* length of vector */ int x_childvec_len;/* length of vector */
@ -224,7 +240,7 @@ xmlns_check(cxobj *xn,
return NULL; return NULL;
} }
/*! Check namespace of xml node by searhing recursively among ancestors /*! Check namespace of xml node by searching recursively among ancestors
* @param[in] xn xml node * @param[in] xn xml node
* @param[in] namespace check validity of namespace * @param[in] namespace check validity of namespace
* @retval 0 Found / validated or no yang spec * @retval 0 Found / validated or no yang spec
@ -1258,6 +1274,7 @@ xmltree2cbuf(cbuf *cb,
* @see xml_parse_file * @see xml_parse_file
* @see xml_parse_string * @see xml_parse_string
* @see xml_parse_va * @see xml_parse_va
* @note special case is empty XML where the parser is not invoked.
*/ */
static int static int
_xml_parse(const char *str, _xml_parse(const char *str,
@ -1267,6 +1284,8 @@ _xml_parse(const char *str,
int retval = -1; int retval = -1;
struct xml_parse_yacc_arg ya = {0,}; struct xml_parse_yacc_arg ya = {0,};
if (strlen(str) == 0)
return 0; /* OK */
if (xt == NULL){ if (xt == NULL){
clicon_err(OE_XML, errno, "Unexpected NULL XML"); clicon_err(OE_XML, errno, "Unexpected NULL XML");
return -1; return -1;

View file

@ -34,6 +34,8 @@
* XML parser * XML parser
* @see https://www.w3.org/TR/2008/REC-xml-20081126 * @see https://www.w3.org/TR/2008/REC-xml-20081126
* https://www.w3.org/TR/2009/REC-xml-names-20091208 * https://www.w3.org/TR/2009/REC-xml-names-20091208
*
*/ */
%{ %{
@ -72,8 +74,22 @@ int clixon_xml_parsewrap(void)
return 1; return 1;
} }
/*
* From https://www.w3.org/TR/2008/REC-xml-20081126:
* [4]  NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | ...
* [4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | ...
* [5]  Name ::= NameStartChar (NameChar)*
* NOTE: From https://www.w3.org/TR/2009/REC-xml-names-20091208:
* [4]  NCName ::= Name - (Char* ':' Char*), i.e. an XML Name without ":"
* --> namestart, namechar and ncname below implement NCName (ASCII subset only)
*/
%} %}
namestart [A-Z_a-z]
namechar [A-Z_a-z\-\.0-9]
ncname {namestart}{namechar}*
%x START %x START
%s STATEA %s STATEA
%s AMPERSAND %s AMPERSAND
@ -81,36 +97,42 @@ int clixon_xml_parsewrap(void)
%s CMNT %s CMNT
%s STR %s STR
%s TEXTDECL %s TEXTDECL
%s PIDECL
%s PIDECL2
%s STRDQ %s STRDQ
%s STRSQ %s STRSQ
%% %%
<START>[0-9A-Za-z_\-]+ { clixon_xml_parselval.string = strdup(yytext);
<START,TEXTDECL>[ \t] ;
<START,STATEA,CMNT,TEXTDECL>\n { _YA->ya_linenum++; }
<START>{ncname} { clixon_xml_parselval.string = strdup(yytext);
return NAME; /* rather be catch-all */ return NAME; /* rather be catch-all */
} }
<START>[ \t]+ ;
<START>\: return *clixon_xml_parsetext; <START>\: return *clixon_xml_parsetext;
<START>\n { _YA->ya_linenum++;} <START>\n { _YA->ya_linenum++;}
<START>"<?xml" { BEGIN(TEXTDECL); return BTEXT;} <START><<EOF>> { return MY_EOF; }
<START>"<?xml" { BEGIN(TEXTDECL); return BXMLDCL;}
<START>"<?" { BEGIN(PIDECL); return BQMARK;}
<START>"/>" { BEGIN(STATEA); return ESLASH; } <START>"/>" { BEGIN(STATEA); return ESLASH; }
<START>"<!--" { BEGIN(CMNT); return BCOMMENT; } <START>"<!--" { BEGIN(CMNT); return BCOMMENT; }
<START>"</" return BSLASH; <START>"</" return BSLASH;
<START>[/=] return *clixon_xml_parsetext; <START>[/=] return *clixon_xml_parsetext;
<START>\< return *clixon_xml_parsetext; <START>\< return *clixon_xml_parsetext;
<START>\> { BEGIN(STATEA); return *clixon_xml_parsetext; } <START>\> { BEGIN(STATEA); return *clixon_xml_parsetext; }
<START>\" { _YA->ya_lex_state=START;BEGIN(STRDQ); return *clixon_xml_parsetext; } <START>\" { _YA->ya_lex_state=START;BEGIN(STRDQ); return *clixon_xml_parsetext; }
<START>\' { _YA->ya_lex_state=START;BEGIN(STRSQ); return *clixon_xml_parsetext; } <START>\' { _YA->ya_lex_state=START;BEGIN(STRSQ); return *clixon_xml_parsetext; }
<START>. { clixon_xml_parselval.string = yytext; return CHARDATA; /*XXX:optimize*/ } <START>. { clixon_xml_parselval.string = yytext; return CHARDATA; /* optimize? */}
<STATEA>"</" { BEGIN(START); return BSLASH; } <STATEA>"</" { BEGIN(START); return BSLASH; }
<STATEA><<EOF>> { return MY_EOF; }
<STATEA>"<!--" { BEGIN(CMNT); return BCOMMENT; } <STATEA>"<!--" { BEGIN(CMNT); return BCOMMENT; }
<STATEA>"<![CDATA[" { BEGIN(CDATA); _YA->ya_lex_state = STATEA; clixon_xml_parselval.string = yytext; return CHARDATA;} <STATEA>"<![CDATA[" { BEGIN(CDATA); _YA->ya_lex_state = STATEA; clixon_xml_parselval.string = yytext; return CHARDATA;}
<STATEA>"<?" { BEGIN(PIDECL); return BQMARK; }
<STATEA>\< { BEGIN(START); return *clixon_xml_parsetext; } <STATEA>\< { BEGIN(START); return *clixon_xml_parsetext; }
<STATEA>& { _YA->ya_lex_state =STATEA;BEGIN(AMPERSAND);} <STATEA>& { _YA->ya_lex_state =STATEA;BEGIN(AMPERSAND);}
<STATEA>\n { clixon_xml_parselval.string = yytext;_YA->ya_linenum++; return (CHARDATA);} <STATEA>. { clixon_xml_parselval.string = yytext; return CHARDATA; }
<STATEA>. { clixon_xml_parselval.string = yytext; return CHARDATA; /*XXX:optimize*/}
/* @see xml_chardata_encode */ /* @see xml_chardata_encode */
<AMPERSAND>"amp;" { BEGIN(_YA->ya_lex_state); clixon_xml_parselval.string = "&"; return CHARDATA;} <AMPERSAND>"amp;" { BEGIN(_YA->ya_lex_state); clixon_xml_parselval.string = "&"; return CHARDATA;}
@ -124,21 +146,30 @@ int clixon_xml_parsewrap(void)
<CDATA>"]]>" { BEGIN(_YA->ya_lex_state); clixon_xml_parselval.string = yytext; return CHARDATA;} <CDATA>"]]>" { BEGIN(_YA->ya_lex_state); clixon_xml_parselval.string = yytext; return CHARDATA;}
<CMNT>"-->" { BEGIN(START); return ECOMMENT; } <CMNT>"-->" { BEGIN(START); return ECOMMENT; }
<CMNT>\n _YA->ya_linenum++;
<CMNT>. <CMNT>.
<TEXTDECL>encoding return ENC; <TEXTDECL>encoding return ENC;
<TEXTDECL>version return VER; <TEXTDECL>version return VER;
<TEXTDECL>"=" return *clixon_xml_parsetext; <TEXTDECL>standalone return SD;
<TEXTDECL>"?>" { BEGIN(START);return ETEXT;} <TEXTDECL>"=" { return *clixon_xml_parsetext; }
<TEXTDECL>"?>" { BEGIN(START);return EQMARK;}
<TEXTDECL>\" { _YA->ya_lex_state =TEXTDECL;BEGIN(STRDQ); return *clixon_xml_parsetext; } <TEXTDECL>\" { _YA->ya_lex_state =TEXTDECL;BEGIN(STRDQ); return *clixon_xml_parsetext; }
<TEXTDECL>\' { _YA->ya_lex_state =TEXTDECL;BEGIN(STRSQ); return *clixon_xml_parsetext; } <TEXTDECL>\' { _YA->ya_lex_state =TEXTDECL;BEGIN(STRSQ); return *clixon_xml_parsetext; }
<TEXTDECL>. { clixon_xml_parselval.string = yytext; return CHARDATA; /* optimize? */}
<STRDQ>1\.[0-9]+ { clixon_xml_parselval.string = strdup(yytext); return CHARDATA; } <PIDECL>{ncname} { clixon_xml_parselval.string = strdup(yytext);
<STRDQ>[^\"]+ { clixon_xml_parselval.string = strdup(yytext); return CHARDATA; } return NAME; /* rather be catch-all */
}
<PIDECL>[ \t] { BEGIN(PIDECL2);}
<PIDECL>. { clixon_xml_parselval.string = yytext; return CHARDATA; /* optimize? */}
<PIDECL2>"?>" { BEGIN(START);return EQMARK;}
<PIDECL2>[^{?>}]+ { clixon_xml_parselval.string = strdup(yytext); return STRING; }
<STRDQ>1\.[0-9]+ { clixon_xml_parselval.string = strdup(yytext); return STRING; }
<STRDQ>[^\"]+ { clixon_xml_parselval.string = strdup(yytext); return STRING; }
<STRDQ>\" { BEGIN(_YA->ya_lex_state); return *clixon_xml_parsetext; } <STRDQ>\" { BEGIN(_YA->ya_lex_state); return *clixon_xml_parsetext; }
<STRSQ>1\.[0-9]+ { clixon_xml_parselval.string = strdup(yytext); return CHARDATA; } <STRSQ>1\.[0-9]+ { clixon_xml_parselval.string = strdup(yytext); return STRING; }
<STRSQ>[^\']+ { clixon_xml_parselval.string = strdup(yytext); return CHARDATA; } <STRSQ>[^\']+ { clixon_xml_parselval.string = strdup(yytext); return STRING; }
<STRSQ>\' { BEGIN(_YA->ya_lex_state); return *clixon_xml_parsetext; } <STRSQ>\' { BEGIN(_YA->ya_lex_state); return *clixon_xml_parsetext; }
%% %%

View file

@ -39,12 +39,13 @@
char *string; char *string;
} }
%start topxml %start document
%token <string> NAME CHARDATA %token <string> NAME CHARDATA STRING
%token VER ENC %token MY_EOF
%token VER ENC SD
%token BSLASH ESLASH %token BSLASH ESLASH
%token BTEXT ETEXT %token BXMLDCL BQMARK EQMARK
%token BCOMMENT ECOMMENT %token BCOMMENT ECOMMENT
%type <string> attvalue %type <string> attvalue
@ -120,6 +121,7 @@ xml_parse_version(struct xml_parse_yacc_arg *ya,
free(ver); free(ver);
return -1; return -1;
} }
if (ver)
free(ver); free(ver);
return 0; return 0;
} }
@ -299,6 +301,11 @@ xml_parse_bslash2(struct xml_parse_yacc_arg *ya,
return retval; return retval;
} }
/*! Parse XML attribute
* Special cases:
* - DefaultAttName: xmlns
* - PrefixedAttName: xmlns:NAME
*/
static int static int
xml_parse_attr(struct xml_parse_yacc_arg *ya, xml_parse_attr(struct xml_parse_yacc_arg *ya,
char *prefix, char *prefix,
@ -308,6 +315,12 @@ xml_parse_attr(struct xml_parse_yacc_arg *ya,
int retval = -1; int retval = -1;
cxobj *xa; cxobj *xa;
#ifdef notyet
if (prefix && strcmp(prefix,"xmlns")==0)
fprintf(stderr, "PrefixedAttName NCNAME:%s = %s\n", name, attval);
if (prefix==NULL && strcmp(name,"xmlns")==0)
fprintf(stderr, "DefaultAttName = %s\n", attval);
#endif /* notyet */
if ((xa = xml_new(name, ya->ya_xelement, NULL)) == NULL) if ((xa = xml_new(name, ya->ya_xelement, NULL)) == NULL)
goto done; goto done;
xml_type_set(xa, CX_ATTR); xml_type_set(xa, CX_ATTR);
@ -327,69 +340,100 @@ xml_parse_attr(struct xml_parse_yacc_arg *ya,
%} %}
%% %%
/* [1] document ::= prolog element Misc* */
topxml : list document : prolog element misclist MY_EOF
{ clicon_debug(3, "topxml->list ACCEPT"); { clicon_debug(2, "document->prolog element misc* ACCEPT");
YYACCEPT; } YYACCEPT; }
| dcl list | elist MY_EOF
{ clicon_debug(3, "topxml->dcl list ACCEPT"); { clicon_debug(2, "document->elist ACCEPT"); /* internal exception*/
YYACCEPT; } YYACCEPT; }
; ;
/* [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)? */
dcl : BTEXT info encode ETEXT { clicon_debug(3, "dcl->info encode"); } prolog : xmldcl misclist
{ clicon_debug(2, "prolog->xmldcl misc*"); }
| misclist
{ clicon_debug(2, "prolog->misc*"); }
; ;
info : VER '=' '\"' CHARDATA '\"' misclist : misclist misc { clicon_debug(2, "misclist->misclist misc"); }
{ if (xml_parse_version(_YA, $4) <0) YYABORT; } | { clicon_debug(2, "misclist->"); }
| VER '=' '\'' CHARDATA '\'' ;
{ if (xml_parse_version(_YA, $4) <0) YYABORT; }
/* [27] Misc ::= Comment | PI | S */
misc : comment { clicon_debug(2, "misc->comment"); }
| pi { clicon_debug(2, "misc->pi"); }
;
xmldcl : BXMLDCL verinfo encodingdecl sddecl EQMARK
{ clicon_debug(2, "xmldcl->verinfo encodingdecl? sddecl?"); }
;
verinfo : VER '=' '\"' STRING '\"'
{ if (xml_parse_version(_YA, $4) <0) YYABORT;
clicon_debug(2, "verinfo->version=\"STRING\"");}
| VER '=' '\'' STRING '\''
{ if (xml_parse_version(_YA, $4) <0) YYABORT;
clicon_debug(2, "verinfo->version='STRING'");}
;
encodingdecl : ENC '=' '\"' STRING '\"' {if ($4)free($4);}
| ENC '=' '\'' STRING '\'' {if ($4)free($4);}
| |
; ;
encode : ENC '=' '\"' CHARDATA '\"' {free($4);} sddecl : SD '=' '\"' STRING '\"' {if ($4)free($4);}
| ENC '=' '\'' CHARDATA '\'' {free($4);} | SD '=' '\'' STRING '\'' {if ($4)free($4);}
|
; ;
/* [39] element ::= EmptyElemTag | STag content ETag */
element : '<' qname attrs element1 element : '<' qname attrs element1
{ clicon_debug(3, "element -> < qname attrs element1"); } { clicon_debug(2, "element -> < qname attrs element1"); }
; ;
qname : NAME { if (xml_parse_unprefixed_name(_YA, $1) < 0) YYABORT; qname : NAME { if (xml_parse_unprefixed_name(_YA, $1) < 0) YYABORT;
clicon_debug(3, "qname -> NAME %s", $1);} clicon_debug(2, "qname -> NAME %s", $1);}
| NAME ':' NAME { if (xml_parse_prefixed_name(_YA, $1, $3) < 0) YYABORT; | NAME ':' NAME { if (xml_parse_prefixed_name(_YA, $1, $3) < 0) YYABORT;
clicon_debug(3, "qname -> NAME : NAME");} clicon_debug(2, "qname -> NAME : NAME");}
; ;
element1 : ESLASH {_YA->ya_xelement = NULL; element1 : ESLASH {_YA->ya_xelement = NULL;
clicon_debug(3, "element1 -> />");} clicon_debug(2, "element1 -> />");}
| '>' { xml_parse_endslash_pre(_YA); } | '>' { xml_parse_endslash_pre(_YA); }
list { xml_parse_endslash_mid(_YA); } elist { xml_parse_endslash_mid(_YA); }
etg { xml_parse_endslash_post(_YA); endtag { xml_parse_endslash_post(_YA);
clicon_debug(3, "element1 -> > list etg");} clicon_debug(2, "element1 -> > elist endtag");}
; ;
etg : BSLASH NAME '>' endtag : BSLASH NAME '>'
{ clicon_debug(3, "etg -> < </ NAME %s>", $2); if (xml_parse_bslash1(_YA, $2) < 0) YYABORT; } { clicon_debug(2, "endtag -> < </ NAME>");
if (xml_parse_bslash1(_YA, $2) < 0) YYABORT; }
| BSLASH NAME ':' NAME '>' | BSLASH NAME ':' NAME '>'
{ if (xml_parse_bslash2(_YA, $2, $4) < 0) YYABORT; { if (xml_parse_bslash2(_YA, $2, $4) < 0) YYABORT;
clicon_debug(3, "etg -> < </ NAME:NAME >"); } clicon_debug(2, "endtag -> < </ NAME:NAME >"); }
; ;
list : list content { clicon_debug(3, "list -> list content"); } elist : elist content { clicon_debug(2, "elist -> elist content"); }
| content { clicon_debug(3, "list -> content"); } | content { clicon_debug(2, "elist -> content"); }
; ;
content : element { clicon_debug(3, "content -> element"); } /* Rule 43 */
| comment { clicon_debug(3, "content -> comment"); } content : element { clicon_debug(2, "content -> element"); }
| comment { clicon_debug(2, "content -> comment"); }
| pi { clicon_debug(2, "content -> pi"); }
| CHARDATA { if (xml_parse_content(_YA, $1) < 0) YYABORT; | CHARDATA { if (xml_parse_content(_YA, $1) < 0) YYABORT;
clicon_debug(3, "content -> CHARDATA %s", $1); } clicon_debug(2, "content -> CHARDATA %s", $1); }
| { clicon_debug(3, "content -> "); } | { clicon_debug(2, "content -> "); }
; ;
comment : BCOMMENT ECOMMENT comment : BCOMMENT ECOMMENT
; ;
pi : BQMARK NAME EQMARK {clicon_debug(2, "pi -> <? NAME ?>"); free($2); }
| BQMARK NAME STRING EQMARK
{clicon_debug(2, "pi -> <? NAME STRING ?>"); free($2); free($3);}
;
attrs : attrs attr attrs : attrs attr
| |
@ -399,9 +443,9 @@ attr : NAME '=' attvalue { if (xml_parse_attr(_YA, NULL, $1, $3)
| NAME ':' NAME '=' attvalue { if (xml_parse_attr(_YA, $1, $3, $5) < 0) YYABORT; } | NAME ':' NAME '=' attvalue { if (xml_parse_attr(_YA, $1, $3, $5) < 0) YYABORT; }
; ;
attvalue : '\"' CHARDATA '\"' { $$=$2; /* $2 must be consumed */} attvalue : '\"' STRING '\"' { $$=$2; /* $2 must be consumed */}
| '\"' '\"' { $$=strdup(""); /* $2 must be consumed */} | '\"' '\"' { $$=strdup(""); /* $2 must be consumed */}
| '\'' CHARDATA '\'' { $$=$2; /* $2 must be consumed */} | '\'' STRING '\'' { $$=$2; /* $2 must be consumed */}
| '\'' '\'' { $$=strdup(""); /* $2 must be consumed */} | '\'' '\'' { $$=strdup(""); /* $2 must be consumed */}
; ;

View file

@ -78,6 +78,9 @@ expectfn(){
expect2= expect2=
fi fi
ret=$($cmd) ret=$($cmd)
# echo "cmd:\"$cmd\""
# echo "retval:\"$retval\""
# echo "ret:\"$ret\""
if [ $? -ne $retval ]; then if [ $? -ne $retval ]; then
echo -e "\e[31m\nError in Test$testnr [$testname]:" echo -e "\e[31m\nError in Test$testnr [$testname]:"
echo -e "\e[0m:" echo -e "\e[0m:"
@ -134,11 +137,15 @@ $input
EOF EOF
) )
r=$? r=$?
if [ $r -ne $retval ]; then if [ $r != $retval ]; then
echo -e "\e[31m\nError ($r != $retval) in Test$testnr [$testname]:" echo -e "\e[31m\nError ($r != $retval) in Test$testnr [$testname]:"
echo -e "\e[0m:" echo -e "\e[0m:"
exit -1 exit -1
fi fi
# On error, do not try to match the output strings
if [ $r != 0 ]; then
return
fi
# Match if both are empty string # Match if both are empty string
if [ -z "$ret" -a -z "$expect" ]; then if [ -z "$ret" -a -z "$expect" ]; then

View file

@ -1,5 +1,7 @@
#!/bin/bash #!/bin/bash
# Test: XML parser tests # Test: XML parser tests
# @see https://www.w3.org/TR/2008/REC-xml-20081126
# https://www.w3.org/TR/2009/REC-xml-names-20091208
#PROG="valgrind --leak-check=full --show-leak-kinds=all ../util/clixon_util_xml" #PROG="valgrind --leak-check=full --show-leak-kinds=all ../util/clixon_util_xml"
PROG=../util/clixon_util_xml PROG=../util/clixon_util_xml
@ -9,6 +11,18 @@ PROG=../util/clixon_util_xml
new "xml parse" new "xml parse"
expecteof "$PROG" 0 "<a><b/></a>" "^<a><b/></a>$" expecteof "$PROG" 0 "<a><b/></a>" "^<a><b/></a>$"
new "xml parse strange names"
expecteof "$PROG" 0 "<_-><b0.><c-.-._/></b0.></_->" "^<_-><b0.><c-.-._/></b0.></_->$"
new "xml parse name errors"
expecteof "$PROG" 255 "<-a/>" ""
new "xml parse name errors"
expecteof "$PROG" 255 "<9/>" ""
new "xml parse name errors"
expecteof "$PROG" 255 "<a%/>" ""
XML=$(cat <<EOF XML=$(cat <<EOF
<a><description>An example of escaped CENDs</description> <a><description>An example of escaped CENDs</description>
<sometext> <sometext>
@ -52,5 +66,75 @@ expecteof "$PROG" 0 "<x a='t'/>" '^<x a="t"/>$'
new "Mixed quotes" new "Mixed quotes"
expecteof "$PROG" 0 "<x a='t' b=\"q\"/>" '^<x a="t" b="q"/>$' expecteof "$PROG" 0 "<x a='t' b=\"q\"/>" '^<x a="t" b="q"/>$'
new "XMLdecl version"
expecteof "$PROG" 0 '<?xml version="1.0"?><a/>' '<a/>'
new "XMLdecl version, single quotes"
expecteof "$PROG" 0 "<?xml version='1.0'?><a/>" '<a/>'
new "XMLdecl version no element"
expecteof "$PROG" 255 '<?xml version="1.0"?>' ''
new "XMLdecl no version"
expecteof "$PROG" 255 '<?xml ?><a/>' ''
new "XMLdecl misspelled version"
expecteof "$PROG" 255 '<?xml verion="1.0"?><a/>' '<a/>'
new "XMLdecl version + encoding"
expecteof "$PROG" 0 '<?xml version="1.0" encoding="UTF-16"?><a/>' '<a/>'
new "XMLdecl version + misspelled encoding"
expecteof "$PROG" 255 '<?xml version="1.0" encding="UTF-16"?><a/>' '<a/>'
new "XMLdecl version + standalone"
expecteof "$PROG" 0 '<?xml version="1.0" standalone="yes"?><a/>' '<a/>'
new "PI - Processing instruction empty"
expecteof "$PROG" 0 '<?foo ?><a/>' '<a/>'
new "PI some content"
expecteof "$PROG" 0 '<?foo something else ?><a/>' '<a/>'
new "prolog element misc*"
expecteof "$PROG" 0 '<?foo something ?><a/><?bar more stuff ?><!-- a comment-->' '<a/>'
# Multiple top-level elements are allowed as an internal necessity for parsing
# XML fragments, so the following test is disabled
#new "double element error"
#expecteof "$PROG" 255 '<a/><b/>' ''
new "namespace: DefaultAttName"
expecteof "$PROG" 0 '<x xmlns="n1">hello</x>' '^<x xmlns="n1">hello</x>$'
new "namespace: PrefixedAttName"
expecteof "$PROG" 0 '<x xmlns:n2="urn:example:des"><n2:y>hello</n2:y></x>' '^<x xmlns:n2="urn:example:des"><n2:y>hello</n2:y></x>$'
new "First example 6.1 from https://www.w3.org/TR/2009/REC-xml-names-20091208"
XML=$(cat <<EOF
<?xml version="1.0"?>
<html:html xmlns:html='http://www.w3.org/1999/xhtml'>
<html:head><html:title>Frobnostication</html:title></html:head>
<html:body><html:p>Moved to
<html:a href='http://frob.example.com'>here.</html:a></html:p></html:body>
</html:html>
EOF
)
expecteof "$PROG" 0 "$XML" "$XML"
new "Second example 6.1 from https://www.w3.org/TR/2009/REC-xml-names-20091208"
XML=$(cat <<EOF
<?xml version="1.0"?>
<!-- both namespace prefixes are available throughout -->
<bk:book xmlns:bk='urn:loc.gov:books'
xmlns:isbn='urn:ISBN:0-395-36341-6'>
<bk:title>Cheaper by the Dozen</bk:title>
<isbn:number>1568491379</isbn:number>
</bk:book>
EOF
)
expecteof "$PROG" 0 "$XML" "$XML"
rm -rf $dir rm -rf $dir

View file

@ -78,7 +78,7 @@ all: $(APPS)
@echo "You may want to make clixon_util_stream separately (curl dependency)" @echo "You may want to make clixon_util_stream separately (curl dependency)"
clean: clean:
rm -f $(APPS) *.core rm -f $(APPS) clixon_util_stream *.core
# APPS # APPS
clixon_util_xml: clixon_util_xml.c $(MYLIB) clixon_util_xml: clixon_util_xml.c $(MYLIB)

View file

@ -48,6 +48,7 @@
#include <limits.h> #include <limits.h>
#include <fnmatch.h> #include <fnmatch.h>
#include <stdint.h> #include <stdint.h>
#include <syslog.h>
#include <assert.h> #include <assert.h>
/* cligen */ /* cligen */
@ -68,20 +69,39 @@
static int static int
usage(char *argv0) usage(char *argv0)
{ {
fprintf(stderr, "usage:%s.\n\tInput on stdin\n", argv0); fprintf(stderr, "usage:%s [options]\n"
"where options are\n"
"\t-h \t\tHelp\n"
"\t-D <level> \tDebug\n",
argv0);
exit(0); exit(0);
} }
int int
main(int argc, char **argv) main(int argc,
char **argv)
{ {
cxobj *xt = NULL; cxobj *xt = NULL;
cxobj *xc; cxobj *xc;
cbuf *cb = cbuf_new(); cbuf *cb = cbuf_new();
int retval = -1;
char c;
if (argc != 1){ clicon_log_init("xpath", LOG_DEBUG, CLICON_LOG_STDERR);
optind = 1;
opterr = 0;
while ((c = getopt(argc, argv, "hD:")) != -1)
switch (c) {
case 'h':
usage(argv[0]); usage(argv[0]);
return 0; break;
case 'D':
if (sscanf(optarg, "%d", &debug) != 1)
usage(argv[0]);
break;
default:
usage(argv[0]);
break;
} }
if (xml_parse_file(0, "</config>", NULL, &xt) < 0){ if (xml_parse_file(0, "</config>", NULL, &xt) < 0){
fprintf(stderr, "xml parse error %s\n", clicon_err_reason); fprintf(stderr, "xml parse error %s\n", clicon_err_reason);
@ -90,18 +110,20 @@ main(int argc, char **argv)
xc = NULL; xc = NULL;
while ((xc = xml_child_each(xt, xc, -1)) != NULL) while ((xc = xml_child_each(xt, xc, -1)) != NULL)
clicon_xml2cbuf(cb, xc, 0, 0); /* print xml */ clicon_xml2cbuf(cb, xc, 0, 0); /* print xml */
fprintf(stdout, "%s\n", cbuf_get(cb)); fprintf(stdout, "%s", cbuf_get(cb));
fflush(stdout);
#if 0 #if 0
cbuf_reset(cb); cbuf_reset(cb);
xmltree2cbuf(cb, xt, 0); /* dump data structures */ xmltree2cbuf(cb, xt, 0); /* dump data structures */
fprintf(stderr, "%s\n", cbuf_get(cb)); fprintf(stderr, "%s\n", cbuf_get(cb));
#endif #endif
retval = 0;
done: done:
if (xt) if (xt)
xml_free(xt); xml_free(xt);
if (cb) if (cb)
cbuf_free(cb); cbuf_free(cb);
return 0; return retval;
} }

View file

@ -133,13 +133,14 @@ main(int argc, char **argv)
clicon_log_init("xpath", LOG_DEBUG, CLICON_LOG_STDERR); clicon_log_init("xpath", LOG_DEBUG, CLICON_LOG_STDERR);
optind = 1; optind = 1;
opterr = 0; opterr = 0;
while ((c = getopt(argc, argv, "hDf:p:i:")) != -1) while ((c = getopt(argc, argv, "hD:f:p:i:")) != -1)
switch (c) { switch (c) {
case 'h': case 'h':
usage(argv0); usage(argv0);
break; break;
case 'D': case 'D':
debug++; if (sscanf(optarg, "%d", &debug) != 1)
usage(argv0);
break; break;
case 'f': /* XML file */ case 'f': /* XML file */
filename = optarg; filename = optarg;