* XML parser conformance to the W3C specification
Names are lexically correct (NCName) * Syntactically correct handling of '<?' (processing instructions) and '<?xml' (XML declaration) * XML prolog syntax for 'well-formed' XML * <!DOCTYPE (i.e. DTD) is not supported.
This commit is contained in:
parent
9bd0dc42c6
commit
9c57902b96
9 changed files with 280 additions and 69 deletions
|
|
@ -101,6 +101,9 @@ The standards covered include:
|
||||||
- [Namespaces](https://www.w3.org/TR/2009/REC-xml-names-20091208)
|
- [Namespaces](https://www.w3.org/TR/2009/REC-xml-names-20091208)
|
||||||
- [XPATH](https://www.w3.org/TR/xpath-10)
|
- [XPATH](https://www.w3.org/TR/xpath-10)
|
||||||
|
|
||||||
|
Not supported:
|
||||||
|
- <!DOCTYPE
|
||||||
|
|
||||||
Yang
|
Yang
|
||||||
====
|
====
|
||||||
YANG and XML are at the heart of Clixon. Yang modules are used as a
|
YANG and XML are at the heart of Clixon. Yang modules are used as a
|
||||||
|
|
|
||||||
|
|
@ -99,10 +99,26 @@
|
||||||
* <x>a</x>
|
* <x>a</x>
|
||||||
* <x>b</x>
|
* <x>b</x>
|
||||||
* </c>
|
* </c>
|
||||||
|
* From https://www.w3.org/TR/2009/REC-xml-names-20091208
|
||||||
|
* Definitions:
|
||||||
|
* - XML namespace: is identified by a URI reference [RFC3986]; element and
|
||||||
|
* attribute names may be placed in an XML namespace using the mechanisms
|
||||||
|
* described in this specification.
|
||||||
|
* - Expanded name: is a pair consisting of a namespace name and a local name.
|
||||||
|
* - Namespace name: For a name N in a namespace identified by a URI I, the
|
||||||
|
* "namespace name" is I.
|
||||||
|
* For a name N that is not in a namespace, the "namespace name" has no value.
|
||||||
|
* - Local name: In either case the "local name" is N.
|
||||||
|
* It is this combination of the universally managed URI namespace with the
|
||||||
|
* vocabulary's local names that is effective in avoiding name clashes.
|
||||||
*/
|
*/
|
||||||
struct xml{
|
struct xml{
|
||||||
char *x_name; /* name of node */
|
char *x_name; /* name of node */
|
||||||
char *x_namespace; /* namespace, if any */
|
char *x_namespace; /* namespace, if any */
|
||||||
|
#ifdef notyet
|
||||||
|
char *x_namespacename; /* namespace name (or NULL) */
|
||||||
|
char *x_localname; /* Local name N as defined above */
|
||||||
|
#endif
|
||||||
struct xml *x_up; /* parent node in hierarchy if any */
|
struct xml *x_up; /* parent node in hierarchy if any */
|
||||||
struct xml **x_childvec; /* vector of children nodes */
|
struct xml **x_childvec; /* vector of children nodes */
|
||||||
int x_childvec_len;/* length of vector */
|
int x_childvec_len;/* length of vector */
|
||||||
|
|
@ -224,7 +240,7 @@ xmlns_check(cxobj *xn,
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*! Check namespace of xml node by searhing recursively among ancestors
|
/*! Check namespace of xml node by searching recursively among ancestors
|
||||||
* @param[in] xn xml node
|
* @param[in] xn xml node
|
||||||
* @param[in] namespace check validity of namespace
|
* @param[in] namespace check validity of namespace
|
||||||
* @retval 0 Found / validated or no yang spec
|
* @retval 0 Found / validated or no yang spec
|
||||||
|
|
@ -1258,6 +1274,7 @@ xmltree2cbuf(cbuf *cb,
|
||||||
* @see xml_parse_file
|
* @see xml_parse_file
|
||||||
* @see xml_parse_string
|
* @see xml_parse_string
|
||||||
* @see xml_parse_va
|
* @see xml_parse_va
|
||||||
|
* @note special case is empty XML where the parser is not invoked.
|
||||||
*/
|
*/
|
||||||
static int
|
static int
|
||||||
_xml_parse(const char *str,
|
_xml_parse(const char *str,
|
||||||
|
|
@ -1267,6 +1284,8 @@ _xml_parse(const char *str,
|
||||||
int retval = -1;
|
int retval = -1;
|
||||||
struct xml_parse_yacc_arg ya = {0,};
|
struct xml_parse_yacc_arg ya = {0,};
|
||||||
|
|
||||||
|
if (strlen(str) == 0)
|
||||||
|
return 0; /* OK */
|
||||||
if (xt == NULL){
|
if (xt == NULL){
|
||||||
clicon_err(OE_XML, errno, "Unexpected NULL XML");
|
clicon_err(OE_XML, errno, "Unexpected NULL XML");
|
||||||
return -1;
|
return -1;
|
||||||
|
|
|
||||||
|
|
@ -34,6 +34,8 @@
|
||||||
* XML parser
|
* XML parser
|
||||||
* @see https://www.w3.org/TR/2008/REC-xml-20081126
|
* @see https://www.w3.org/TR/2008/REC-xml-20081126
|
||||||
* https://www.w3.org/TR/2009/REC-xml-names-20091208
|
* https://www.w3.org/TR/2009/REC-xml-names-20091208
|
||||||
|
*
|
||||||
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
%{
|
%{
|
||||||
|
|
@ -72,8 +74,22 @@ int clixon_xml_parsewrap(void)
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* From https://www.w3.org/TR/2008/REC-xml-20081126:
|
||||||
|
* [4]* NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] ...
|
||||||
|
* [4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7|
|
||||||
|
* [5] Name ::= NameStartChar (NameChar)*
|
||||||
|
* NOTE: From https://www.w3.org/TR/2009/REC-xml-names-20091208:
|
||||||
|
* [4] NCName ::= Name - (Char* ':' Char*) An XML Name, minus the ":"
|
||||||
|
* --> namestart and namechar below exclude ':' so that ncname matches an NCName
|
||||||
|
*/
|
||||||
|
|
||||||
%}
|
%}
|
||||||
|
|
||||||
|
namestart [A-Z_a-z]
|
||||||
|
namechar [A-Z_a-z\-\.0-9]
|
||||||
|
ncname {namestart}{namechar}*
|
||||||
|
|
||||||
%x START
|
%x START
|
||||||
%s STATEA
|
%s STATEA
|
||||||
%s AMPERSAND
|
%s AMPERSAND
|
||||||
|
|
@ -81,36 +97,42 @@ int clixon_xml_parsewrap(void)
|
||||||
%s CMNT
|
%s CMNT
|
||||||
%s STR
|
%s STR
|
||||||
%s TEXTDECL
|
%s TEXTDECL
|
||||||
|
%s PIDECL
|
||||||
|
%s PIDECL2
|
||||||
%s STRDQ
|
%s STRDQ
|
||||||
%s STRSQ
|
%s STRSQ
|
||||||
|
|
||||||
%%
|
%%
|
||||||
<START>[0-9A-Za-z_\-]+ { clixon_xml_parselval.string = strdup(yytext);
|
|
||||||
|
<START,TEXTDECL>[ \t] ;
|
||||||
|
<START,STATEA,CMNT,TEXTDECL>\n { _YA->ya_linenum++; }
|
||||||
|
|
||||||
|
<START>{ncname} { clixon_xml_parselval.string = strdup(yytext);
|
||||||
return NAME; /* rather be catch-all */
|
return NAME; /* rather be catch-all */
|
||||||
}
|
}
|
||||||
<START>[ \t]+ ;
|
|
||||||
<START>\: return *clixon_xml_parsetext;
|
<START>\: return *clixon_xml_parsetext;
|
||||||
<START>\n { _YA->ya_linenum++;}
|
<START>\n { _YA->ya_linenum++;}
|
||||||
<START>"<?xml" { BEGIN(TEXTDECL); return BTEXT;}
|
<START><<EOF>> { return MY_EOF; }
|
||||||
|
<START>"<?xml" { BEGIN(TEXTDECL); return BXMLDCL;}
|
||||||
|
<START>"<?" { BEGIN(PIDECL); return BQMARK;}
|
||||||
<START>"/>" { BEGIN(STATEA); return ESLASH; }
|
<START>"/>" { BEGIN(STATEA); return ESLASH; }
|
||||||
<START>"<!--" { BEGIN(CMNT); return BCOMMENT; }
|
<START>"<!--" { BEGIN(CMNT); return BCOMMENT; }
|
||||||
<START>"</" return BSLASH;
|
<START>"</" return BSLASH;
|
||||||
<START>[/=] return *clixon_xml_parsetext;
|
<START>[/=] return *clixon_xml_parsetext;
|
||||||
<START>\< return *clixon_xml_parsetext;
|
<START>\< return *clixon_xml_parsetext;
|
||||||
<START>\> { BEGIN(STATEA); return *clixon_xml_parsetext; }
|
<START>\> { BEGIN(STATEA); return *clixon_xml_parsetext; }
|
||||||
|
|
||||||
<START>\" { _YA->ya_lex_state=START;BEGIN(STRDQ); return *clixon_xml_parsetext; }
|
<START>\" { _YA->ya_lex_state=START;BEGIN(STRDQ); return *clixon_xml_parsetext; }
|
||||||
<START>\' { _YA->ya_lex_state=START;BEGIN(STRSQ); return *clixon_xml_parsetext; }
|
<START>\' { _YA->ya_lex_state=START;BEGIN(STRSQ); return *clixon_xml_parsetext; }
|
||||||
<START>. { clixon_xml_parselval.string = yytext; return CHARDATA; /*XXX:optimize*/ }
|
<START>. { clixon_xml_parselval.string = yytext; return CHARDATA; /* optimize? */}
|
||||||
|
|
||||||
<STATEA>"</" { BEGIN(START); return BSLASH; }
|
<STATEA>"</" { BEGIN(START); return BSLASH; }
|
||||||
|
<STATEA><<EOF>> { return MY_EOF; }
|
||||||
<STATEA>"<!--" { BEGIN(CMNT); return BCOMMENT; }
|
<STATEA>"<!--" { BEGIN(CMNT); return BCOMMENT; }
|
||||||
<STATEA>"<![CDATA[" { BEGIN(CDATA); _YA->ya_lex_state = STATEA; clixon_xml_parselval.string = yytext; return CHARDATA;}
|
<STATEA>"<![CDATA[" { BEGIN(CDATA); _YA->ya_lex_state = STATEA; clixon_xml_parselval.string = yytext; return CHARDATA;}
|
||||||
|
<STATEA>"<?" { BEGIN(PIDECL); return BQMARK; }
|
||||||
<STATEA>\< { BEGIN(START); return *clixon_xml_parsetext; }
|
<STATEA>\< { BEGIN(START); return *clixon_xml_parsetext; }
|
||||||
<STATEA>& { _YA->ya_lex_state =STATEA;BEGIN(AMPERSAND);}
|
<STATEA>& { _YA->ya_lex_state =STATEA;BEGIN(AMPERSAND);}
|
||||||
<STATEA>\n { clixon_xml_parselval.string = yytext;_YA->ya_linenum++; return (CHARDATA);}
|
<STATEA>. { clixon_xml_parselval.string = yytext; return CHARDATA; }
|
||||||
|
|
||||||
<STATEA>. { clixon_xml_parselval.string = yytext; return CHARDATA; /*XXX:optimize*/}
|
|
||||||
|
|
||||||
/* @see xml_chardata_encode */
|
/* @see xml_chardata_encode */
|
||||||
<AMPERSAND>"amp;" { BEGIN(_YA->ya_lex_state); clixon_xml_parselval.string = "&"; return CHARDATA;}
|
<AMPERSAND>"amp;" { BEGIN(_YA->ya_lex_state); clixon_xml_parselval.string = "&"; return CHARDATA;}
|
||||||
|
|
@ -124,21 +146,30 @@ int clixon_xml_parsewrap(void)
|
||||||
<CDATA>"]]>" { BEGIN(_YA->ya_lex_state); clixon_xml_parselval.string = yytext; return CHARDATA;}
|
<CDATA>"]]>" { BEGIN(_YA->ya_lex_state); clixon_xml_parselval.string = yytext; return CHARDATA;}
|
||||||
|
|
||||||
<CMNT>"-->" { BEGIN(START); return ECOMMENT; }
|
<CMNT>"-->" { BEGIN(START); return ECOMMENT; }
|
||||||
<CMNT>\n _YA->ya_linenum++;
|
|
||||||
<CMNT>.
|
<CMNT>.
|
||||||
<TEXTDECL>encoding return ENC;
|
<TEXTDECL>encoding return ENC;
|
||||||
<TEXTDECL>version return VER;
|
<TEXTDECL>version return VER;
|
||||||
<TEXTDECL>"=" return *clixon_xml_parsetext;
|
<TEXTDECL>standalone return SD;
|
||||||
<TEXTDECL>"?>" { BEGIN(START);return ETEXT;}
|
<TEXTDECL>"=" { return *clixon_xml_parsetext; }
|
||||||
|
<TEXTDECL>"?>" { BEGIN(START);return EQMARK;}
|
||||||
<TEXTDECL>\" { _YA->ya_lex_state =TEXTDECL;BEGIN(STRDQ); return *clixon_xml_parsetext; }
|
<TEXTDECL>\" { _YA->ya_lex_state =TEXTDECL;BEGIN(STRDQ); return *clixon_xml_parsetext; }
|
||||||
<TEXTDECL>\' { _YA->ya_lex_state =TEXTDECL;BEGIN(STRSQ); return *clixon_xml_parsetext; }
|
<TEXTDECL>\' { _YA->ya_lex_state =TEXTDECL;BEGIN(STRSQ); return *clixon_xml_parsetext; }
|
||||||
|
<TEXTDECL>. { clixon_xml_parselval.string = yytext; return CHARDATA; /* optimize? */}
|
||||||
|
|
||||||
<STRDQ>1\.[0-9]+ { clixon_xml_parselval.string = strdup(yytext); return CHARDATA; }
|
<PIDECL>{ncname} { clixon_xml_parselval.string = strdup(yytext);
|
||||||
<STRDQ>[^\"]+ { clixon_xml_parselval.string = strdup(yytext); return CHARDATA; }
|
return NAME; /* rather be catch-all */
|
||||||
|
}
|
||||||
|
<PIDECL>[ \t] { BEGIN(PIDECL2);}
|
||||||
|
<PIDECL>. { clixon_xml_parselval.string = yytext; return CHARDATA; /* optimize? */}
|
||||||
|
<PIDECL2>"?>" { BEGIN(START);return EQMARK;}
|
||||||
|
<PIDECL2>[^{?>}]+ { clixon_xml_parselval.string = strdup(yytext); return STRING; }
|
||||||
|
|
||||||
|
<STRDQ>1\.[0-9]+ { clixon_xml_parselval.string = strdup(yytext); return STRING; }
|
||||||
|
<STRDQ>[^\"]+ { clixon_xml_parselval.string = strdup(yytext); return STRING; }
|
||||||
<STRDQ>\" { BEGIN(_YA->ya_lex_state); return *clixon_xml_parsetext; }
|
<STRDQ>\" { BEGIN(_YA->ya_lex_state); return *clixon_xml_parsetext; }
|
||||||
|
|
||||||
<STRSQ>1\.[0-9]+ { clixon_xml_parselval.string = strdup(yytext); return CHARDATA; }
|
<STRSQ>1\.[0-9]+ { clixon_xml_parselval.string = strdup(yytext); return STRING; }
|
||||||
<STRSQ>[^\']+ { clixon_xml_parselval.string = strdup(yytext); return CHARDATA; }
|
<STRSQ>[^\']+ { clixon_xml_parselval.string = strdup(yytext); return STRING; }
|
||||||
<STRSQ>\' { BEGIN(_YA->ya_lex_state); return *clixon_xml_parsetext; }
|
<STRSQ>\' { BEGIN(_YA->ya_lex_state); return *clixon_xml_parsetext; }
|
||||||
|
|
||||||
%%
|
%%
|
||||||
|
|
|
||||||
|
|
@ -39,12 +39,13 @@
|
||||||
char *string;
|
char *string;
|
||||||
}
|
}
|
||||||
|
|
||||||
%start topxml
|
%start document
|
||||||
|
|
||||||
%token <string> NAME CHARDATA
|
%token <string> NAME CHARDATA STRING
|
||||||
%token VER ENC
|
%token MY_EOF
|
||||||
|
%token VER ENC SD
|
||||||
%token BSLASH ESLASH
|
%token BSLASH ESLASH
|
||||||
%token BTEXT ETEXT
|
%token BXMLDCL BQMARK EQMARK
|
||||||
%token BCOMMENT ECOMMENT
|
%token BCOMMENT ECOMMENT
|
||||||
|
|
||||||
%type <string> attvalue
|
%type <string> attvalue
|
||||||
|
|
@ -120,6 +121,7 @@ xml_parse_version(struct xml_parse_yacc_arg *ya,
|
||||||
free(ver);
|
free(ver);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
if (ver)
|
||||||
free(ver);
|
free(ver);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
@ -299,6 +301,11 @@ xml_parse_bslash2(struct xml_parse_yacc_arg *ya,
|
||||||
return retval;
|
return retval;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*! Parse XML attribute
|
||||||
|
* Special cases:
|
||||||
|
* - DefaultAttName: xmlns
|
||||||
|
* - PrefixedAttName: xmlns:NAME
|
||||||
|
*/
|
||||||
static int
|
static int
|
||||||
xml_parse_attr(struct xml_parse_yacc_arg *ya,
|
xml_parse_attr(struct xml_parse_yacc_arg *ya,
|
||||||
char *prefix,
|
char *prefix,
|
||||||
|
|
@ -308,6 +315,12 @@ xml_parse_attr(struct xml_parse_yacc_arg *ya,
|
||||||
int retval = -1;
|
int retval = -1;
|
||||||
cxobj *xa;
|
cxobj *xa;
|
||||||
|
|
||||||
|
#ifdef notyet
|
||||||
|
if (prefix && strcmp(prefix,"xmlns")==0)
|
||||||
|
fprintf(stderr, "PrefixedAttName NCNAME:%s = %s\n", name, attval);
|
||||||
|
if (prefix==NULL && strcmp(name,"xmlns")==0)
|
||||||
|
fprintf(stderr, "DefaultAttName = %s\n", attval);
|
||||||
|
#endif /* notyet */
|
||||||
if ((xa = xml_new(name, ya->ya_xelement, NULL)) == NULL)
|
if ((xa = xml_new(name, ya->ya_xelement, NULL)) == NULL)
|
||||||
goto done;
|
goto done;
|
||||||
xml_type_set(xa, CX_ATTR);
|
xml_type_set(xa, CX_ATTR);
|
||||||
|
|
@ -327,69 +340,100 @@ xml_parse_attr(struct xml_parse_yacc_arg *ya,
|
||||||
%}
|
%}
|
||||||
|
|
||||||
%%
|
%%
|
||||||
|
/* [1] document ::= prolog element Misc* */
|
||||||
topxml : list
|
document : prolog element misclist MY_EOF
|
||||||
{ clicon_debug(3, "topxml->list ACCEPT");
|
{ clicon_debug(2, "document->prolog element misc* ACCEPT");
|
||||||
YYACCEPT; }
|
YYACCEPT; }
|
||||||
| dcl list
|
| elist MY_EOF
|
||||||
{ clicon_debug(3, "topxml->dcl list ACCEPT");
|
{ clicon_debug(2, "document->elist ACCEPT"); /* internal exception*/
|
||||||
YYACCEPT; }
|
YYACCEPT; }
|
||||||
;
|
;
|
||||||
|
/* [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)? */
|
||||||
dcl : BTEXT info encode ETEXT { clicon_debug(3, "dcl->info encode"); }
|
prolog : xmldcl misclist
|
||||||
|
{ clicon_debug(2, "prolog->xmldcl misc*"); }
|
||||||
|
| misclist
|
||||||
|
{ clicon_debug(2, "prolog->misc*"); }
|
||||||
;
|
;
|
||||||
|
|
||||||
info : VER '=' '\"' CHARDATA '\"'
|
misclist : misclist misc { clicon_debug(2, "misclist->misclist misc"); }
|
||||||
{ if (xml_parse_version(_YA, $4) <0) YYABORT; }
|
| { clicon_debug(2, "misclist->"); }
|
||||||
| VER '=' '\'' CHARDATA '\''
|
;
|
||||||
{ if (xml_parse_version(_YA, $4) <0) YYABORT; }
|
|
||||||
|
/* [27] Misc ::= Comment | PI | S */
|
||||||
|
misc : comment { clicon_debug(2, "misc->comment"); }
|
||||||
|
| pi { clicon_debug(2, "misc->pi"); }
|
||||||
|
;
|
||||||
|
|
||||||
|
xmldcl : BXMLDCL verinfo encodingdecl sddecl EQMARK
|
||||||
|
{ clicon_debug(2, "xmldcl->verinfo encodingdecl? sddecl?"); }
|
||||||
|
;
|
||||||
|
|
||||||
|
verinfo : VER '=' '\"' STRING '\"'
|
||||||
|
{ if (xml_parse_version(_YA, $4) <0) YYABORT;
|
||||||
|
clicon_debug(2, "verinfo->version=\"STRING\"");}
|
||||||
|
| VER '=' '\'' STRING '\''
|
||||||
|
{ if (xml_parse_version(_YA, $4) <0) YYABORT;
|
||||||
|
clicon_debug(2, "verinfo->version='STRING'");}
|
||||||
|
;
|
||||||
|
|
||||||
|
encodingdecl : ENC '=' '\"' STRING '\"' {if ($4)free($4);}
|
||||||
|
| ENC '=' '\'' STRING '\'' {if ($4)free($4);}
|
||||||
|
|
|
|
||||||
;
|
;
|
||||||
|
|
||||||
encode : ENC '=' '\"' CHARDATA '\"' {free($4);}
|
sddecl : SD '=' '\"' STRING '\"' {if ($4)free($4);}
|
||||||
| ENC '=' '\'' CHARDATA '\'' {free($4);}
|
| SD '=' '\'' STRING '\'' {if ($4)free($4);}
|
||||||
|
|
|
||||||
;
|
;
|
||||||
|
/* [39] element ::= EmptyElemTag | STag content ETag */
|
||||||
element : '<' qname attrs element1
|
element : '<' qname attrs element1
|
||||||
{ clicon_debug(3, "element -> < qname attrs element1"); }
|
{ clicon_debug(2, "element -> < qname attrs element1"); }
|
||||||
;
|
;
|
||||||
|
|
||||||
qname : NAME { if (xml_parse_unprefixed_name(_YA, $1) < 0) YYABORT;
|
qname : NAME { if (xml_parse_unprefixed_name(_YA, $1) < 0) YYABORT;
|
||||||
clicon_debug(3, "qname -> NAME %s", $1);}
|
clicon_debug(2, "qname -> NAME %s", $1);}
|
||||||
| NAME ':' NAME { if (xml_parse_prefixed_name(_YA, $1, $3) < 0) YYABORT;
|
| NAME ':' NAME { if (xml_parse_prefixed_name(_YA, $1, $3) < 0) YYABORT;
|
||||||
clicon_debug(3, "qname -> NAME : NAME");}
|
clicon_debug(2, "qname -> NAME : NAME");}
|
||||||
;
|
;
|
||||||
|
|
||||||
element1 : ESLASH {_YA->ya_xelement = NULL;
|
element1 : ESLASH {_YA->ya_xelement = NULL;
|
||||||
clicon_debug(3, "element1 -> />");}
|
clicon_debug(2, "element1 -> />");}
|
||||||
| '>' { xml_parse_endslash_pre(_YA); }
|
| '>' { xml_parse_endslash_pre(_YA); }
|
||||||
list { xml_parse_endslash_mid(_YA); }
|
elist { xml_parse_endslash_mid(_YA); }
|
||||||
etg { xml_parse_endslash_post(_YA);
|
endtag { xml_parse_endslash_post(_YA);
|
||||||
clicon_debug(3, "element1 -> > list etg");}
|
clicon_debug(2, "element1 -> > elist endtag");}
|
||||||
;
|
;
|
||||||
|
|
||||||
etg : BSLASH NAME '>'
|
endtag : BSLASH NAME '>'
|
||||||
{ clicon_debug(3, "etg -> < </ NAME %s>", $2); if (xml_parse_bslash1(_YA, $2) < 0) YYABORT; }
|
{ clicon_debug(2, "endtag -> < </ NAME>");
|
||||||
|
if (xml_parse_bslash1(_YA, $2) < 0) YYABORT; }
|
||||||
|
|
||||||
| BSLASH NAME ':' NAME '>'
|
| BSLASH NAME ':' NAME '>'
|
||||||
{ if (xml_parse_bslash2(_YA, $2, $4) < 0) YYABORT;
|
{ if (xml_parse_bslash2(_YA, $2, $4) < 0) YYABORT;
|
||||||
clicon_debug(3, "etg -> < </ NAME:NAME >"); }
|
clicon_debug(2, "endtag -> < </ NAME:NAME >"); }
|
||||||
;
|
;
|
||||||
|
|
||||||
list : list content { clicon_debug(3, "list -> list content"); }
|
elist : elist content { clicon_debug(2, "elist -> elist content"); }
|
||||||
| content { clicon_debug(3, "list -> content"); }
|
| content { clicon_debug(2, "elist -> content"); }
|
||||||
;
|
;
|
||||||
|
|
||||||
content : element { clicon_debug(3, "content -> element"); }
|
/* Rule 43 */
|
||||||
| comment { clicon_debug(3, "content -> comment"); }
|
content : element { clicon_debug(2, "content -> element"); }
|
||||||
|
| comment { clicon_debug(2, "content -> comment"); }
|
||||||
|
| pi { clicon_debug(2, "content -> pi"); }
|
||||||
| CHARDATA { if (xml_parse_content(_YA, $1) < 0) YYABORT;
|
| CHARDATA { if (xml_parse_content(_YA, $1) < 0) YYABORT;
|
||||||
clicon_debug(3, "content -> CHARDATA %s", $1); }
|
clicon_debug(2, "content -> CHARDATA %s", $1); }
|
||||||
| { clicon_debug(3, "content -> "); }
|
| { clicon_debug(2, "content -> "); }
|
||||||
;
|
;
|
||||||
|
|
||||||
comment : BCOMMENT ECOMMENT
|
comment : BCOMMENT ECOMMENT
|
||||||
;
|
;
|
||||||
|
|
||||||
|
pi : BQMARK NAME EQMARK {clicon_debug(2, "pi -> <? NAME ?>"); free($2); }
|
||||||
|
| BQMARK NAME STRING EQMARK
|
||||||
|
{clicon_debug(2, "pi -> <? NAME STRING ?>"); free($2); free($3);}
|
||||||
|
;
|
||||||
|
|
||||||
|
|
||||||
attrs : attrs attr
|
attrs : attrs attr
|
||||||
|
|
|
|
||||||
|
|
@ -399,9 +443,9 @@ attr : NAME '=' attvalue { if (xml_parse_attr(_YA, NULL, $1, $3)
|
||||||
| NAME ':' NAME '=' attvalue { if (xml_parse_attr(_YA, $1, $3, $5) < 0) YYABORT; }
|
| NAME ':' NAME '=' attvalue { if (xml_parse_attr(_YA, $1, $3, $5) < 0) YYABORT; }
|
||||||
;
|
;
|
||||||
|
|
||||||
attvalue : '\"' CHARDATA '\"' { $$=$2; /* $2 must be consumed */}
|
attvalue : '\"' STRING '\"' { $$=$2; /* $2 must be consumed */}
|
||||||
| '\"' '\"' { $$=strdup(""); /* $2 must be consumed */}
|
| '\"' '\"' { $$=strdup(""); /* $2 must be consumed */}
|
||||||
| '\'' CHARDATA '\'' { $$=$2; /* $2 must be consumed */}
|
| '\'' STRING '\'' { $$=$2; /* $2 must be consumed */}
|
||||||
| '\'' '\'' { $$=strdup(""); /* $2 must be consumed */}
|
| '\'' '\'' { $$=strdup(""); /* $2 must be consumed */}
|
||||||
;
|
;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -78,6 +78,9 @@ expectfn(){
|
||||||
expect2=
|
expect2=
|
||||||
fi
|
fi
|
||||||
ret=$($cmd)
|
ret=$($cmd)
|
||||||
|
# echo "cmd:\"$cmd\""
|
||||||
|
# echo "retval:\"$retval\""
|
||||||
|
# echo "ret:\"$ret\""
|
||||||
if [ $? -ne $retval ]; then
|
if [ $? -ne $retval ]; then
|
||||||
echo -e "\e[31m\nError in Test$testnr [$testname]:"
|
echo -e "\e[31m\nError in Test$testnr [$testname]:"
|
||||||
echo -e "\e[0m:"
|
echo -e "\e[0m:"
|
||||||
|
|
@ -134,11 +137,15 @@ $input
|
||||||
EOF
|
EOF
|
||||||
)
|
)
|
||||||
r=$?
|
r=$?
|
||||||
if [ $r -ne $retval ]; then
|
if [ $r != $retval ]; then
|
||||||
echo -e "\e[31m\nError ($r != $retval) in Test$testnr [$testname]:"
|
echo -e "\e[31m\nError ($r != $retval) in Test$testnr [$testname]:"
|
||||||
echo -e "\e[0m:"
|
echo -e "\e[0m:"
|
||||||
exit -1
|
exit -1
|
||||||
fi
|
fi
|
||||||
|
# If an error exit code was expected (and returned), skip matching the output strings
|
||||||
|
if [ $r != 0 ]; then
|
||||||
|
return
|
||||||
|
fi
|
||||||
|
|
||||||
# Match if both are empty string
|
# Match if both are empty string
|
||||||
if [ -z "$ret" -a -z "$expect" ]; then
|
if [ -z "$ret" -a -z "$expect" ]; then
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,7 @@
|
||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
# Test: XML parser tests
|
# Test: XML parser tests
|
||||||
|
# @see https://www.w3.org/TR/2008/REC-xml-20081126
|
||||||
|
# https://www.w3.org/TR/2009/REC-xml-names-20091208
|
||||||
#PROG="valgrind --leak-check=full --show-leak-kinds=all ../util/clixon_util_xml"
|
#PROG="valgrind --leak-check=full --show-leak-kinds=all ../util/clixon_util_xml"
|
||||||
PROG=../util/clixon_util_xml
|
PROG=../util/clixon_util_xml
|
||||||
|
|
||||||
|
|
@ -9,6 +11,18 @@ PROG=../util/clixon_util_xml
|
||||||
new "xml parse"
|
new "xml parse"
|
||||||
expecteof "$PROG" 0 "<a><b/></a>" "^<a><b/></a>$"
|
expecteof "$PROG" 0 "<a><b/></a>" "^<a><b/></a>$"
|
||||||
|
|
||||||
|
new "xml parse strange names"
|
||||||
|
expecteof "$PROG" 0 "<_-><b0.><c-.-._/></b0.></_->" "^<_-><b0.><c-.-._/></b0.></_->$"
|
||||||
|
|
||||||
|
new "xml parse name errors"
|
||||||
|
expecteof "$PROG" 255 "<-a/>" ""
|
||||||
|
|
||||||
|
new "xml parse name errors"
|
||||||
|
expecteof "$PROG" 255 "<9/>" ""
|
||||||
|
|
||||||
|
new "xml parse name errors"
|
||||||
|
expecteof "$PROG" 255 "<a%/>" ""
|
||||||
|
|
||||||
XML=$(cat <<EOF
|
XML=$(cat <<EOF
|
||||||
<a><description>An example of escaped CENDs</description>
|
<a><description>An example of escaped CENDs</description>
|
||||||
<sometext>
|
<sometext>
|
||||||
|
|
@ -52,5 +66,75 @@ expecteof "$PROG" 0 "<x a='t'/>" '^<x a="t"/>$'
|
||||||
new "Mixed quotes"
|
new "Mixed quotes"
|
||||||
expecteof "$PROG" 0 "<x a='t' b=\"q\"/>" '^<x a="t" b="q"/>$'
|
expecteof "$PROG" 0 "<x a='t' b=\"q\"/>" '^<x a="t" b="q"/>$'
|
||||||
|
|
||||||
|
new "XMLdecl version"
|
||||||
|
expecteof "$PROG" 0 '<?xml version="1.0"?><a/>' '<a/>'
|
||||||
|
|
||||||
|
new "XMLdecl version, single quotes"
|
||||||
|
expecteof "$PROG" 0 "<?xml version='1.0'?><a/>" '<a/>'
|
||||||
|
|
||||||
|
new "XMLdecl version no element"
|
||||||
|
expecteof "$PROG" 255 '<?xml version="1.0"?>' ''
|
||||||
|
|
||||||
|
new "XMLdecl no version"
|
||||||
|
expecteof "$PROG" 255 '<?xml ?><a/>' ''
|
||||||
|
|
||||||
|
new "XMLdecl misspelled version"
|
||||||
|
expecteof "$PROG" 255 '<?xml verion="1.0"?><a/>' '<a/>'
|
||||||
|
|
||||||
|
new "XMLdecl version + encoding"
|
||||||
|
expecteof "$PROG" 0 '<?xml version="1.0" encoding="UTF-16"?><a/>' '<a/>'
|
||||||
|
|
||||||
|
new "XMLdecl version + misspelled encoding"
|
||||||
|
expecteof "$PROG" 255 '<?xml version="1.0" encding="UTF-16"?><a/>' '<a/>'
|
||||||
|
|
||||||
|
new "XMLdecl version + standalone"
|
||||||
|
expecteof "$PROG" 0 '<?xml version="1.0" standalone="yes"?><a/>' '<a/>'
|
||||||
|
|
||||||
|
new "PI - Processing instruction empty"
|
||||||
|
expecteof "$PROG" 0 '<?foo ?><a/>' '<a/>'
|
||||||
|
|
||||||
|
new "PI some content"
|
||||||
|
expecteof "$PROG" 0 '<?foo something else ?><a/>' '<a/>'
|
||||||
|
|
||||||
|
new "prolog element misc*"
|
||||||
|
expecteof "$PROG" 0 '<?foo something ?><a/><?bar more stuff ?><!-- a comment-->' '<a/>'
|
||||||
|
|
||||||
|
# Multiple top-level elements are allowed as an internal necessity for parsing XML fragments
|
||||||
|
#new "double element error"
|
||||||
|
#expecteof "$PROG" 255 '<a/><b/>' ''
|
||||||
|
|
||||||
|
new "namespace: DefaultAttName"
|
||||||
|
expecteof "$PROG" 0 '<x xmlns="n1">hello</x>' '^<x xmlns="n1">hello</x>$'
|
||||||
|
|
||||||
|
new "namespace: PrefixedAttName"
|
||||||
|
expecteof "$PROG" 0 '<x xmlns:n2="urn:example:des"><n2:y>hello</n2:y></x>' '^<x xmlns:n2="urn:example:des"><n2:y>hello</n2:y></x>$'
|
||||||
|
|
||||||
|
new "First example 6.1 from https://www.w3.org/TR/2009/REC-xml-names-20091208"
|
||||||
|
XML=$(cat <<EOF
|
||||||
|
<?xml version="1.0"?>
|
||||||
|
|
||||||
|
<html:html xmlns:html='http://www.w3.org/1999/xhtml'>
|
||||||
|
|
||||||
|
<html:head><html:title>Frobnostication</html:title></html:head>
|
||||||
|
<html:body><html:p>Moved to
|
||||||
|
<html:a href='http://frob.example.com'>here.</html:a></html:p></html:body>
|
||||||
|
</html:html>
|
||||||
|
EOF
|
||||||
|
)
|
||||||
|
expecteof "$PROG" 0 "$XML" "$XML"
|
||||||
|
|
||||||
|
new "Second example 6.1 from https://www.w3.org/TR/2009/REC-xml-names-20091208"
|
||||||
|
XML=$(cat <<EOF
|
||||||
|
<?xml version="1.0"?>
|
||||||
|
<!-- both namespace prefixes are available throughout -->
|
||||||
|
<bk:book xmlns:bk='urn:loc.gov:books'
|
||||||
|
xmlns:isbn='urn:ISBN:0-395-36341-6'>
|
||||||
|
<bk:title>Cheaper by the Dozen</bk:title>
|
||||||
|
<isbn:number>1568491379</isbn:number>
|
||||||
|
</bk:book>
|
||||||
|
EOF
|
||||||
|
)
|
||||||
|
expecteof "$PROG" 0 "$XML" "$XML"
|
||||||
|
|
||||||
rm -rf $dir
|
rm -rf $dir
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -78,7 +78,7 @@ all: $(APPS)
|
||||||
@echo "You may want to make clixon_util_stream separately (curl dependency)"
|
@echo "You may want to make clixon_util_stream separately (curl dependency)"
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
rm -f $(APPS) *.core
|
rm -f $(APPS) clixon_util_stream *.core
|
||||||
|
|
||||||
# APPS
|
# APPS
|
||||||
clixon_util_xml: clixon_util_xml.c $(MYLIB)
|
clixon_util_xml: clixon_util_xml.c $(MYLIB)
|
||||||
|
|
|
||||||
|
|
@ -48,6 +48,7 @@
|
||||||
#include <limits.h>
|
#include <limits.h>
|
||||||
#include <fnmatch.h>
|
#include <fnmatch.h>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
|
#include <syslog.h>
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
|
|
||||||
/* cligen */
|
/* cligen */
|
||||||
|
|
@ -68,20 +69,39 @@
|
||||||
static int
|
static int
|
||||||
usage(char *argv0)
|
usage(char *argv0)
|
||||||
{
|
{
|
||||||
fprintf(stderr, "usage:%s.\n\tInput on stdin\n", argv0);
|
fprintf(stderr, "usage:%s [options]\n"
|
||||||
|
"where options are\n"
|
||||||
|
"\t-h \t\tHelp\n"
|
||||||
|
"\t-D <level> \tDebug\n",
|
||||||
|
argv0);
|
||||||
exit(0);
|
exit(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
int
|
int
|
||||||
main(int argc, char **argv)
|
main(int argc,
|
||||||
|
char **argv)
|
||||||
{
|
{
|
||||||
cxobj *xt = NULL;
|
cxobj *xt = NULL;
|
||||||
cxobj *xc;
|
cxobj *xc;
|
||||||
cbuf *cb = cbuf_new();
|
cbuf *cb = cbuf_new();
|
||||||
|
int retval = -1;
|
||||||
|
char c;
|
||||||
|
|
||||||
if (argc != 1){
|
clicon_log_init("xpath", LOG_DEBUG, CLICON_LOG_STDERR);
|
||||||
|
optind = 1;
|
||||||
|
opterr = 0;
|
||||||
|
while ((c = getopt(argc, argv, "hD:")) != -1)
|
||||||
|
switch (c) {
|
||||||
|
case 'h':
|
||||||
usage(argv[0]);
|
usage(argv[0]);
|
||||||
return 0;
|
break;
|
||||||
|
case 'D':
|
||||||
|
if (sscanf(optarg, "%d", &debug) != 1)
|
||||||
|
usage(argv[0]);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
usage(argv[0]);
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
if (xml_parse_file(0, "</config>", NULL, &xt) < 0){
|
if (xml_parse_file(0, "</config>", NULL, &xt) < 0){
|
||||||
fprintf(stderr, "xml parse error %s\n", clicon_err_reason);
|
fprintf(stderr, "xml parse error %s\n", clicon_err_reason);
|
||||||
|
|
@ -90,18 +110,20 @@ main(int argc, char **argv)
|
||||||
xc = NULL;
|
xc = NULL;
|
||||||
while ((xc = xml_child_each(xt, xc, -1)) != NULL)
|
while ((xc = xml_child_each(xt, xc, -1)) != NULL)
|
||||||
clicon_xml2cbuf(cb, xc, 0, 0); /* print xml */
|
clicon_xml2cbuf(cb, xc, 0, 0); /* print xml */
|
||||||
fprintf(stdout, "%s\n", cbuf_get(cb));
|
fprintf(stdout, "%s", cbuf_get(cb));
|
||||||
|
fflush(stdout);
|
||||||
#if 0
|
#if 0
|
||||||
cbuf_reset(cb);
|
cbuf_reset(cb);
|
||||||
xmltree2cbuf(cb, xt, 0); /* dump data structures */
|
xmltree2cbuf(cb, xt, 0); /* dump data structures */
|
||||||
fprintf(stderr, "%s\n", cbuf_get(cb));
|
fprintf(stderr, "%s\n", cbuf_get(cb));
|
||||||
#endif
|
#endif
|
||||||
|
retval = 0;
|
||||||
done:
|
done:
|
||||||
if (xt)
|
if (xt)
|
||||||
xml_free(xt);
|
xml_free(xt);
|
||||||
if (cb)
|
if (cb)
|
||||||
cbuf_free(cb);
|
cbuf_free(cb);
|
||||||
return 0;
|
return retval;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -133,13 +133,14 @@ main(int argc, char **argv)
|
||||||
clicon_log_init("xpath", LOG_DEBUG, CLICON_LOG_STDERR);
|
clicon_log_init("xpath", LOG_DEBUG, CLICON_LOG_STDERR);
|
||||||
optind = 1;
|
optind = 1;
|
||||||
opterr = 0;
|
opterr = 0;
|
||||||
while ((c = getopt(argc, argv, "hDf:p:i:")) != -1)
|
while ((c = getopt(argc, argv, "hD:f:p:i:")) != -1)
|
||||||
switch (c) {
|
switch (c) {
|
||||||
case 'h':
|
case 'h':
|
||||||
usage(argv0);
|
usage(argv0);
|
||||||
break;
|
break;
|
||||||
case 'D':
|
case 'D':
|
||||||
debug++;
|
if (sscanf(optarg, "%d", &debug) != 1)
|
||||||
|
usage(argv0);
|
||||||
break;
|
break;
|
||||||
case 'f': /* XML file */
|
case 'f': /* XML file */
|
||||||
filename = optarg;
|
filename = optarg;
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue