* XML parser conformance to W3C spec
* Names lexically correct (NCName) * Syntactically correct handling of '<?' (processing instructions) and '<?xml' (XML declaration) * XML prolog syntax for 'well-formed' XML * <!DOCTYPE (i.e. DTD) is not supported.
This commit is contained in:
parent
9bd0dc42c6
commit
9c57902b96
9 changed files with 280 additions and 69 deletions
|
|
@ -99,10 +99,26 @@
|
|||
* <x>a</<x>
|
||||
* <x>b</<x>
|
||||
* </c>
|
||||
* From https://www.w3.org/TR/2009/REC-xml-names-20091208
|
||||
* Definitions:
|
||||
* - XML namespace: is identified by a URI reference [RFC3986]; element and
|
||||
* attribute names may be placed in an XML namespace using the mechanisms
|
||||
* described in this specification.
|
||||
* - Expanded name: is a pair consisting of a namespace name and a local name.
|
||||
* - Namespace name: For a name N in a namespace identified by a URI I, the
|
||||
* "namespace name" is I.
|
||||
* For a name N that is not in a namespace, the "namespace name" has no value.
|
||||
* - Local name: In either case the "local name" is N.
|
||||
* It is this combination of the universally managed URI namespace with the
|
||||
* vocabulary's local names that is effective in avoiding name clashes.
|
||||
*/
|
||||
struct xml{
|
||||
char *x_name; /* name of node */
|
||||
char *x_namespace; /* namespace, if any */
|
||||
#ifdef notyet
|
||||
char *x_namespacename; /* namespace name (or NULL) */
|
||||
char *x_localname; /* Local name N as defined above */
|
||||
#endif
|
||||
struct xml *x_up; /* parent node in hierarchy if any */
|
||||
struct xml **x_childvec; /* vector of children nodes */
|
||||
int x_childvec_len;/* length of vector */
|
||||
|
|
@ -224,7 +240,7 @@ xmlns_check(cxobj *xn,
|
|||
return NULL;
|
||||
}
|
||||
|
||||
/*! Check namespace of xml node by searhing recursively among ancestors
|
||||
/*! Check namespace of xml node by searching recursively among ancestors
|
||||
* @param[in] xn xml node
|
||||
* @param[in] namespace check validity of namespace
|
||||
* @retval 0 Found / validated or no yang spec
|
||||
|
|
@ -1258,15 +1274,18 @@ xmltree2cbuf(cbuf *cb,
|
|||
* @see xml_parse_file
|
||||
* @see xml_parse_string
|
||||
* @see xml_parse_va
|
||||
* @note special case is empty XML where the parser is not invoked.
|
||||
*/
|
||||
static int
|
||||
_xml_parse(const char *str,
|
||||
_xml_parse(const char *str,
|
||||
yang_spec *yspec,
|
||||
cxobj *xt)
|
||||
{
|
||||
int retval = -1;
|
||||
struct xml_parse_yacc_arg ya = {0,};
|
||||
|
||||
if (strlen(str) == 0)
|
||||
return 0; /* OK */
|
||||
if (xt == NULL){
|
||||
clicon_err(OE_XML, errno, "Unexpected NULL XML");
|
||||
return -1;
|
||||
|
|
|
|||
|
|
@ -34,6 +34,8 @@
|
|||
* XML parser
|
||||
* @see https://www.w3.org/TR/2008/REC-xml-20081126
|
||||
* https://www.w3.org/TR/2009/REC-xml-names-20091208
|
||||
*
|
||||
|
||||
*/
|
||||
|
||||
%{
|
||||
|
|
@ -72,8 +74,22 @@ int clixon_xml_parsewrap(void)
|
|||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* From https://www.w3.org/TR/2008/REC-xml-20081126:
|
||||
* [4]* NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] ...
|
||||
* [4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]
|
||||
* [5] Name ::= NameStartChar (NameChar)*
|
||||
* NOTE: From https://www.w3.org/TR/2009/REC-xml-names-20091208:
|
||||
* [4] NCName ::= Name - (Char* ':' Char*) An XML Name, minus the ":"
|
||||
* --> namestart and namechar below together define NCName (see ncname)
|
||||
*/
|
||||
|
||||
%}
|
||||
|
||||
namestart [A-Z_a-z]
|
||||
namechar [A-Z_a-z\-\.0-9]
|
||||
ncname {namestart}{namechar}*
|
||||
|
||||
%x START
|
||||
%s STATEA
|
||||
%s AMPERSAND
|
||||
|
|
@ -81,36 +97,42 @@ int clixon_xml_parsewrap(void)
|
|||
%s CMNT
|
||||
%s STR
|
||||
%s TEXTDECL
|
||||
%s PIDECL
|
||||
%s PIDECL2
|
||||
%s STRDQ
|
||||
%s STRSQ
|
||||
|
||||
%%
|
||||
<START>[0-9A-Za-z_\-]+ { clixon_xml_parselval.string = strdup(yytext);
|
||||
|
||||
<START,TEXTDECL>[ \t] ;
|
||||
<START,STATEA,CMNT,TEXTDECL>\n { _YA->ya_linenum++; }
|
||||
|
||||
<START>{ncname} { clixon_xml_parselval.string = strdup(yytext);
|
||||
return NAME; /* rather be catch-all */
|
||||
}
|
||||
<START>[ \t]+ ;
|
||||
<START>\: return *clixon_xml_parsetext;
|
||||
<START>\n { _YA->ya_linenum++;}
|
||||
<START>"<?xml" { BEGIN(TEXTDECL); return BTEXT;}
|
||||
<START><<EOF>> { return MY_EOF; }
|
||||
<START>"<?xml" { BEGIN(TEXTDECL); return BXMLDCL;}
|
||||
<START>"<?" { BEGIN(PIDECL); return BQMARK;}
|
||||
<START>"/>" { BEGIN(STATEA); return ESLASH; }
|
||||
<START>"<!--" { BEGIN(CMNT); return BCOMMENT; }
|
||||
<START>"</" return BSLASH;
|
||||
<START>[/=] return *clixon_xml_parsetext;
|
||||
<START>\< return *clixon_xml_parsetext;
|
||||
<START>\> { BEGIN(STATEA); return *clixon_xml_parsetext; }
|
||||
|
||||
<START>\" { _YA->ya_lex_state=START;BEGIN(STRDQ); return *clixon_xml_parsetext; }
|
||||
<START>\' { _YA->ya_lex_state=START;BEGIN(STRSQ); return *clixon_xml_parsetext; }
|
||||
<START>. { clixon_xml_parselval.string = yytext; return CHARDATA; /*XXX:optimize*/ }
|
||||
<START>. { clixon_xml_parselval.string = yytext; return CHARDATA; /* optimize? */}
|
||||
|
||||
<STATEA>"</" { BEGIN(START); return BSLASH; }
|
||||
<STATEA><<EOF>> { return MY_EOF; }
|
||||
<STATEA>"<!--" { BEGIN(CMNT); return BCOMMENT; }
|
||||
<STATEA>"<![CDATA[" { BEGIN(CDATA); _YA->ya_lex_state = STATEA; clixon_xml_parselval.string = yytext; return CHARDATA;}
|
||||
<STATEA>"<?" { BEGIN(PIDECL); return BQMARK; }
|
||||
<STATEA>\< { BEGIN(START); return *clixon_xml_parsetext; }
|
||||
<STATEA>& { _YA->ya_lex_state =STATEA;BEGIN(AMPERSAND);}
|
||||
<STATEA>\n { clixon_xml_parselval.string = yytext;_YA->ya_linenum++; return (CHARDATA);}
|
||||
|
||||
<STATEA>. { clixon_xml_parselval.string = yytext; return CHARDATA; /*XXX:optimize*/}
|
||||
<STATEA>. { clixon_xml_parselval.string = yytext; return CHARDATA; }
|
||||
|
||||
/* @see xml_chardata_encode */
|
||||
<AMPERSAND>"amp;" { BEGIN(_YA->ya_lex_state); clixon_xml_parselval.string = "&"; return CHARDATA;}
|
||||
|
|
@ -124,21 +146,30 @@ int clixon_xml_parsewrap(void)
|
|||
<CDATA>"]]>" { BEGIN(_YA->ya_lex_state); clixon_xml_parselval.string = yytext; return CHARDATA;}
|
||||
|
||||
<CMNT>"-->" { BEGIN(START); return ECOMMENT; }
|
||||
<CMNT>\n _YA->ya_linenum++;
|
||||
<CMNT>.
|
||||
<TEXTDECL>encoding return ENC;
|
||||
<TEXTDECL>version return VER;
|
||||
<TEXTDECL>"=" return *clixon_xml_parsetext;
|
||||
<TEXTDECL>"?>" { BEGIN(START);return ETEXT;}
|
||||
<TEXTDECL>standalone return SD;
|
||||
<TEXTDECL>"=" { return *clixon_xml_parsetext; }
|
||||
<TEXTDECL>"?>" { BEGIN(START);return EQMARK;}
|
||||
<TEXTDECL>\" { _YA->ya_lex_state =TEXTDECL;BEGIN(STRDQ); return *clixon_xml_parsetext; }
|
||||
<TEXTDECL>\' { _YA->ya_lex_state =TEXTDECL;BEGIN(STRSQ); return *clixon_xml_parsetext; }
|
||||
<TEXTDECL>. { clixon_xml_parselval.string = yytext; return CHARDATA; /* optimize? */}
|
||||
|
||||
<STRDQ>1\.[0-9]+ { clixon_xml_parselval.string = strdup(yytext); return CHARDATA; }
|
||||
<STRDQ>[^\"]+ { clixon_xml_parselval.string = strdup(yytext); return CHARDATA; }
|
||||
<PIDECL>{ncname} { clixon_xml_parselval.string = strdup(yytext);
|
||||
return NAME; /* rather be catch-all */
|
||||
}
|
||||
<PIDECL>[ \t] { BEGIN(PIDECL2);}
|
||||
<PIDECL>. { clixon_xml_parselval.string = yytext; return CHARDATA; /* optimize? */}
|
||||
<PIDECL2>"?>" { BEGIN(START);return EQMARK;}
|
||||
<PIDECL2>[^{?>}]+ { clixon_xml_parselval.string = strdup(yytext); return STRING; }
|
||||
|
||||
<STRDQ>1\.[0-9]+ { clixon_xml_parselval.string = strdup(yytext); return STRING; }
|
||||
<STRDQ>[^\"]+ { clixon_xml_parselval.string = strdup(yytext); return STRING; }
|
||||
<STRDQ>\" { BEGIN(_YA->ya_lex_state); return *clixon_xml_parsetext; }
|
||||
|
||||
<STRSQ>1\.[0-9]+ { clixon_xml_parselval.string = strdup(yytext); return CHARDATA; }
|
||||
<STRSQ>[^\']+ { clixon_xml_parselval.string = strdup(yytext); return CHARDATA; }
|
||||
<STRSQ>1\.[0-9]+ { clixon_xml_parselval.string = strdup(yytext); return STRING; }
|
||||
<STRSQ>[^\']+ { clixon_xml_parselval.string = strdup(yytext); return STRING; }
|
||||
<STRSQ>\' { BEGIN(_YA->ya_lex_state); return *clixon_xml_parsetext; }
|
||||
|
||||
%%
|
||||
|
|
|
|||
|
|
@ -39,12 +39,13 @@
|
|||
char *string;
|
||||
}
|
||||
|
||||
%start topxml
|
||||
%start document
|
||||
|
||||
%token <string> NAME CHARDATA
|
||||
%token VER ENC
|
||||
%token <string> NAME CHARDATA STRING
|
||||
%token MY_EOF
|
||||
%token VER ENC SD
|
||||
%token BSLASH ESLASH
|
||||
%token BTEXT ETEXT
|
||||
%token BXMLDCL BQMARK EQMARK
|
||||
%token BCOMMENT ECOMMENT
|
||||
|
||||
%type <string> attvalue
|
||||
|
|
@ -120,7 +121,8 @@ xml_parse_version(struct xml_parse_yacc_arg *ya,
|
|||
free(ver);
|
||||
return -1;
|
||||
}
|
||||
free(ver);
|
||||
if (ver)
|
||||
free(ver);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
@ -299,6 +301,11 @@ xml_parse_bslash2(struct xml_parse_yacc_arg *ya,
|
|||
return retval;
|
||||
}
|
||||
|
||||
/*! Parse XML attribute
|
||||
* Special cases:
|
||||
* - DefaultAttName: xmlns
|
||||
* - PrefixedAttName: xmlns:NAME
|
||||
*/
|
||||
static int
|
||||
xml_parse_attr(struct xml_parse_yacc_arg *ya,
|
||||
char *prefix,
|
||||
|
|
@ -308,6 +315,12 @@ xml_parse_attr(struct xml_parse_yacc_arg *ya,
|
|||
int retval = -1;
|
||||
cxobj *xa;
|
||||
|
||||
#ifdef notyet
|
||||
if (prefix && strcmp(prefix,"xmlns")==0)
|
||||
fprintf(stderr, "PrefixedAttName NCNAME:%s = %s\n", name, attval);
|
||||
if (prefix==NULL && strcmp(name,"xmlns")==0)
|
||||
fprintf(stderr, "DefaultAttName = %s\n", attval);
|
||||
#endif /* notyet */
|
||||
if ((xa = xml_new(name, ya->ya_xelement, NULL)) == NULL)
|
||||
goto done;
|
||||
xml_type_set(xa, CX_ATTR);
|
||||
|
|
@ -327,69 +340,100 @@ xml_parse_attr(struct xml_parse_yacc_arg *ya,
|
|||
%}
|
||||
|
||||
%%
|
||||
|
||||
topxml : list
|
||||
{ clicon_debug(3, "topxml->list ACCEPT");
|
||||
YYACCEPT; }
|
||||
| dcl list
|
||||
{ clicon_debug(3, "topxml->dcl list ACCEPT");
|
||||
YYACCEPT; }
|
||||
/* [1] document ::= prolog element Misc* */
|
||||
document : prolog element misclist MY_EOF
|
||||
{ clicon_debug(2, "document->prolog element misc* ACCEPT");
|
||||
YYACCEPT; }
|
||||
| elist MY_EOF
|
||||
{ clicon_debug(2, "document->elist ACCEPT"); /* internal exception*/
|
||||
YYACCEPT; }
|
||||
;
|
||||
/* [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)? */
|
||||
prolog : xmldcl misclist
|
||||
{ clicon_debug(2, "prolog->xmldcl misc*"); }
|
||||
| misclist
|
||||
{ clicon_debug(2, "prolog->misc*"); }
|
||||
;
|
||||
|
||||
dcl : BTEXT info encode ETEXT { clicon_debug(3, "dcl->info encode"); }
|
||||
misclist : misclist misc { clicon_debug(2, "misclist->misclist misc"); }
|
||||
| { clicon_debug(2, "misclist->"); }
|
||||
;
|
||||
|
||||
info : VER '=' '\"' CHARDATA '\"'
|
||||
{ if (xml_parse_version(_YA, $4) <0) YYABORT; }
|
||||
| VER '=' '\'' CHARDATA '\''
|
||||
{ if (xml_parse_version(_YA, $4) <0) YYABORT; }
|
||||
/* [27] Misc ::= Comment | PI | S */
|
||||
misc : comment { clicon_debug(2, "misc->comment"); }
|
||||
| pi { clicon_debug(2, "misc->pi"); }
|
||||
;
|
||||
|
||||
xmldcl : BXMLDCL verinfo encodingdecl sddecl EQMARK
|
||||
{ clicon_debug(2, "xmldcl->verinfo encodingdecl? sddecl?"); }
|
||||
;
|
||||
|
||||
verinfo : VER '=' '\"' STRING '\"'
|
||||
{ if (xml_parse_version(_YA, $4) <0) YYABORT;
|
||||
clicon_debug(2, "verinfo->version=\"STRING\"");}
|
||||
| VER '=' '\'' STRING '\''
|
||||
{ if (xml_parse_version(_YA, $4) <0) YYABORT;
|
||||
clicon_debug(2, "verinfo->version='STRING'");}
|
||||
;
|
||||
|
||||
encodingdecl : ENC '=' '\"' STRING '\"' {if ($4)free($4);}
|
||||
| ENC '=' '\'' STRING '\'' {if ($4)free($4);}
|
||||
|
|
||||
;
|
||||
|
||||
encode : ENC '=' '\"' CHARDATA '\"' {free($4);}
|
||||
| ENC '=' '\'' CHARDATA '\'' {free($4);}
|
||||
sddecl : SD '=' '\"' STRING '\"' {if ($4)free($4);}
|
||||
| SD '=' '\'' STRING '\'' {if ($4)free($4);}
|
||||
|
|
||||
;
|
||||
|
||||
/* [39] element ::= EmptyElemTag | STag content ETag */
|
||||
element : '<' qname attrs element1
|
||||
{ clicon_debug(3, "element -> < qname attrs element1"); }
|
||||
;
|
||||
{ clicon_debug(2, "element -> < qname attrs element1"); }
|
||||
;
|
||||
|
||||
qname : NAME { if (xml_parse_unprefixed_name(_YA, $1) < 0) YYABORT;
|
||||
clicon_debug(3, "qname -> NAME %s", $1);}
|
||||
clicon_debug(2, "qname -> NAME %s", $1);}
|
||||
| NAME ':' NAME { if (xml_parse_prefixed_name(_YA, $1, $3) < 0) YYABORT;
|
||||
clicon_debug(3, "qname -> NAME : NAME");}
|
||||
clicon_debug(2, "qname -> NAME : NAME");}
|
||||
;
|
||||
|
||||
element1 : ESLASH {_YA->ya_xelement = NULL;
|
||||
clicon_debug(3, "element1 -> />");}
|
||||
clicon_debug(2, "element1 -> />");}
|
||||
| '>' { xml_parse_endslash_pre(_YA); }
|
||||
list { xml_parse_endslash_mid(_YA); }
|
||||
etg { xml_parse_endslash_post(_YA);
|
||||
clicon_debug(3, "element1 -> > list etg");}
|
||||
elist { xml_parse_endslash_mid(_YA); }
|
||||
endtag { xml_parse_endslash_post(_YA);
|
||||
clicon_debug(2, "element1 -> > elist endtag");}
|
||||
;
|
||||
|
||||
etg : BSLASH NAME '>'
|
||||
{ clicon_debug(3, "etg -> < </ NAME %s>", $2); if (xml_parse_bslash1(_YA, $2) < 0) YYABORT; }
|
||||
endtag : BSLASH NAME '>'
|
||||
{ clicon_debug(2, "endtag -> < </ NAME>");
|
||||
if (xml_parse_bslash1(_YA, $2) < 0) YYABORT; }
|
||||
|
||||
| BSLASH NAME ':' NAME '>'
|
||||
{ if (xml_parse_bslash2(_YA, $2, $4) < 0) YYABORT;
|
||||
clicon_debug(3, "etg -> < </ NAME:NAME >"); }
|
||||
clicon_debug(2, "endtag -> < </ NAME:NAME >"); }
|
||||
;
|
||||
|
||||
list : list content { clicon_debug(3, "list -> list content"); }
|
||||
| content { clicon_debug(3, "list -> content"); }
|
||||
elist : elist content { clicon_debug(2, "elist -> elist content"); }
|
||||
| content { clicon_debug(2, "elist -> content"); }
|
||||
;
|
||||
|
||||
content : element { clicon_debug(3, "content -> element"); }
|
||||
| comment { clicon_debug(3, "content -> comment"); }
|
||||
| CHARDATA { if (xml_parse_content(_YA, $1) < 0) YYABORT;
|
||||
clicon_debug(3, "content -> CHARDATA %s", $1); }
|
||||
| { clicon_debug(3, "content -> "); }
|
||||
/* [43] content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)* */
|
||||
content : element { clicon_debug(2, "content -> element"); }
|
||||
| comment { clicon_debug(2, "content -> comment"); }
|
||||
| pi { clicon_debug(2, "content -> pi"); }
|
||||
| CHARDATA { if (xml_parse_content(_YA, $1) < 0) YYABORT;
|
||||
clicon_debug(2, "content -> CHARDATA %s", $1); }
|
||||
| { clicon_debug(2, "content -> "); }
|
||||
;
|
||||
|
||||
comment : BCOMMENT ECOMMENT
|
||||
;
|
||||
|
||||
pi : BQMARK NAME EQMARK {clicon_debug(2, "pi -> <? NAME ?>"); free($2); }
|
||||
| BQMARK NAME STRING EQMARK
|
||||
{clicon_debug(2, "pi -> <? NAME STRING ?>"); free($2); free($3);}
|
||||
;
|
||||
|
||||
|
||||
attrs : attrs attr
|
||||
|
|
||||
|
|
@ -399,9 +443,9 @@ attr : NAME '=' attvalue { if (xml_parse_attr(_YA, NULL, $1, $3)
|
|||
| NAME ':' NAME '=' attvalue { if (xml_parse_attr(_YA, $1, $3, $5) < 0) YYABORT; }
|
||||
;
|
||||
|
||||
attvalue : '\"' CHARDATA '\"' { $$=$2; /* $2 must be consumed */}
|
||||
attvalue : '\"' STRING '\"' { $$=$2; /* $2 must be consumed */}
|
||||
| '\"' '\"' { $$=strdup(""); /* $2 must be consumed */}
|
||||
| '\'' CHARDATA '\'' { $$=$2; /* $2 must be consumed */}
|
||||
| '\'' STRING '\'' { $$=$2; /* $2 must be consumed */}
|
||||
| '\'' '\'' { $$=strdup(""); /* $2 must be consumed */}
|
||||
;
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue