* XML parser conformance to the W3C XML spec

* Names lexically correct (NCName)
  * Syntactically correct handling of '<?' (processing instructions) and '<?xml' (XML declaration)
  * XML prolog syntax for 'well-formed' XML
  * <!DOCTYPE (i.e. DTD) is not supported.
This commit is contained in:
Olof hagsand 2018-11-18 13:22:08 +01:00
parent 9bd0dc42c6
commit 9c57902b96
9 changed files with 280 additions and 69 deletions

View file

@ -1,5 +1,7 @@
#!/bin/bash
# Test: XML parser tests
# Each case below hands an XML document to the command named by PROG through
# the expecteof helper (defined outside this file's visible part).
# @see https://www.w3.org/TR/2008/REC-xml-20081126
# https://www.w3.org/TR/2009/REC-xml-names-20091208
# Alternative PROG: uncomment to run the same utility under valgrind with full
# leak checking instead of running it directly.
#PROG="valgrind --leak-check=full --show-leak-kinds=all ../util/clixon_util_xml"
# Command under test; passed as the first argument of every expecteof call.
PROG=../util/clixon_util_xml
@ -9,6 +11,18 @@ PROG=../util/clixon_util_xml
# Element-name cases.
# NOTE(review): `new` and `expecteof` are defined outside this view. From the
# calls here, `new` takes the label of the upcoming case and `expecteof` takes
# (command, expected exit status, stdin input, expected-output pattern) --
# confirm against the helper definitions.

# Baseline: nested empty element; expected pattern is the input anchored with ^...$.
new "xml parse"
expecteof "$PROG" 0 "<a><b/></a>" "^<a><b/></a>$"

# Unusual but accepted names: leading '_', then '-', '.', and digits.
new "xml parse strange names"
expecteof "$PROG" 0 "<_-><b0.><c-.-._/></b0.></_->" "^<_-><b0.><c-.-._/></b0.></_->$"

# Rejected names. Each case carries its own label so that a failure report
# identifies which malformed name triggered it (previously all three cases
# shared the same label).
new "xml parse name errors: leading hyphen"
expecteof "$PROG" 255 "<-a/>" ""

new "xml parse name errors: leading digit"
expecteof "$PROG" 255 "<9/>" ""

new "xml parse name errors: illegal character"
expecteof "$PROG" 255 "<a%/>" ""
XML=$(cat <<EOF
<a><description>An example of escaped CENDs</description>
<sometext>
@ -52,5 +66,75 @@ expecteof "$PROG" 0 "<x a='t'/>" '^<x a="t"/>$'
# One element with a single-quoted and a double-quoted attribute value; the
# expected pattern shows both values double-quoted.
new "Mixed quotes"
expecteof "$PROG" 0 "<x a='t' b=\"q\"/>" '^<x a="t" b="q"/>$'
# XML declaration (<?xml ... ?>) cases.
# NOTE(review): expecteof is defined outside this view; its arguments appear to
# be (command, expected exit status, stdin input, expected-output pattern).
# Cases with status 0 expect the pattern '<a/>', i.e. without the declaration.
new "XMLdecl version"
expecteof "$PROG" 0 '<?xml version="1.0"?><a/>' '<a/>'
new "XMLdecl version, single quotes"
expecteof "$PROG" 0 "<?xml version='1.0'?><a/>" '<a/>'
# A declaration that is not followed by any element is expected to fail.
new "XMLdecl version no element"
expecteof "$PROG" 255 '<?xml version="1.0"?>' ''
# A declaration without the version attribute is expected to fail.
new "XMLdecl no version"
expecteof "$PROG" 255 '<?xml ?><a/>' ''
# NOTE(review): this case expects status 255 yet passes '<a/>' as the expected
# output, unlike the two 255 cases above which pass ''. Confirm which is
# intended; it depends on how expecteof treats output when the status is non-zero.
new "XMLdecl misspelled version"
expecteof "$PROG" 255 '<?xml verion="1.0"?><a/>' '<a/>'
new "XMLdecl version + encoding"
expecteof "$PROG" 0 '<?xml version="1.0" encoding="UTF-16"?><a/>' '<a/>'
# NOTE(review): same observation as for "XMLdecl misspelled version": status 255
# combined with a non-empty expected output.
new "XMLdecl version + misspelled encoding"
expecteof "$PROG" 255 '<?xml version="1.0" encding="UTF-16"?><a/>' '<a/>'
new "XMLdecl version + standalone"
expecteof "$PROG" 0 '<?xml version="1.0" standalone="yes"?><a/>' '<a/>'
# Processing instructions (<?target ... ?>) placed before the root element;
# each case expects status 0 and the pattern '<a/>'.
new "PI - Processing instruction empty"
expecteof "$PROG" 0 '<?foo ?><a/>' '<a/>'
new "PI some content"
expecteof "$PROG" 0 '<?foo something else ?><a/>' '<a/>'
# A processing instruction before the root element, then another processing
# instruction and a comment after it.
new "prolog element misc*"
expecteof "$PROG" 0 '<?foo something ?><a/><?bar more stuff ?><!-- a comment-->' '<a/>'
# Disabled case: two top-level elements in one input. It is left commented out
# because the parser allows this as an internal necessity for parsing of XML
# fragments.
#new "double element error"
#expecteof "$PROG" 255 '<a/><b/>' ''
# Namespace declarations as attributes; both cases expect the input back,
# anchored with ^...$.
# Default namespace: xmlns="..." with no prefix.
new "namespace: DefaultAttName"
expecteof "$PROG" 0 '<x xmlns="n1">hello</x>' '^<x xmlns="n1">hello</x>$'
# Prefixed namespace: xmlns:n2="..." declared on x and used by the child n2:y.
new "namespace: PrefixedAttName"
expecteof "$PROG" 0 '<x xmlns:n2="urn:example:des"><n2:y>hello</n2:y></x>' '^<x xmlns:n2="urn:example:des"><n2:y>hello</n2:y></x>$'
# Multi-line document in which every element uses the 'html' prefix declared on
# the root element. The fixture is stored in XML and passed to expecteof both
# as the input and as the expected output.
new "First example 6.1 from https://www.w3.org/TR/2009/REC-xml-names-20091208"
# Quoted delimiter: the fixture is taken literally (it contains nothing for the
# shell to expand, so the resulting value is the same as with a bare delimiter).
XML=$(
cat <<'EOF'
<?xml version="1.0"?>
<html:html xmlns:html='http://www.w3.org/1999/xhtml'>
<html:head><html:title>Frobnostication</html:title></html:head>
<html:body><html:p>Moved to
<html:a href='http://frob.example.com'>here.</html:a></html:p></html:body>
</html:html>
EOF
)
expecteof "$PROG" 0 "$XML" "$XML"
# Multi-line document with a leading comment and two namespace prefixes ('bk'
# and 'isbn') declared on the root element across two lines. The fixture is
# stored in XML and passed to expecteof both as the input and as the expected
# output.
new "Second example 6.1 from https://www.w3.org/TR/2009/REC-xml-names-20091208"
# Quoted delimiter: the fixture is taken literally (it contains nothing for the
# shell to expand, so the resulting value is the same as with a bare delimiter).
XML=$(
cat <<'EOF'
<?xml version="1.0"?>
<!-- both namespace prefixes are available throughout -->
<bk:book xmlns:bk='urn:loc.gov:books'
xmlns:isbn='urn:ISBN:0-395-36341-6'>
<bk:title>Cheaper by the Dozen</bk:title>
<isbn:number>1568491379</isbn:number>
</bk:book>
EOF
)
expecteof "$PROG" 0 "$XML" "$XML"
# Remove the path held in $dir ($dir is set outside this view).
# The expansion is quoted so a path containing spaces or glob characters stays
# a single operand, and `--` stops a value starting with '-' from being read as
# an option. The guard keeps the empty/unset case a no-op, as it was before.
if [ -n "${dir:-}" ]; then
  rm -rf -- "$dir"
fi