* CDATA sections stripped from XML when converted to JSON
This commit is contained in:
Olof hagsand 2019-01-19 21:39:19 +01:00
parent 906b93cae0
commit 19f26e6838
4 changed files with 149 additions and 51 deletions

View file

@ -1,24 +1,22 @@
#!/bin/bash
# Test: XML parser tests and JSON translation
# @see https://www.w3.org/TR/2008/REC-xml-20081126
# https://www.w3.org/TR/2009/REC-xml-names-20091208
# Alternative command line that runs the same utility under valgrind (disabled):
#PROG="valgrind --leak-check=full --show-leak-kinds=all ../util/clixon_util_xml"
# Source lib.sh, which provides the err() and new() functions and creates $dir
. ./lib.sh
# Command under test. $DBG is not assigned in this file; presumably it is
# provided by lib.sh or the environment -- TODO confirm.
PROG="../util/clixon_util_xml -D $DBG"
# Shape of every case below: `new <label>` followed by
# `expecteof <command> <number> <input> <expected>`.
# NOTE(review): expecteof is not defined in this file. The number looks like the
# expected exit status (0 where the input is well-formed, 255 where it is not)
# and the last argument like the expected output -- confirm against lib.sh.
new "xml parse"
expecteof "$PROG" 0 "<a><b/></a>" "^<a><b/></a>$"
# The -j variants are the ones whose expected output is JSON.
new "xml parse to json"
# NOTE(review): the next two checks differ only in the ^...$ anchors around the
# expected string; they look like the before/after of a single edit -- confirm
# whether both are meant to run.
expecteof "$PROG -j" 0 "<a><b/></a>" '^{"a": {"b": null}}$'
expecteof "$PROG -j" 0 "<a><b/></a>" '{"a": {"b": null}}'
# Element names built from '_', '-', '.' and digits are expected to be accepted.
new "xml parse strange names"
# NOTE(review): anchored and unanchored variants of the same check, as above.
expecteof "$PROG" 0 "<_-><b0.><c-.-._/></b0.></_->" "^<_-><b0.><c-.-._/></b0.></_->$"
expecteof "$PROG" 0 "<_-><b0.><c-.-._/></b0.></_->" "<_-><b0.><c-.-._/></b0.></_->"
# A name starting with '-' is expected to be rejected (255, empty expected string).
new "xml parse name errors"
expecteof "$PROG" 255 "<-a/>" ""
@ -37,11 +35,16 @@ if [ "$ret" != "<x>a${LF}b${LF}c${LF}d</x>" ]; then
err '<x>a$LFb$LFc</x>' "$ret"
fi
# A CDATA section is expected to come back unchanged in the XML output.
# NOTE(review): expecteofx is defined outside this file; how it differs from
# expecteof is not visible here -- confirm in lib.sh.
new "xml simple CDATA"
expecteofx "$PROG" 0 '<a><![CDATA[a text]]></a>' '<a><![CDATA[a text]]></a>'
# With -j the expected JSON value is only the text inside the CDATA section;
# the <![CDATA[ ... ]]> markers are not part of the expected output.
new "xml simple CDATA to json"
expecteofx "$PROG -j" 0 '<a><![CDATA[a text]]></a>' '{"a": "a text"}'
new "xml complex CDATA"
XML=$(cat <<EOF
<a><description>An example of escaped CENDs</description>
<sometext>
<![CDATA[ They're saying "x < y" & that "z > y" so I guess that means that z > x ]]>
</sometext>
<sometext><![CDATA[ They're saying "x < y" & that "z > y" so I guess that means that z > x ]]></sometext>
<!-- This text contains a CEND ]]> -->
<!-- In this first case we put the ]] at the end of the first CDATA block
and the > in the second CDATA block -->
@ -53,33 +56,55 @@ XML=$(cat <<EOF
EOF
)
# $XML used here is assigned by the here-document that ends just above.
# The expected string spans three lines inside one double-quoted argument, so
# the newlines around the first CDATA section are part of the expected value.
new "xml CDATA"
expecteof "$PROG" 0 "$XML" "^<a><description>An example of escaped CENDs</description><sometext>
<![CDATA[ They're saying \"x < y\" & that \"z > y\" so I guess that means that z > x ]]>
</sometext><data><![CDATA[This text contains a CEND ]]]]><![CDATA[>]]></data><alternative><![CDATA[This text contains a CEND ]]]><![CDATA[]>]]></alternative></a>$"
# Expected JSON for the same document: the two adjacent CDATA sections in
# <data> and <alternative> are expected to be joined into the single text
# "This text contains a CEND ]]>".
# The here-document delimiter is unquoted, but \" is kept verbatim by the shell
# (a backslash there only escapes $, `, \ and newline), so the JSON text
# contains escaped double quotes.
JSON=$(cat <<EOF
{"a": {"description": "An example of escaped CENDs","sometext": " They're saying \"x < y\" & that \"z > y\" so I guess that means that z > x ","data": "This text contains a CEND ]]>","alternative": "This text contains a CEND ]]>"}}
EOF
)
new "xml complex CDATA to json"
expecteofx "$PROG -j" 0 "$XML" "$JSON"
# Input containing the predefined entity references &lt; &gt; and &amp;.
XML=$(cat <<EOF
<message>Less than: &lt; , greater than: &gt; ampersand: &amp; </message>
EOF
)
# In XML output the entity references are expected to come back exactly as given.
new "xml encode <>&"
# NOTE(review): the next two checks differ only in the ^...$ anchors; they look
# like the before/after of a single edit -- confirm whether both are meant to run.
expecteof "$PROG" 0 "$XML" "^$XML$"
expecteof "$PROG" 0 "$XML" "$XML"
# In JSON output the expected string holds the decoded characters < > &.
new "xml encode <>& to json"
expecteof "$PROG -j" 0 "$XML" '{"message": "Less than: < , greater than: > ampersand: & "}'
# Input using &apos; and &quot; next to a literal single quote.
# NOTE(review): the here-document holds two <message> elements (a long and a
# short wording of the same sentence) and two test cases follow, one per
# wording. This looks like the before/after of a single edit -- confirm which
# version is intended, since each check is fed the whole of $XML.
XML=$(cat <<EOF
<message>To allow attribute values to contain both single and double quotes, the apostrophe or single-quote character ' may be represented as &apos; and the double-quote character as &quot;</message>
<message>single-quote character ' represented as &apos; and double-quote character as &quot;</message>
EOF
)
# The expected XML output has &apos; and &quot; replaced by the plain ' and " characters.
new "xml optional encode single and double quote"
expecteof "$PROG" 0 "$XML" "^<message>To allow attribute values to contain both single and double quotes, the apostrophe or single-quote character ' may be represented as ' and the double-quote character as \"</message>$"
new "xml single and double quote"
expecteof "$PROG" 0 "$XML" "<message>single-quote character ' represented as ' and double-quote character as \"</message>"
# Expected JSON: the double quote appears escaped as \" inside the JSON string.
# The shell keeps \" verbatim in this unquoted here-document.
JSON=$(cat <<EOF
{"message": "single-quote character ' represented as ' and double-quote character as \""}
EOF
)
new "xml single and double quotes to json"
expecteofx "$PROG -j" 0 "$XML" "$JSON"
# Inside double quotes the shell leaves \b as the two characters backslash and
# 'b'. NOTE(review): whether expecteofx turns that into a real backspace
# character before feeding the parser is not visible here -- confirm in lib.sh.
new "xml backspace"
expecteofx "$PROG" 0 "<a>a\b</a>" "<a>a\b</a>"
# The expected JSON is single-quoted, so it contains two literal backslashes
# followed by 'b'.
new "xml backspace to json"
expecteofx "$PROG -j" 0 "<a>a\b</a>" '{"a": "a\\b"}'
# Attribute quoting: whichever quote character the input uses, the expected
# output always shows the attribute value in double quotes.
# NOTE(review): each case below has two checks that differ only in the ^...$
# anchors around the expected string; they look like the before/after of a
# single edit -- confirm whether both are meant to run.
new "Double quotes for attributes"
expecteof "$PROG" 0 '<x a="t"/>' '^<x a="t"/>$'
expecteof "$PROG" 0 '<x a="t"/>' '<x a="t"/>'
new "Single quotes for attributes (returns double quotes but at least parses right)"
expecteof "$PROG" 0 "<x a='t'/>" '^<x a="t"/>$'
expecteof "$PROG" 0 "<x a='t'/>" '<x a="t"/>'
new "Mixed quotes"
expecteof "$PROG" 0 "<x a='t' b=\"q\"/>" '^<x a="t" b="q"/>$'
expecteof "$PROG" 0 "<x a='t' b=\"q\"/>" '<x a="t" b="q"/>'
# An XML declaration with a version is accepted; the expected output is the
# element alone, without the declaration.
new "XMLdecl version"
expecteof "$PROG" 0 '<?xml version="1.0"?><a/>' '<a/>'
@ -94,7 +119,7 @@ new "XMLdecl no version"
# An XML declaration without a version attribute is expected to be rejected (255).
expecteof "$PROG" 255 '<?xml ?><a/>' ''
# "verion" is misspelled on purpose: this is the malformed input under test.
new "XMLdecl misspelled version"
# NOTE(review): the meaning of "-l o" is not visible in this file -- confirm
# against the utility's usage text. The two checks below differ only in the
# expected string (a fragment of an error message versus empty); they look like
# the before/after of a single edit -- confirm which one is intended.
expecteof "$PROG -l o" 255 '<?xml verion="1.0"?><a/>' 'yntax error: at or before: v'
expecteof "$PROG -l o" 255 '<?xml verion="1.0"?><a/>' ''
# A declaration with both version and encoding is accepted; the expected output
# is the element alone.
new "XMLdecl version + encoding"
expecteof "$PROG" 0 '<?xml version="1.0" encoding="UTF-16"?><a/>' '<a/>'
@ -119,7 +144,7 @@ expecteof "$PROG" 0 '<?foo something ?><a/><?bar more stuff ?><!-- a comment-->'
#expecteof "$PROG" 255 '<a/><b/>' ''
# Default namespace declaration (xmlns="...") is expected to be echoed unchanged.
new "namespace: DefaultAttName"
# NOTE(review): the next two checks differ only in the ^...$ anchors; they look
# like the before/after of a single edit -- confirm whether both are meant to run.
expecteof "$PROG" 0 '<x xmlns="n1">hello</x>' '^<x xmlns="n1">hello</x>$'
expecteof "$PROG" 0 '<x xmlns="n1">hello</x>' '<x xmlns="n1">hello</x>'
# Prefixed namespace declaration (xmlns:n2="...") with a prefixed child element
# is expected to be echoed unchanged.
new "namespace: PrefixedAttName"
expecteof "$PROG" 0 '<x xmlns:n2="urn:example:des"><n2:y>hello</n2:y></x>' '^<x xmlns:n2="urn:example:des"><n2:y>hello</n2:y></x>$'