* Optimized prefix checks at xml parse time: using many prefixes slowed down parsing considerably

* Cleared startup-db cache after restart
This commit is contained in:
Olof hagsand 2020-04-29 23:11:48 +02:00
parent 5b57e68e2b
commit b8ec6a4957
12 changed files with 232 additions and 139 deletions

View file

@ -50,9 +50,11 @@ Expected: May 2020
* Optimizations
* Reduced memory for attribute and body objects, see `XML_NEW_DIFFERENTIATE` compile-time option.
* Optimized prefix checks at xml parse time: using many prefixes slowed down parsing considerably
* Optimized cbuf handling in parsing and xml2cbuf functions.
* Optimized xml scanner to read strings rather than single chars
* Optimized xml_merge for the case of disjunct trees.
* Cleared startup-db cache after restart
* Experimental: restart_plugin
* Two new plugin callbacks added
* ca_daemon: Called just after a server has "daemonized", ie put in background.

View file

@ -834,6 +834,8 @@ main(int argc,
if (ret != 1)
if (xmldb_copy(h, "tmp", "running") < 0)
goto done;
/* clear startup dbcache */
xmldb_clear(h, "startup");
if (ret2status(ret, &status) < 0)
goto done;
/* if status = STARTUP_INVALID, cbret contains info */

View file

@ -63,6 +63,7 @@ int xmldb_unlock(clicon_handle h, const char *db);
int xmldb_unlock_all(clicon_handle h, uint32_t id);
uint32_t xmldb_islocked(clicon_handle h, const char *db);
int xmldb_exists(clicon_handle h, const char *db);
int xmldb_clear(clicon_handle h, const char *db);
int xmldb_delete(clicon_handle h, const char *db);
int xmldb_create(clicon_handle h, const char *db);
/* utility functions */

View file

@ -58,7 +58,7 @@ int xml_nsctx_yang(yang_stmt *yn, cvec **ncp);
int xml_nsctx_yangspec(yang_stmt *yspec, cvec **ncp);
int xml2ns(cxobj *x, char *localname, char **namespace);
int xml2ns_recurse(cxobj *x);
int xml2prefix(cxobj *xn, char *namespace, char **prefixp);
int xml_localname_check(cxobj *xn, void *arg);
#endif /* _CLIXON_XML_NSCTX_H */

View file

@ -369,7 +369,31 @@ xmldb_exists(clicon_handle h,
return retval;
}
/*! Delete database. Remove file
/*! Clear database cache if any for mem/size optimization only
* @param[in] h Clicon handle
* @param[in] db Database
* @retval -1 Error
* @retval 0 OK
*/
int
xmldb_clear(clicon_handle h,
const char *db)
{
cxobj *xt = NULL;
db_elmnt *de = NULL;
if (clicon_datastore_cache(h) != DATASTORE_NOCACHE){
if ((de = clicon_db_elmnt_get(h, db)) != NULL){
if ((xt = de->de_xml) != NULL){
xml_free(xt);
de->de_xml = NULL;
}
}
}
return 0;
}
/*! Delete database, clear cache if any. Remove file
* @param[in] h Clicon handle
* @param[in] db Database
* @retval -1 Error
@ -381,18 +405,10 @@ xmldb_delete(clicon_handle h,
{
int retval = -1;
char *filename = NULL;
db_elmnt *de = NULL;
cxobj *xt = NULL;
struct stat sb;
if (clicon_datastore_cache(h) != DATASTORE_NOCACHE){
if ((de = clicon_db_elmnt_get(h, db)) != NULL){
if ((xt = de->de_xml) != NULL){
xml_free(xt);
de->de_xml = NULL;
}
}
}
if (xmldb_clear(h, db) < 0)
goto done;
if (xmldb_db2file(h, db, &filename) < 0)
goto done;
if (lstat(filename, &sb) == 0)

View file

@ -177,10 +177,10 @@ struct xml{
int x_childvec_max;/* Length of allocated vector */
cvec *x_ns_cache; /* Cached vector of namespaces */
cvec *x_ns_cache; /* Cached vector of namespaces (set by bind-yang) */
yang_stmt *x_spec; /* Pointer to specification, eg yang,
by reference, dont free */
cg_var *x_cv; /* Cached value as cligen variable (eg xml_cmp) */
cg_var *x_cv; /* Cached value as cligen variable (set by xml_cmp) */
#ifdef XML_EXPLICIT_INDEX
struct search_index *x_search_index; /* explicit search index vectors */
#endif
@ -199,8 +199,7 @@ struct xmlbody{
int _xb_vector_i; /* internal use: xml_child_each */
int _xb_i; /* internal use for sorting:
see xml_enumerate and xml_cmp */
cbuf *xb_value_cb; /* attribute and body nodes have values (XXX: this consumes
memory) cv? */
cbuf *xb_value_cb; /* attribute and body nodes have values */
};
#endif /* XML_NEW_DIFFERENTIATE */
@ -240,6 +239,7 @@ xml_stats_global(uint64_t *nr)
return 0;
}
/*! Return the alloced memory of a single XML obj
* @param[in] x XML object
* @param[out] szp Size of this XML obj
@ -280,7 +280,7 @@ xml_stats_one(cxobj *x,
#ifdef XML_NEW_DIFFERENTIATE
sz += sizeof(struct xmlbody);
#else
sz += sizeof(struct xmlbody);
sz += sizeof(struct xml);
#endif
if (x->x_value_cb)
sz += cbuf_buflen(x->x_value_cb);
@ -294,6 +294,45 @@ xml_stats_one(cxobj *x,
return 0;
}
#if 0
/*! Print memory stats of a single object
*/
static int
xml_print_stats_one(FILE *f,
cxobj *x)
{
size_t sz = 0;
xml_stats_one(x, &sz);
fprintf(f, "%s:\n", xml_name(x));
fprintf(f, " sum: \t\t%u\n", (unsigned int)sz);
if (xml_type(x) == CX_ELMNT)
fprintf(f, " base struct: \t%u\n", (unsigned int)sizeof(struct xml));
else
fprintf(f, " base struct: \t%u\n", (unsigned int)sizeof(struct xmlbody));
if (x->x_name)
fprintf(f, " name: \t%u\n", (unsigned int)strlen(x->x_name) + 1);
if (x->x_prefix)
fprintf(f, " prefix: \t%u\n", (unsigned int)strlen(x->x_prefix) + 1);
if (xml_type(x) == CX_ELMNT){
if (x->x_childvec_max)
fprintf(f, " childvec: \t%u\n", (unsigned int)(x->x_childvec_max*sizeof(struct xml*)));
if (x->x_ns_cache)
fprintf(f, " ns-cache: \t%u\n", (unsigned int)cvec_size(x->x_ns_cache));
if (x->x_cv)
fprintf(f, " value-cv: \t%u\n", (unsigned int)cv_size(x->x_cv));
if (x->x_search_index)
fprintf(f, " search-index: \t%u\n",
(unsigned int)(strlen(x->x_search_index->si_name) + 1 + clixon_xvec_len(x->x_search_index->si_xvec)*sizeof(struct cxobj*)));
}
else{
if (x->x_value_cb)
fprintf(f, " value-cb: \t%u\n", cbuf_buflen(x->x_value_cb));
}
return 0;
}
#endif
/*! Return statistics of an XML tree recursively
* @param[in] xt XML object
* @param[out] szp Size of this XML obj recursively
@ -313,6 +352,7 @@ xml_stats(cxobj *xt,
clicon_err(OE_XML, EINVAL, "xml node is NULL");
goto done;
}
// xml_print_stats_one(stderr, xt);
*nrp += 1;
xml_stats_one(xt, &sz);
if (szp)
@ -377,21 +417,21 @@ xml_prefix(cxobj *xn)
}
/*! Set prefix of xnode, prefix is copied
* @param[in] xn xml node
* @param[in] localname new prefix, null-terminated string, copied by function
* @retval -1 on error with clicon-err set
* @param[in] xn XML node
* @param[in] prefix New prefix, null-terminated string, copied by function
* @retval -1 Error with clicon-err set
* @retval 0 OK
*/
int
xml_prefix_set(cxobj *xn,
char *localname)
char *prefix)
{
if (xn->x_prefix){
free(xn->x_prefix);
xn->x_prefix = NULL;
}
if (localname){
if ((xn->x_prefix = strdup(localname)) == NULL){
if (prefix){
if ((xn->x_prefix = strdup(prefix)) == NULL){
clicon_err(OE_XML, errno, "strdup");
return -1;
}
@ -1136,7 +1176,9 @@ xml_spec_set(cxobj *x,
* @param[in] x XML node (body and leaf/leaf-list)
* @retval cv CLIgen variable containing value of x body
* @retval NULL
* @note only applicable if x is body and has yang-spec and is leaf or leaf-list
* Only applicable if x is body and has yang-spec and is leaf or leaf-list
* Only accessed by xml_cv_cache as part of sorting in xml_cmp
* @see xml_cv_cache
*/
cg_var *
xml_cv(cxobj *x)
@ -1146,11 +1188,13 @@ xml_cv(cxobj *x)
return x->x_cv;
}
/*! Return (cached) cligen variable value of xml node
/*! Set (cached) cligen variable value of xml node
* @param[in] x XML node (body and leaf/leaf-list)
* @param[in] cv CLIgen variable containing value of x body
* @retval 0 OK
* @note only applicable if x is body and has yang-spec and is leaf or leaf-list
* Only applicable if x is body and has yang-spec and is leaf or leaf-list
* Only accessed by xml_cv_cache as part of sorting in xml_cmp
* @see xml_cv_cache
*/
int
xml_cv_set(cxobj *x,

View file

@ -423,7 +423,7 @@ _xml_parse(const char *str,
for (i = 0; i < xy.xy_xlen; i++) {
x = xy.xy_xvec[i];
/* Verify namespaces after parsing */
if (xml_apply0(x, CX_ELMNT, xml_localname_check, NULL) < 0)
if (xml2ns_recurse(x) < 0)
goto done;
/* Populate, ie associate xml nodes with yang specs
*/

View file

@ -424,7 +424,6 @@ xml_nsctx_yangspec(yang_stmt *yspec,
* if (xml2ns(xt, NULL, &namespace) < 0)
* err;
* @endcode
* @see xmlns_check
* @see xmlns_set cache is set
* @note, this function uses a cache.
*/
@ -474,6 +473,38 @@ xml2ns(cxobj *x,
return retval;
}
/*! Recursively check prefix / namespaces (and populate ns cache)
* @retval 1 OK
* @retval 0 (Some) prefix not found
* @retval -1 Error
*/
int
xml2ns_recurse(cxobj *xt)
{
int retval = -1;
cxobj *x;
char *prefix;
char *namespace;
x = NULL;
while ((x = xml_child_each(xt, x, CX_ELMNT)) != NULL) {
if ((prefix = xml_prefix(x)) != NULL){
namespace = NULL;
if (xml2ns(x, prefix, &namespace) < 0)
goto done;
if (namespace == NULL){
clicon_err(OE_XML, ENOENT, "No namespace associated with %s:%s", prefix, xml_name(x));
goto done;
}
}
if (xml2ns_recurse(x) < 0)
goto done;
}
retval = 0;
done:
return retval;
}
/*! Add a namespace attribute to an XML node, either default or specific prefix
* @param[in] x XML tree
* @param[in] prefix prefix/ns localname. If NULL then set default xmlns
@ -573,64 +604,3 @@ xml2prefix(cxobj *xn,
goto done;
}
/*! See if xmlns:[<localname>=]<uri> exists, if so return <uri>
*
* @param[in] xn XML node
* @param[in] nsn Namespace name
* @retval URI return associated URI if found
* @retval NULL No namespace name binding found for nsn
* @see xml2ns XXX coordinate
*/
static char *
xmlns_check(cxobj *xn,
char *nsn)
{
cxobj *x = NULL;
char *xns;
while ((x = xml_child_each(xn, x, CX_ATTR)) != NULL)
if ((xns = xml_prefix(x)) && strcmp(xns, "xmlns")==0 &&
strcmp(xml_name(x), nsn) == 0)
return xml_value(x);
return NULL;
}
/*! Check namespace of xml node by searching recursively among ancestors
* @param[in] xn xml node
* @param[in] namespace check validity of namespace
* @retval 0 Found / validated or no yang spec
* @retval -1 Not found
* @note This function is grossly inefficient
*/
int
xml_localname_check(cxobj *xn,
void *arg)
{
cxobj *xp = NULL;
char *nsn;
char *n;
yang_stmt *ys = xml_spec(xn);
/* No namespace name - comply */
if ((nsn = xml_prefix(xn)) == NULL)
return 0;
/* Check if NSN defined in same node */
if (xmlns_check(xn, nsn) != NULL)
return 0;
/* Check if NSN defined in some ancestor */
while ((xp = xml_parent(xn)) != NULL) {
if (xmlns_check(xp, nsn) != NULL)
return 0;
xn = xp;
}
/* Check if my namespace */
if ((n = yang_find_myprefix(ys)) != NULL && strcmp(nsn,n)==0)
return 0;
/* Check if any imported module */
if (yang_find_module_by_prefix(ys, nsn) != NULL)
return 0;
/* Not found, error */
clicon_err(OE_XML, ENOENT, "Namespace name %s in %s:%s not found",
nsn, nsn, xml_name(xn));
return -1;
}

View file

@ -198,6 +198,7 @@ xml_parse_prefixed_name(clixon_xml_yacc *xy,
xp = xy->xy_xparent;
if ((x = xml_new(name, xp, CX_ELMNT)) == NULL)
goto done;
/* Cant check namespaces here since local xmlns attributes loaded after */
if (xml_prefix_set(x, prefix) < 0)
goto done;

View file

@ -80,6 +80,8 @@
* @retval -1 Error
* @note only applicable if x is body and has yang-spec and is leaf or leaf-list
* Move to clixon_xml.c?
* As a side-effect sets the cache.
* Clear cache with xml_cv_set(x, NULL)
*/
static int
xml_cv_cache(cxobj *x,

View file

@ -1,5 +1,5 @@
#!/usr/bin/env bash
# Backend Memory tests, footprint using the clixon-conf state statistics
# Backend Memory tests of config data, footprint using the clixon-conf state statistics
# Create a large datastore, load it and measure
# Baseline: (thinkpad laptop) running db:
# 100K objects: 500K mem: 74M
@ -71,8 +71,8 @@ testrun(){
nr=$1
new "test params: -f $cfg"
if [ $BE -ne 0 ]; then
if [ $BE -ne 0 ]; then
new "generate config with $nr list entries"
echo -n "<config><x xmlns=\"urn:example:clixon\">" > $dir/startup_db
for (( i=0; i<$nr; i++ )); do
@ -80,7 +80,6 @@ testrun(){
done
echo "</x></config>" >> $dir/startup_db
new "kill old backend"
sudo clixon_backend -zf $cfg
if [ $? -ne 0 ]; then
@ -105,7 +104,7 @@ testrun(){
#
if [ -f /proc/$pid/statm ]; then # This ony works on Linux
# cat /proc/$pid/statm
echo -n " mem: "
echo -n " /proc/$pid/statm: "
cat /proc/$pid/statm|awk '{print $1*4/1000 "M"}'
fi
for db in running candidate startup; do
@ -132,10 +131,51 @@ testrun(){
fi
}
new "Memory test for backend with 1 $perfnr entries"
new "Memory test for backend with $perfnr entries"
testrun $perfnr
rm -rf $dir
# unset conditional parameters
unset perfnr
if false; then
# Example memory pretty-printed:
x:
base struct: 104
name: 2
childvec: 131072
(ns-cache: 115) # only in startup?
sum: 131178
xmlns:
base struct: 56
name: 6
value-cb: 38
sum: 100
y:
base struct: 104
name: 2
childvec: 16
(ns-cache: 115) # only in startup?
sum: 122
a:
base struct: 104
name: 2
childvec: 8
(ns-cache: 115) # only in startup?
value-cv: 72 # Value cached for sorting
sum: 186
body:
base struct: 56
name: 5
value-cb: 4
sum: 65
b:
sum: 114
base struct: 104
name: 2
childvec: 8
(ns-cache: 115) # only in startup?
fi

View file

@ -1,18 +1,24 @@
#!/usr/bin/env bash
# Startup performance tests for different formats and startup modes.
# Generate file in different formats:
# xml, xml pretty-printed, xml with prefixes, json
# Magic line must be first in script (see README.md)
s="$_" ; . ./lib.sh || if [ "$s" = $0 ]; then exit 0; else return 0; fi
# Number of list/leaf-list entries in file
: ${perfnr:=10000}
: ${pretty:=false}
APPNAME=example
cfg=$dir/scaling-conf.xml
fyang=$dir/scaling.yang
sx=$dir/sx.xml
sxpp=$dir/sxpp.xml
sxpre=$dir/sxpre.xml
sj=$dir/sj.xml
# NOTE, added a deep yang structure (x0,x1,x2) to expose performance due to turned off caching.
cat <<EOF > $fyang
module scaling{
@ -75,10 +81,23 @@ fi
# First generate large XML file
# Use it latter to generate startup-db in xml, tree formats
tmpx=$dir/tmp.xml
new "generate large startup config ($tmpx) with $perfnr entries"
if $pretty; then
cat<<EOF > $tmpx
new "generate plain xml startup config ($sx) with $perfnr entries"
echo -n "<config><x0 xmlns=\"urn:example:clixon\"><x1><x2><name>ip</name><x>" > $sx
for (( i=0; i<$perfnr; i++ )); do
echo -n "<y><a>$i</a><b>$i</b></y>" >> $sx
done
echo "</x></x2></x1></x0></config>" >> $sx
new "generate prefixed xml startup config ($sxpre) with $perfnr entries"
echo -n "<config><ex:x0 xmlns:ex=\"urn:example:clixon\"><ex:x1><ex:x2><ex:name>ip</ex:name><ex:x>" > $sxpre
for (( i=0; i<$perfnr; i++ )); do
echo -n "<ex:y><ex:a>$i</ex:a><ex:b>$i</ex:b></ex:y>" >> $sxpre
done
echo "</ex:x></ex:x2></ex:x1></ex:x0></config>" >> $sxpre
new "generate pretty-printed xml startup config ($sxpp) with $perfnr entries"
cat<<EOF > $sxpp
<config>
<x0 xmlns="urn:example:clixon">
<x1>
@ -87,62 +106,58 @@ cat<<EOF > $tmpx
<x>
EOF
for (( i=0; i<$perfnr; i++ )); do
echo " <y>" >> $tmpx
echo " <a>$i</a>" >> $tmpx
echo " <b>$i</b>" >> $tmpx
echo " </y>" >> $tmpx
echo " <y>" >> $sxpp
echo " <a>$i</a>" >> $sxpp
echo " <b>$i</b>" >> $sxpp
echo " </y>" >> $sxpp
done
cat<<EOF >> $tmpx
cat<<EOF >> $sxpp
</x>
</x2>
</x1>
</x0>
</config>
EOF
else
echo -n "<config><x0 xmlns=\"urn:example:clixon\"><x1><x2><name>ip</name><x>" > $tmpx
for (( i=0; i<$perfnr; i++ )); do
echo -n "<y><a>$i</a><b>$i</b></y>" >> $tmpx
done
echo "</x></x2></x1></x0></config>" >> $tmpx
fi
if false; then # XXX JSON dont work as datastore yet
# Then generate large JSON file (cant translate namespace - long story)
tmpj=$dir/tmp.json
new "generate large startup config ($tmpj) with $perfnr entries"
echo -n '{"config": {"scaling:x":{"y":[' > $tmpj
if false; then # ---------- not supported
new "generate pretty-printed json startup config ($sj) with $perfnr entries"
echo -n '{"config": {"scaling:x":{"y":[' > $sj
for (( i=0; i<$perfnr; i++ )); do
if [ $i -ne 0 ]; then
echo -n ",{\"a\":$i,\"b\":$i}" >> $tmpj
echo -n ",{\"a\":$i,\"b\":$i}" >> $sj
else
echo -n "{\"a\":$i,\"b\":$i}" >> $tmpj
echo -n "{\"a\":$i,\"b\":$i}" >> $sj
fi
done
echo "]}}}" >> $tmpj
echo "]}}}" >> $sj
fi
# Loop over mode and format
for mode in startup running; do
file=$dir/${mode}_db
for format in xml; do # json - something w namespaces
sudo rm -f $file
sudo touch $file
sudo chmod 666 $file
case $format in
xml)
echo "cp $tmpx $file"
cp $tmpx $file
mode=startup # running
format=xml
sdb=$dir/${mode}_db
for variant in prefix plain pretty; do
case $variant in
plain)
f=$sx
;;
json)
cp $tmpj $file
pretty)
f=$sxpp
;;
prefix)
f=$sxpre
;;
esac
new "Startup format: $format mode:$mode"
sudo rm -f $sdb
sudo touch $sdb
sudo chmod 666 $sdb
cp $f $sdb
new "Startup $format $variant"
# Cannot use start_backend here due to expected error case
{ time -p sudo $clixon_backend -F1 -D $DBG -s $mode -f $cfg -y $fyang -o CLICON_XMLDB_FORMAT=$format 2> /dev/null; } 2>&1 | awk '/real/ {print $2}'
done
done
rm -rf $dir