* Better compliance with XSD regexps (when transforming to Posix regexps)

* Added `\p{L}` and `\p{N}`
   * Added escaping of `$`
* Added regexp test [test/test_pattern.sh]
This commit is contained in:
Olof hagsand 2019-05-22 20:21:48 +02:00
parent 7e109d1d4b
commit f7771d86c2
8 changed files with 724 additions and 32 deletions

View file

@ -140,8 +140,11 @@
### Minor changes
* Regexp improvements
* Added check for libxml in configure
* Better compliance with XSD regexps (when transforming to Posix regexps)
* Added `\p{L}` and `\p{N}`
* Added escaping of `$`
* Added clixon_util_regexp utility function
* Added regexp test [test/test_pattern.sh]
* Yang state get improvements
* Integrated state and config into same tree on retrieval, not separate trees
* Added cli functions `cli_show_config_state()` and `cli_show_auto_state()` for showing combined config and state info.

View file

@ -709,6 +709,11 @@ clixon_trim(char *str)
* "other" characters: #x0000-#x10FFFF]-[\p{P}\p{Z}\p{C}]
* \i letters + underscore and colon
* \c XML Namechar, see: https://www.w3.org/TR/2008/REC-xml-20081126/#NT-NameChar
*
* Not implemented:
* \p{X} category escape. The ones identified in openconfig and yang-models are:
* \p{L} Letters [ultmo]?
* \p{N} Numbers [dlo]?
*/
int
regexp_xsd2posix(char *xsd,
@ -718,6 +723,7 @@ regexp_xsd2posix(char *xsd,
cbuf *cb = NULL;
char x;
int i;
int j; /* lookahead */
int esc;
int minus = 0;
@ -743,12 +749,24 @@ regexp_xsd2posix(char *xsd,
case 'i': /* initial */
cprintf(cb, "[a-zA-Z_:]");
break;
case 'w': /* word */
//cprintf(cb, "[0-9a-zA-Z_\\\\-]")
cprintf(cb, "[^[:punct:][:space:][:cntrl:]]");
break;
case 'W': /* inverse of \w */
cprintf(cb, "[[:punct:][:space:][:cntrl:]]");
case 'p': /* category escape: \p{IsCategory} */
j = i+1;
if (j+2 < strlen(xsd) &&
xsd[j] == '{' &&
xsd[j+2] == '}'){
switch (xsd[j+1]){
case 'L': /* Letters */
cprintf(cb, "a-zA-Z"); /* assume in [] */
break;
case 'N': /* Numbers */
cprintf(cb, "0-9");
break;
default:
break;
}
i = j+2;
}
/* if syntax error, just leave it */
break;
case 's':
cprintf(cb, "[ \t\r\n]");
@ -756,6 +774,13 @@ regexp_xsd2posix(char *xsd,
case 'S':
cprintf(cb, "[^ \t\r\n]");
break;
case 'w': /* word */
//cprintf(cb, "[0-9a-zA-Z_\\\\-]")
cprintf(cb, "[^[:punct:][:space:][:cntrl:]]");
break;
case 'W': /* inverse of \w */
cprintf(cb, "[[:punct:][:space:][:cntrl:]]");
break;
default:
cprintf(cb, "\\%c", x);
break;
@ -763,6 +788,8 @@ regexp_xsd2posix(char *xsd,
}
else if (x == '\\')
esc++;
else if (x == '$')
cprintf(cb, "\\%c", x);
else if (x == ']' && minus){
cprintf(cb, "-]");
minus = 0;

View file

@ -96,7 +96,7 @@ identifier [A-Za-z_][A-Za-z0-9_\-\.]*
%s STRING
%s STRINGDQ
%s STRINGSQ
%s ESCAPE
%s DQESC
%s COMMENT1
%s COMMENT2
%s UNKNOWN
@ -228,7 +228,7 @@ identifier [A-Za-z_][A-Za-z0-9_\-\.]*
<STRING>. { clixon_yang_parselval.string = strdup(yytext);
return CHARS;}
<STRINGDQ>\\ { _YY->yy_lex_state = STRINGDQ; BEGIN(ESCAPE); }
<STRINGDQ>\\ { _YY->yy_lex_state = STRINGDQ; BEGIN(DQESC); }
<STRINGDQ>\" { BEGIN(_YY->yy_lex_string_state); return *yytext; }
<STRINGDQ>\n { _YY->yy_linenum++;
clixon_yang_parselval.string = strdup(yytext);
@ -243,7 +243,7 @@ identifier [A-Za-z_][A-Za-z0-9_\-\.]*
<STRINGSQ>[^'\n]+ { clixon_yang_parselval.string = strdup(yytext);
return CHARS;}
<ESCAPE>. { BEGIN(_YY->yy_lex_state);
<DQESC>[nt"\\] { BEGIN(_YY->yy_lex_state);
clixon_yang_parselval.string = strdup(yytext);
return CHARS; }
<COMMENT1>[^*\n]* /* eat anything that's not a '*' */

View file

@ -31,6 +31,7 @@ if [ -f ./site.sh ]; then
# test skiplist.
for f in $SKIPLIST; do
if [ "$testfile" = "$f" ]; then
echo ...skipped
return -1 # skip
fi
done

650
test/test_pattern.sh Executable file

File diff suppressed because one or more lines are too long

View file

@ -197,7 +197,7 @@ new "restconf delete $perfreq small config"
rnd=$(( ( RANDOM % $perfnr ) ))
curl -s -X DELETE http://localhost/restconf/data/scaling:x/y=$rnd
done > /dev/null; } 2>&1 | awk '/real/ {print $2}'
exit
# Now do leaf-lists instead of leafs
new "generate large leaf-list config"

View file

@ -153,7 +153,7 @@ if [ $BE -ne 0 ]; then
start_backend -s init -f $cfg -y $fyang
new "waiting"
sleep $RCWAIT
wait_backend
fi
new "cli defined extension"

View file

@ -33,6 +33,7 @@
* Utility for compiling regexp and checking validity
* gcc -I /usr/include/libxml2 regex.c -o regex -lxml2
* @see http://www.w3.org/TR/2004/REC-xmlschema-2-20041028
*/
#ifdef HAVE_CONFIG_H
#include "clixon_config.h" /* generated by config & autoconf */
@ -45,8 +46,6 @@
#include <stdlib.h>
#include <limits.h>
#undef HAVE_LIBXML2
#ifdef HAVE_LIBXML2 /* Actually it should check for a header file */
#include <libxml/xmlregexp.h>
#endif
@ -66,7 +65,9 @@
static int
regex_libxml2(char *regexp0,
char *content0,
int nr)
int nr,
int debug)
{
int retval = -1;
#ifdef HAVE_LIBXML2
@ -92,7 +93,8 @@ regex_libxml2(char *regexp0,
static int
regex_posix(char *regexp,
char *content,
int nr)
int nr,
int debug)
{
int retval = -1;
char *posix = NULL;
@ -105,6 +107,7 @@ regex_posix(char *regexp,
if (regexp_xsd2posix(regexp, &posix) < 0)
goto done;
clicon_debug(1, "posix: %s", posix);
len0 = strlen(posix);
if (len0 > sizeof(pattern)-5){
fprintf(stderr, "pattern too long\n");
@ -134,15 +137,15 @@ regex_posix(char *regexp,
static int
usage(char *argv0)
{
fprintf(stderr, "usage:%s [options] (either one of -p or -x)\n"
fprintf(stderr, "usage:%s [options]\n"
"where options are\n"
"\t-h \t\tHelp\n"
"\t-D <level>\tDebug\n"
"\t-p \txsd->posix translation regexp\n"
"\t-x \tlibxml2 regexp\n"
"\t-n <nr> \tIterate content match (0 means only compile)\n"
"\t-p \txsd->posix translation regexp (default)\n"
"\t-x \tlibxml2 regexp (alternative to -p)\n"
"\t-n <nr> \tIterate content match (default: 1, 0: no match only compile)\n"
"\t-r <regexp> \tregexp (mandatory)\n"
"\t-c <string> \tValue content string(mandatory)\n",
"\t-c <string> \tValue content string(mandatory if -n > 0)\n",
argv0
);
exit(0);
@ -157,10 +160,9 @@ main(int argc,
int c;
char *regexp = NULL;
char *content = NULL;
int posix = 0;
int libxml2 = 0;
int ret;
int nr = 1;
int mode = 0; /* 0 is posix, 1 is libxml */
optind = 1;
opterr = 0;
@ -174,14 +176,14 @@ main(int argc,
usage(argv0);
break;
case 'p': /* xsd->posix */
posix++;
mode = 0;
break;
case 'n': /* Number of iterations */
if ((nr = atoi(optarg)) < 0)
usage(argv0);
break;
case 'x': /* libxml2 */
libxml2++;
mode = 1;
break;
case 'r': /* regexp */
regexp = optarg;
@ -194,22 +196,31 @@ main(int argc,
break;
}
clicon_log_init(__FILE__, debug?LOG_DEBUG:LOG_INFO, CLICON_LOG_STDERR);
if (regexp == NULL || content == NULL)
if (regexp == NULL){
fprintf(stderr, "-r mandatory\n");
usage(argv0);
if (posix == libxml2)
}
if (nr > 0 && content == NULL){
fprintf(stderr, "-c mandatory (if -n > 0)\n");
usage(argv0);
}
if (mode != 0 && mode != 1){
fprintf(stderr, "Neither posix or libxml2 set\n");
usage(argv0);
}
clicon_debug(1, "regexp:%s", regexp);
clicon_debug(1, "content:%s", content);
if (libxml2){
if ((ret = regex_libxml2(regexp, content, nr)) < 0)
if (mode == 0){
if ((ret = regex_posix(regexp, content, nr, debug)) < 0)
goto done;
}
else if (posix){
if ((ret = regex_posix(regexp, content, nr)) < 0)
else if (mode == 1){
if ((ret = regex_libxml2(regexp, content, nr, debug)) < 0)
goto done;
}
else
goto done;
usage(argv0);
fprintf(stdout, "%d\n", ret);
exit(ret);
retval = 0;