* xsd regular expression support for character classes [https://github.com/clicon/clixon/issues/68]

* added support for \c, \d, \w, \W, \s, \S
This commit is contained in:
Olof hagsand 2019-01-17 16:13:34 +01:00
parent 03ac48ab2b
commit 66f80e9304
3 changed files with 83 additions and 19 deletions

View file

@ -641,41 +641,68 @@ clixon_trim(char *str)
* POSIX ERE regexps according to man regex(3).
* @param[in] xsd Input regex string according XSD
* @param[out] posix Output (malloced) string according to POSIX ERE
* @see http://www.w3.org/TR/2004/REC-xmlschema-2-20041028
* @note that the translation is ad-hoc, may need more translations
* @see https://www.w3.org/TR/2004/REC-xmlschema-2-20041028/#regexs
* @see https://www.regular-expressions.info/posixbrackets.html#class translation
* Translation is not complete but covers some character sequences:
* \d decimal digit
* \w alphanum + underscore
*/
int
regexp_xsd2posix(char *xsd,
char **posix)
{
int retval = -1;
char *x;
char *p = NULL;
cbuf *cb = NULL;
char x;
int i;
int len;
int esc;
len = strlen(xsd);
x = xsd;
while ((x = strstr(x, "\\d")) != NULL){
len += 3; /* \d --> [0-9] */
x += 2;
}
if ((p = malloc(len+1)) == NULL){
clicon_err(OE_UNIX, errno, "malloc");
if ((cb = cbuf_new()) == NULL){
clicon_err(OE_UNIX, errno, "cbuf_new");
goto done;
}
memset(p, 0, len+1);
*posix = p;
esc=0;
for (i=0; i<strlen(xsd); i++){
if (strncmp(&xsd[i], "\\d", 2) == 0){
strcpy(p, "[0-9]");
p += 5; i++;
x = xsd[i];
if (esc){
esc = 0;
switch (x){
case 'c': /* xml namechar */
cprintf(cb, "[0-9a-zA-Z\\\\.\\\\-_:]");
break;
case 'd':
cprintf(cb, "[0-9]");
break;
case 'w':
cprintf(cb, "[0-9a-zA-Z_\\\\-]");
break;
case 'W':
cprintf(cb, "[^0-9a-zA-Z_\\\\-]");
break;
case 's':
cprintf(cb, "[ \t\r\n]");
break;
case 'S':
cprintf(cb, "[^ \t\r\n]");
break;
default:
cprintf(cb, "\\%c", x);
break;
}
}
else if (x == '\\')
esc++;
else
*p++ = xsd[i];
cprintf(cb, "%c", x);
}
if ((*posix = strdup(cbuf_get(cb))) == NULL){
clicon_err(OE_UNIX, errno, "strdup");
goto done;
}
retval = 0;
done:
if (cb)
cbuf_free(cb);
return retval;
}