/* * Copyright (C) 2009-2016 Olof Hagsand and Benny Holmgren This file is part of CLIXON. CLIXON is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. CLIXON is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with CLIXON; see the file LICENSE. If not, see www.gnu.org/licenses. * Limited XML XPATH and XSLT functions. * NOTE: there is a main function at the end of this file where you can test out * different xpath expressions. * Look at the end of the file for a test unit program */ /* https://www.w3.org/TR/xpath/ Implementation of a limited xslt xpath syntax. Some examples. Given the following xml tree: 42 99 22 With the following xpath examples. There are some diffs and many limitations compared to the xml standards: / whole tree ... 
/bbb /aaa/bbb 42 99 //bbb as above //b?b as above //b\* as above //b\*\/ccc 42 99 //\*\/ccc 42 99 22 -- //bbb@x x="hello" //bbb[@x] 42 99 //bbb[@x=hello] 42 //bbb[@x="hello"] as above //bbb[0] 42 //bbb[ccc=99] 99 --- //\*\/[ccc=99] same as above '//bbb | //ddd' 42 99 22 (NB spaces) etc For xpath v1.0 see http://www.w3.org/TR/xpath/ record[name=c][time=d] in c 45654df4-2292-45d3-9ca5-ee72452568a8 */ #include #include #include #include #include #include #include #include #include /* cligen */ #include /* clicon */ #include "clixon_err.h" #include "clixon_log.h" #include "clixon_xml.h" #include "clixon_xsl.h" /* Constants */ #define XPATH_VEC_START 128 /* * Types */ struct searchvec{ cxobj **sv_v0; /* here is result */ int sv_v0len; cxobj **sv_v1; /* this is tmp storage */ int sv_v1len; int sv_max; }; typedef struct searchvec searchvec; /* Local types */ enum axis_type{ A_SELF, A_CHILD, A_PARENT, A_ROOT, A_ANCESTOR, A_DESCENDANT_OR_SELF, /* actually descendant-or-self */ }; struct map_str2int{ char *ms_str; /* axis name string, see atmap */ int ms_int; /* enum axis_type value, -1 in the terminating entry */ }; /* Mapping between axis type string <--> int */ static const struct map_str2int atmap[] = { {"self", A_SELF}, {"child", A_CHILD}, {"parent", A_PARENT}, {"root", A_ROOT}, {"ancestor", A_ANCESTOR}, {"descendant-or-self", A_DESCENDANT_OR_SELF}, {NULL, -1} }; struct xpath_predicate{ struct xpath_predicate *xp_next; /* next predicate of the same element */ char *xp_expr; /* predicate text without the enclosing [] */ }; struct xpath_element{ struct xpath_element *xe_next; enum axis_type xe_type; char *xe_str; /* eg for child */ struct xpath_predicate *xe_predicate; /* eg within [] */ }; static int xpath_split(char *xpathstr, char **pathexpr); /*! Return the atmap name of an axis type, or NULL if the type is not in the table */ static char *axis_type2str(enum axis_type type) __attribute__ ((unused)); static char * axis_type2str(enum axis_type type) { const struct map_str2int *at; for (at = &atmap[0]; at->ms_str; at++) if (at->ms_int == type) return at->ms_str; return NULL; } /*! Write every element of a parsed xpath to f: axis name, node string and each predicate within [] * @param[in] f Output stream * @param[in] xplist Head of the parsed element list * @retval 0 Always */ static int xpath_print(FILE *f, struct xpath_element *xplist) { struct xpath_element *xe; struct xpath_predicate *xp; 
for (xe=xplist; xe; xe=xe->xe_next){ fprintf(f, "\t:%s %s ", axis_type2str(xe->xe_type), xe->xe_str?xe->xe_str:""); for (xp=xe->xe_predicate; xp; xp=xp->xp_next) fprintf(f, "[%s]", xp->xp_expr); } return 0; } /*! Split a predicate string on "][" and add each part to the predicate list of xe * @param[in,out] xe Element whose xe_predicate list is extended * @param[in,out] pred Predicate text without the outer brackets, eg "foo][bar". Modified in place * @retval 0 OK * @retval -1 Error (malloc or strdup failed) * The string is scanned backwards and each part is prepended, so the list ends up * in left-to-right order. * NOTE(review): the scan starts at strlen(pred)-2, so for a one-character predicate * (eg "0") the loop body is never entered and no predicate is added. * NOTE(review): xp is not freed if the strdup of its expression fails. */ static int xpath_parse_predicate(struct xpath_element *xe, char *pred) { int retval = -1; struct xpath_predicate *xp; char *s; int i; int len; len = strlen(pred); for (i=len-2; i>=0; i--){ /* -2 since we search for ][ */ s = &pred[i]; if (i==0 || (*(s)==']' && *(s+1)=='[')){ if (i) { *(s)= '\0'; s += 2; } if ((xp = malloc(sizeof(*xp))) == NULL){ clicon_err(OE_UNIX, errno, "malloc"); goto done; } memset(xp, 0, sizeof(*xp)); if ((xp->xp_expr = strdup(s)) == NULL){ clicon_err(OE_XML, errno, "%s: strdup", __FUNCTION__); goto done; } xp->xp_next = xe->xe_predicate; xe->xe_predicate = xp; } } retval = 0; done: return retval; } /*! Allocate an xpath element and link it in at the tail position given by xpnext * @param[in] atype Axis type of the new element * @param[in] str Node string, optionally followed by [predicates], or NULL * @param[in,out] xpnext Address of the tail link. On success it is advanced to the xe_next field of the new element * @retval 0 OK * @retval -1 Error * A private copy of str is split with xpath_split. An empty node string is stored as "*". * NOTE(review): on the error paths only the temporary copy is freed, xe itself is not. */ static int xpath_element_new(enum axis_type atype, char *str, struct xpath_element ***xpnext) { int retval = -1; struct xpath_element *xe; char *str1 = NULL; char *pred; if ((xe = malloc(sizeof(*xe))) == NULL){ clicon_err(OE_UNIX, errno, "malloc"); goto done; } memset(xe, 0, sizeof(*xe)); xe->xe_type = atype; if (str){ if ((str1 = strdup(str)) == NULL){ clicon_err(OE_XML, errno, "%s: strdup", __FUNCTION__); goto done; } if (xpath_split(str1, &pred) < 0) /* Can be more predicates */ goto done; if (strlen(str1)){ if ((xe->xe_str = strdup(str1)) == NULL){ clicon_err(OE_XML, errno, "%s: strdup", __FUNCTION__); goto done; } } else{ if ((xe->xe_str = strdup("*")) == NULL){ clicon_err(OE_XML, errno, "%s: strdup", __FUNCTION__); goto done; } } if (pred && strlen(pred)){ if (xpath_parse_predicate(xe, pred) < 0) goto done; } } (**xpnext) = xe; *xpnext = &xe->xe_next; retval = 0; done: if (str1) free(str1); return retval; } /*! Free one xpath element together with its node string and all of its predicates * @param[in] xe Element to free, must not be NULL * @retval 0 Always */ static int xpath_element_free(struct xpath_element *xe) { struct xpath_predicate *xp; if (xe->xe_str) free(xe->xe_str); while ((xp = xe->xe_predicate) != NULL){ xe->xe_predicate = xp->xp_next; if (xp->xp_expr) free(xp->xp_expr); 
 free(xp); } free(xe); return 0; } /*! Free a whole list of xpath elements by calling xpath_element_free on each * @param[in] xplist Head of the list, may be NULL * @retval 0 Always */ static int xpath_free(struct xpath_element *xplist) { struct xpath_element *xe, *xe_next; for (xe=xplist; xe; xe=xe_next){ xe_next = xe->xe_next; xpath_element_free(xe); } return 0; } /* * // is short for /descendant-or-self::node()/ */ static int xpath_parse(char *xpath, struct xpath_element **xplist0) { int retval = -1; int nvec = 0; char *p; char *s; char *s0; int i; struct xpath_element *xplist = NULL; struct xpath_element **xpnext = &xplist; if ((s0 = strdup(xpath)) == NULL){ clicon_err(OE_XML, errno, "%s: strdup", __FUNCTION__); goto done; } s = s0; if (strlen(s)) nvec = 1; while ((p = index(s, '/')) != NULL){ nvec++; *p = '\0'; s = p+1; } s = s0; for (i=0; i= * - * - = * @see https://www.w3.org/TR/xpath/#predicates */ static int xpath_expr(char *predicate_expression, uint16_t flags, cxobj ***vec0, size_t *vec0len) { char *e_a; char *e_v; int i; int retval = -1; cxobj *x; cxobj *xv; cxobj **vec = NULL; size_t veclen = 0; int oplen; char *tag; char *val; char *e0; char *e; if ((e0 = strdup(predicate_expression)) == NULL){ clicon_err(OE_UNIX, errno, "strdup"); goto done; } e = e0; if (*e == '@'){ /* @ attribute */ e++; e_v=e; e_a = strsep(&e_v, "="); if (e_a == NULL){ clicon_err(OE_XML, errno, "%s: malformed expression: [@%s]", __FUNCTION__, e); goto done; } for (i=0; i<*vec0len; i++){ xv = (*vec0)[i]; if ((x = xml_find(xv, e_a)) != NULL && (xml_type(x) == CX_ATTR)){ if (!e_v || strcmp(xml_value(x), e_v) == 0){ clicon_debug(2, "%s %x %x", __FUNCTION__, flags, xml_flag(xv, flags)); if (flags==0x0 || xml_flag(xv, flags)){ if (cxvec_append(xv, &vec, &veclen) < 0) goto done; break; /* xv added */ } } } } } else{ /* either or , where ='=' for now */ oplen = strcspn(e, "="); if (strlen(e+oplen)==0){ /* no operator */ if (sscanf(e, "%d", &i) == 1){ /* number */ if (i < *vec0len){ xv = (*vec0)[i]; /* XXX: cant compress: gcc breaks */ clicon_debug(2, "%s %x %x", __FUNCTION__, flags, xml_flag(xv, 
flags)) if (cxvec_append(xv, &vec, &veclen) < 0) goto done; } } else{ clicon_err(OE_XML, errno, "%s: malformed expression: [%s]", __FUNCTION__, e); goto done; } } else{ if ((tag = strsep(&e, "=")) == NULL){ clicon_err(OE_XML, errno, "%s: malformed expression: [%s]", __FUNCTION__, e); goto done; } for (i=0; i<*vec0len; i++){ xv = (*vec0)[i]; /* Check if more may match,... */ x = NULL; while ((x = xml_child_each(xv, x, CX_ELMNT)) != NULL) { if (strcmp(tag, xml_name(x)) != 0) continue; if ((val = xml_body(x)) != NULL && strcmp(val, e) == 0){ clicon_debug(2, "%s %x %x", __FUNCTION__, flags, xml_flag(xv, flags)); if (flags==0x0 || xml_flag(xv, flags)) if (cxvec_append(xv, &vec, &veclen) < 0) goto done; } } } } } /* replace the input vector with the filtered result */ free(*vec0); *vec0 = vec; *vec0len = veclen; retval = 0; done: if (e0) free(e0); return retval; } /*! Given vec0, add matches to vec1 * @param[in] xe XPATH in structured (parsed) form * @param[in] descendants0 Value of 'descendants' from the previous step (incremented there for descendant-or-self), 0 at the top-level call * @param[in] vec0 vector of XML trees * @param[in] vec0len length of XML trees * @param[in] flags if != 0, only match xml nodes matching flags * @param[out] vec2 Result XML node vector * @param[out] vec2len Length of result vector. * XXX: The function is entered with vec0, the result is appended to vec1 * vec0 --> vec * It is probably good if vec0 is not modified, it is an input parameter * On exit vec1 should contain the result. * What about internally? 
 * XXX: handling of (input)vec0-->vec-->vec2-->vec1 (result) */ static int xpath_find(struct xpath_element *xe, int descendants0, cxobj **vec0, size_t vec0len, uint16_t flags, cxobj ***vec2, size_t *vec2len ) { int retval = -1; int i; int j; cxobj *x = NULL; cxobj *xv; int descendants = 0; cxobj **vec1 = NULL; size_t vec1len = 0; struct xpath_predicate *xp; if (xe == NULL){ /* append */ for (i=0; ixe_type), xe->xe_str?xe->xe_str:""); #endif switch (xe->xe_type){ case A_SELF: break; case A_PARENT: for (i=0; ixe_str, CX_ELMNT, flags, &vec1, &vec1len) < 0) goto done; } } else for (i=0; ixe_str, xml_name(x), 0) == 0){ clicon_debug(2, "%s %x %x", __FUNCTION__, flags, xml_flag(x, flags)); if (flags==0x0 || xml_flag(x, flags)) if (cxvec_append(x, &vec1, &vec1len) < 0) goto done; } } } free(vec0); vec0 = vec1; vec0len = vec1len; break; case A_DESCENDANT_OR_SELF: /* Instead of collecting all descendants (which we could) just set a flag and treat that in the next operation */ descendants++; break; default: break; } /* remove duplicates */ for (i=0; ixe_predicate; xp; xp = xp->xp_next){ if (xpath_expr(xp->xp_expr, flags, &vec0, &vec0len) < 0) goto done; } if (xpath_find(xe->xe_next, descendants, vec0, vec0len, flags, vec2, vec2len) < 0) goto done; retval = 0; done: return retval; } /*! Transform eg "a/b[kalle]" -> "a/b" e="kalle" * @param[in,out] xpathstr Eg "a/b[kalle]" -> "a/b" * @param[out] pathexpr Eg "kalle", or NULL if xpathstr does not end with ']' or on error * @retval 0 OK * @retval -1 xpathstr ends with ']' but contains no '[' * Which also means: * "a/b[foo][bar]" -> pathexpr: "foo][bar" * @note destructively modify xpathstr, no new strings allocated */ static int xpath_split(char *xpathstr, char **pathexpr) { int retval = -1; int last; char *pe = NULL; if (strlen(xpathstr)){ last = strlen(xpathstr) - 1; /* cannot be -1: guarded by the strlen() check above 
 */ if (xpathstr[last] == ']'){ xpathstr[last] = '\0'; if (strlen(xpathstr)){ if ((pe = index(xpathstr,'[')) != NULL){ *pe = '\0'; pe++; } } if (pe==NULL){ clicon_err(OE_XML, errno, "%s: mismatched []: %s", __FUNCTION__, xpathstr); goto done; } } } retval = 0; done: *pathexpr = pe; return retval; } /*! Process single xpath expression on xml tree * @param[in] xpath string with XPATH syntax * @param[in] vec0 vector of XML trees * @param[in] vec0len length of XML trees * @param[in] flags if != 0, only match xml nodes matching flags * @param[out] vec2 Result XML node vector * @param[out] vec2len Length of result vector. * @retval 0 Always, see the review note below * The search is run by xpath_find on a copy of vec0 made with cxvec_dup. * NOTE(review): every failure jumps to done, which returns 0, so callers cannot * detect an error. When xpath_find fails, xpath_free is skipped and xplist is * not released. xplist also has no initial value. */ static int xpath_exec(char *xpath, cxobj **vec0, size_t vec0len, uint16_t flags, cxobj ***vec2, size_t *vec2len) { struct xpath_element *xplist; cxobj **vec1; size_t vec1len; if (cxvec_dup(vec0, vec0len, &vec1, &vec1len) < 0) goto done; if (xpath_parse(xpath, &xplist) < 0) goto done; if (debug > 1) xpath_print(stderr, xplist); if (xpath_find(xplist, 0, vec1, vec1len, flags, vec2, vec2len) < 0) goto done; if (xpath_free(xplist) < 0) goto done; done: return 0; } /* xpath_exec */ /*! Intermediate xpath function to handle 'conditional' cases. * For example: xpath = //a | //b. * The xpath is split at each " | " (the surrounding spaces are required) into * several xpath_exec subcalls (eg xpath=//a and xpath=//b) that are all given * the same result vector. * Note: if a match is found in both, two (or more) same results will be * returned. * Note, this could be 'folded' into xpath1 but I judged it too complex. * NOTE(review): the "< 0" check on xpath_exec cannot trigger as long as * xpath_exec always returns 0. 
 */ static int xpath_choice(cxobj *xtop, char *xpath0, uint16_t flags, cxobj ***vec1, size_t *vec1len) { int retval = -1; char *s0; char *s1; char *s2; char *xpath; cxobj **vec0 = NULL; size_t vec0len = 0; if ((s0 = strdup(xpath0)) == NULL){ clicon_err(OE_XML, errno, "%s: strdup", __FUNCTION__); goto done; } s2 = s1 = s0; /* start vector with xtop as its only entry */ if ((vec0 = calloc(1, sizeof(cxobj *))) == NULL){ clicon_err(OE_UNIX, errno, "calloc"); goto done; } vec0[0] = xtop; vec0len++; while (s1 != NULL){ s2 = strstr(s1, " | "); if (s2 != NULL){ *s2 = '\0'; /* terminate xpath */ s2 += 3; /* skip past " | " */ } xpath = s1; s1 = s2; if (xpath_exec(xpath, vec0, vec0len, flags, vec1, vec1len) < 0) goto done; } retval = 0; done: if (s0) free(s0); if (vec0) free(vec0); return retval; } /*! Run xpath_choice from cxtop with flags 0 and return the first entry of the result * @param[in] cxtop xml-tree where to search * @param[in] xpath string with XPATH syntax * @retval First entry of the result vector, or NULL if the vector is empty or on error * The temporary result vector is freed before returning. */ static cxobj * xpath_first0(cxobj *cxtop, char *xpath) { cxobj **vec0 = NULL; size_t vec0len = 0; cxobj *xn = NULL; if (xpath_choice(cxtop, xpath, 0, &vec0, &vec0len) < 0) goto done; if (vec0len) xn = vec0[0]; else xn = NULL; done: if (vec0) free(vec0); return xn; } /*! A restricted xpath function where the first matching entry is returned * See xpath1() on details for subset. * args: * @param[in] cxtop xml-tree where to search * @param[in] xpath string with XPATH syntax * @retval xml-tree of first match, or NULL on error. * * @code * cxobj *x; * if ((x = xpath_first(xtop, "//symbol/foo")) != NULL) { * ... * } * @endcode * Note that the returned pointer points into the original tree so should not be freed * after use. * @see also xpath_vec. * @note The xpath is given as a printf(3)-style format string followed by its * arguments. NULL is also returned when nothing matches. */ cxobj * xpath_first(cxobj *cxtop, char *format, ...) 
{ cxobj *retval = NULL; va_list ap; size_t len; char *xpath; va_start(ap, format); /* first round: only measure the formatted xpath. NOTE(review): a negative (error) return is not checked and would wrap in the size_t */ len = vsnprintf(NULL, 0, format, ap); va_end(ap); /* allocate a string exactly fitting the formatted xpath */ if ((xpath = malloc(len+1)) == NULL){ clicon_err(OE_UNIX, errno, "malloc"); goto done; } /* second round: write the formatted xpath into the buffer */ va_start(ap, format); if (vsnprintf(xpath, len+1, format, ap) < 0){ clicon_err(OE_UNIX, errno, "vsnprintf"); va_end(ap); goto done; } va_end(ap); retval = xpath_first0(cxtop, xpath); done: if (xpath) free(xpath); return retval; } /*! A restricted xpath iterator that loops over all matching entries. Dont use. * * See xpath1() on details for subset. * @param[in] cxtop xml-tree where to search * @param[in] xpath string with XPATH syntax * @param[in] xprev iterator/result should be initiated to NULL * @retval xml-tree of n:th match, or NULL on error. * * @code * cxobj *x = NULL; * while ((x = xpath_each(cxtop, "//symbol/foo", x)) != NULL) { * ... * } * @endcode * * Note that the returned pointer points into the original tree so should not be freed * after use. * @see also xpath, xpath_vec. * NOTE: uses a static variable: consider replacing with xpath_vec() instead * NOTE(review): on restart (xprev == NULL) the static vector is freed but the * pointer is not reset to NULL before xpath_choice is called. */ cxobj * xpath_each(cxobj *cxtop, char *xpath, cxobj *xprev) { static cxobj **vec0 = NULL; /* XXX */ static size_t vec0len = 0; cxobj *xn = NULL; int i; if (xprev == NULL){ if (vec0) // XXX free(vec0); // XXX vec0len = 0; if (xpath_choice(cxtop, xpath, 0, &vec0, &vec0len) < 0) goto done; } if (vec0len){ if (xprev==NULL) xn = vec0[0]; else{ for (i=0; i=vec0len-1) xn = NULL; else xn = vec0[i+1]; } } else xn = NULL; done: return xn; } /*! A restricted xpath that returns a vector of matches * * See xpath1() on details for subset. * @param[in] cxtop xml-tree where to search * @param[in] xpath string with XPATH syntax * @param[out] vec vector of xml-trees. 
Vector must be free():d after use * @param[out] veclen returns length of vector in return value * @retval 0 OK * @retval -1 error. * * @code * cxobj **vec; * size_t veclen; * if (xpath_vec(cxtop, "//symbol/foo", &vec, &veclen) < 0) * got err; * for (i=0; i] * read xml from input * Example compile: gcc -g -o xpath -I. -I../clixon ./clixon_xsl.c -lclixon -lcligen * Example run: echo "" | xpath "a" */ #if 0 /* Test program */ static int usage(char *argv0) { fprintf(stderr, "usage:%s .\n\tInput on stdin\n", argv0); exit(0); } int main(int argc, char **argv) { int i; cxobj **xv; cxobj *x; cxobj *xn; size_t xlen = 0; if (argc != 2){ usage(argv[0]); return 0; } if (clicon_xml_parse_file(0, &x, "") < 0){ fprintf(stderr, "parsing 2\n"); return -1; } printf("\n"); if (xpath_vec(x, argv[1], &xv, &xlen) < 0) return -1; if (xv){ for (i=0; i