diff --git a/CHANGELOG.md b/CHANGELOG.md index b292de24..25ab2ec3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -65,6 +65,7 @@ Users may have to change how they access the system ### Corrected Bugs +* Fixed: RESTCONF Native: Failed binding of socket in network namespace caused process zombie * Fixed problems with XPATH composite operations and functions in netconf get/get-config operations. * See [XPATH issues #219](https://github.com/clicon/clixon/issues/219) * Fix Union in xpath [XPATH issues #219](https://github.com/clicon/clixon/issues/219) diff --git a/lib/src/clixon_netns.c b/lib/src/clixon_netns.c index f6f74199..24720694 100644 --- a/lib/src/clixon_netns.c +++ b/lib/src/clixon_netns.c @@ -105,6 +105,7 @@ get_sock(int usock, done: return retval; } + #endif /* HAVE_SETNS */ /*! Create and bind stream socket @@ -196,7 +197,7 @@ fork_netns_socket(const char *netns, int retval = -1; int sp[2] = {-1, -1}; pid_t child; - int status = 0; + int wstatus = 0; char nspath[MAXPATHLEN]; /* Path to namespace file */ struct stat st; @@ -223,21 +224,25 @@ fork_netns_socket(const char *netns, /* Switch to namespace */ if ((fd=open(nspath, O_RDONLY)) < 0) { clicon_err(OE_UNIX, errno, "open(%s)", nspath); - return -1; + send_sock(sp[1], sp[1]); /* Dummy to wake parent */ + exit(1); /* Dont do return here, need to exit child */ } #ifdef HAVE_SETNS if (setns(fd, CLONE_NEWNET) < 0){ clicon_err(OE_UNIX, errno, "setns(%s)", netns); - return -1; + send_sock(sp[1], sp[1]); /* Dummy to wake parent */ + exit(1); /* Dont do return here, need to exit child */ } #endif close(fd); /* Create socket in this namespace */ - if (create_socket(sa, sin_len, backlog, flags, addrstr, &s) < 0) - return -1; + if (create_socket(sa, sin_len, backlog, flags, addrstr, &s) < 0){ + send_sock(sp[1], sp[1]); /* Dummy to wake parent */ + exit(1); /* Dont do return here, need to exit child */ + } /* Send socket to parent */ if (send_sock(sp[1], s) < 0) - return -1; + exit(1); /* Dont do return here, need to exit child */ close(s); close(sp[1]); exit(0); @@ -247,8 +252,14 @@ fork_netns_socket(const char *netns, if (get_sock(sp[0], sock) < 0) goto done; close(sp[0]); - if(waitpid(child, &status, 0) == child) + if(waitpid(child, &wstatus, 0) == child) ; // retval = WEXITSTATUS(status); /* Dont know what to do with status */ + if (WEXITSTATUS(wstatus)){ + clicon_debug(1, "%s wstatus:%d", __FUNCTION__, WEXITSTATUS(wstatus)); + *sock = -1; + clicon_err(OE_UNIX, EADDRNOTAVAIL, "bind(%s)", addrstr); + goto done; + } retval = 0; done: clicon_debug(1, "%s %d", __FUNCTION__, retval); diff --git a/test/test_restconf_netns.sh b/test/test_restconf_netns.sh index 102e0d42..936c6cc8 100755 --- a/test/test_restconf_netns.sh +++ b/test/test_restconf_netns.sh @@ -4,6 +4,10 @@ # Init running with a=42 # Get the config from default and netns namespace with/without SSL # Write b=99 in netns and read from default +# Also a failed usecase is failed bind in other namespace causes two processes/zombie +# 1. Configure one valid and one invalid address in other dataplane +# 2. Two restconf processes, one may be zombie + # Magic line must be first in script (see README.md) s="$_" ; . ./lib.sh || if [ "$s" = $0 ]; then exit 0; else return 0; fi @@ -35,6 +39,10 @@ veth=veth0 vethpeer=veth1 vaddr=10.23.1.1 # address in netns +vaddr2=10.23.10.1 # Used as invalid + +RESTCONFDBG=$DBG + # Create server certs certdir=$dir/certs srvkey=$certdir/srv_key.pem @@ -56,6 +64,7 @@ RESTCONFIG=$(cat <$srvkey $cakey false + $RESTCONFDBG default
0.0.0.0
@@ -134,7 +143,6 @@ sudo ip netns exec $netns ip link set dev lo up #sudo ip netns exec $netns ping $vaddr #----------------- - new "test params: -f $cfg" if [ $BE -ne 0 ]; then new "kill old backend" @@ -144,22 +152,22 @@ if [ $BE -ne 0 ]; then fi new "start backend -s init -f $cfg" start_backend -s init -f $cfg - - new "waiting" - wait_backend fi +new "wait backend" +wait_backend + if [ $RC -ne 0 ]; then new "kill old restconf daemon" stop_restconf_pre new "start restconf daemon" start_restconf -f $cfg - - new "waiting" - wait_restconf # need to use port 80/443 fi +new "wait restconf" +wait_restconf + new "add sample config w netconf" expecteof "$clixon_netconf -qf $cfg" 0 "$DEFAULTHELLOa42
]]>]]>" "^]]>]]>$" @@ -201,6 +209,109 @@ if [ $BE -ne 0 ]; then stop_backend -f $cfg fi +#----------------------------------------- + +RESTCONFIG=$(cat < + true + $RESTCONFDBG + none + false + + default +
127.0.0.1
+ 80 + false +
+ + $netns +
$vaddr
+ 80 + false +
+ + $netns +
$vaddr2
+ 80 + false +
+ +EOF +) + +cat < $cfg + + $cfg + ietf-netconf:startup + clixon-restconf:allow-auth-none + /usr/local/share/clixon + $IETFRFC + clixon-example + /usr/local/lib/$APPNAME/clispec + /usr/local/lib/$APPNAME/backend + example_backend.so$ + /usr/local/lib/$APPNAME/restconf + /usr/local/lib/$APPNAME/cli + $APPNAME + /usr/local/var/$APPNAME/$APPNAME.sock + /usr/local/var/$APPNAME/$APPNAME.pidfile + $dir + true + $RESTCONFIG + +EOF + +new "Failed usecase. Fail to bind in other namespace causes two processes/zombie" + +new "kill old restconf" +stop_restconf_pre + +new "test params: -f $cfg" +if [ $BE -ne 0 ]; then + new "kill old backend" + sudo clixon_backend -z -f $cfg + if [ $? -ne 0 ]; then + err + fi + + new "start backend -s init -f $cfg" + start_backend -s init -f $cfg +fi + +new "wait backend" +wait_backend + +if [ $RC -ne 0 ]; then + new "kill old restconf daemon" + stop_restconf_pre + + new "start restconf daemon" + start_restconf -f $cfg +fi + +new "wait restconf" +wait_restconf + + +sleep $DEMSLEEP +new "Check zombies" + +retx=$(ps aux| grep clixon | grep defunc | grep -v grep) +if [ -n "$retx" ]; then + err "No zombie process" "$retx" +fi + +if [ $BE -ne 0 ]; then + new "Kill backend" + # Check if premature kill + pid=$(pgrep -u root -f clixon_backend) + if [ -z "$pid" ]; then + err "backend already dead" + fi + # kill backend + stop_backend -f $cfg +fi + sudo ip link delete $veth sudo ip netns delete $netns