From fdf19f5467ea00f4dd2d5cb1737722eae8535c67 Mon Sep 17 00:00:00 2001 From: Samuel Thibault Date: Sun, 4 Feb 2024 04:36:09 +0100 Subject: [PATCH 01/28] cli: Fix missing closing file descriptors --- cli.c | 31 +++++++++++++++++++++++++++++++ l2tpns.c | 2 +- l2tpns.h | 2 ++ util.c | 12 ++++++++---- 4 files changed, 42 insertions(+), 5 deletions(-) diff --git a/cli.c b/cli.c index bfbae29..b0049ac 100644 --- a/cli.c +++ b/cli.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -316,6 +317,36 @@ void cli_do(int sockfd) socklen_t l = sizeof(addr); if (fork_and_close()) return; + + /* Check that fork_and_close has closed everything but std* and the socket */ + int fdfd = open("/dev/fd", O_RDONLY|O_DIRECTORY); + if (fdfd >= 0) + { + DIR *fds = fdopendir(fdfd); + if (fds) + { + struct dirent *ent; + while ((ent = readdir(fds))) + { + if (!strcmp(ent->d_name, ".") + || !strcmp(ent->d_name, "..")) + continue; + + int fd = atoi(ent->d_name); + if (fd <= STDERR_FILENO) + continue; + if (fd == fdfd || fd == sockfd) + continue; + if (log_stream && fd == fileno(log_stream)) + continue; + + LOG(0, 0, 0, "Warning: fd %d is still open within cli. This may interfere with operations.\n", fd); + } + closedir(fds); + } + close(fdfd); + } + if (getpeername(sockfd, (struct sockaddr *) &addr, &l) == 0) { require_auth = addr.sin_addr.s_addr != inet_addr("127.0.0.1"); diff --git a/l2tpns.c b/l2tpns.c index aa9f06f..43c6413 100644 --- a/l2tpns.c +++ b/l2tpns.c @@ -81,7 +81,7 @@ static int tunidx; // ifr_ifindex of tun device int nlseqnum = 0; // netlink sequence number int min_initok_nlseqnum = 0; // minimun seq number for messages after init is ok static int syslog_log = 0; // are we logging to syslog -static FILE *log_stream = 0; // file handle for direct logging (i.e. direct into file, not via syslog). +FILE *log_stream = 0; // file handle for direct logging (i.e. direct into file, not via syslog). 
uint32_t last_id = 0; // Unique ID for radius accounting // Guest change char guest_users[10][32]; // Array of guest users diff --git a/l2tpns.h b/l2tpns.h index 53ace04..6d815ff 100644 --- a/l2tpns.h +++ b/l2tpns.h @@ -1037,7 +1037,9 @@ extern uint32_t last_id; extern struct Tstats *_statistics; extern in_addr_t my_address; extern int clifd; +extern int nlfd; extern int epollfd; +extern FILE *log_stream; struct event_data { enum { diff --git a/util.c b/util.c index d3b352a..c6a51c8 100644 --- a/util.c +++ b/util.c @@ -108,12 +108,8 @@ pid_t fork_and_close() if (udpfd[i] != -1) close(udpfd[i]); } - if (pppoediscfd != -1) close(pppoediscfd); if (controlfd != -1) close(controlfd); if (daefd != -1) close(daefd); - if (snoopfd != -1) close(snoopfd); - if (rand_fd != -1) close(rand_fd); - if (epollfd != -1) close(epollfd); for (i = 0; radfds && i < RADIUS_FDS; i++) close(radfds[i]); @@ -124,6 +120,14 @@ pid_t fork_and_close() close(bgp_peers[i].sock); #endif /* BGP */ + if (nlfd != -1) close(nlfd); + if (pppoediscfd != -1) close(pppoediscfd); + if (pppoesessfd != -1) close(pppoesessfd); + + if (snoopfd != -1) close(snoopfd); + if (rand_fd != -1) close(rand_fd); + if (epollfd != -1) close(epollfd); + return pid; } From 60329986b242e766fc965b853fb24694b710698d Mon Sep 17 00:00:00 2001 From: Samuel Thibault Date: Tue, 18 Apr 2023 00:07:40 +0200 Subject: [PATCH 02/28] Rename netlink infrastructure to rtnetlink We will introduce genetlink infrastructure just after this. --- l2tpns.c | 161 ++++++++++++++++++++++++++++++++++--------------------- l2tpns.h | 4 +- util.c | 2 +- 3 files changed, 102 insertions(+), 65 deletions(-) diff --git a/l2tpns.c b/l2tpns.c index 43c6413..05986ba 100644 --- a/l2tpns.c +++ b/l2tpns.c @@ -63,7 +63,7 @@ uint32_t call_serial_number = 0; // Globals configt *config = NULL; // all configuration -int nlfd = -1; // netlink socket +int rtnlfd = -1; // route netlink socket int tunfd = -1; // tun interface file handle. 
(network device) int udpfd[MAX_UDPFD + 1] = INIT_TABUDPFD; // array UDP file handle + 1 for lac udp int udplacfd = -1; // UDP LAC file handle @@ -78,8 +78,8 @@ int epollfd = -1; // event polling time_t basetime = 0; // base clock char hostname[MAXHOSTNAME] = ""; // us. static int tunidx; // ifr_ifindex of tun device -int nlseqnum = 0; // netlink sequence number -int min_initok_nlseqnum = 0; // minimun seq number for messages after init is ok +int rtnlseqnum = 0; // route netlink sequence number +int min_initok_rtnlseqnum = 0; // minimun seq number for messages after init is ok static int syslog_log = 0; // are we logging to syslog FILE *log_stream = 0; // file handle for direct logging (i.e. direct into file, not via syslog). uint32_t last_id = 0; // Unique ID for radius accounting @@ -240,8 +240,9 @@ struct Tstats *_statistics = NULL; struct Tringbuffer *ringbuffer = NULL; #endif -static ssize_t netlink_send(struct nlmsghdr *nh); -static void netlink_addattr(struct nlmsghdr *nh, int type, const void *data, int alen); +static ssize_t rtnetlink_send(struct nlmsghdr *nh); +static int netlink_handle_ack(struct nlmsghdr *nh, int min_initok_nlseqnum, char *tun_nl_phase_msg[]); +static void rtnetlink_addattr(struct nlmsghdr *nh, int type, const void *data, int alen); static void cache_ipmap(in_addr_t ip, sessionidt s); static void uncache_ipmap(in_addr_t ip); static void cache_ipv6map(struct in6_addr ip, int prefixlen, sessionidt s); @@ -497,20 +498,20 @@ static void routeset(sessionidt s, in_addr_t ip, int prefixlen, in_addr_t gw, in req.rt.rtm_scope = RT_SCOPE_LINK; req.rt.rtm_type = RTN_UNICAST; - netlink_addattr(&req.nh, RTA_OIF, &tunidx, sizeof(int)); + rtnetlink_addattr(&req.nh, RTA_OIF, &tunidx, sizeof(int)); n_ip = htonl(ip); - netlink_addattr(&req.nh, RTA_DST, &n_ip, sizeof(n_ip)); + rtnetlink_addattr(&req.nh, RTA_DST, &n_ip, sizeof(n_ip)); if (gw) { n_ip = htonl(gw); - netlink_addattr(&req.nh, RTA_GATEWAY, &n_ip, sizeof(n_ip)); + rtnetlink_addattr(&req.nh, 
RTA_GATEWAY, &n_ip, sizeof(n_ip)); } LOG(1, s, session[s].tunnel, "Route %s %s/%d%s%s\n", add ? "add" : "del", fmtaddr(htonl(ip), 0), prefixlen, gw ? " via" : "", gw ? fmtaddr(htonl(gw), 2) : ""); - if (netlink_send(&req.nh) < 0) + if (rtnetlink_send(&req.nh) < 0) LOG(0, 0, 0, "routeset() error in sending netlink message: %s\n", strerror(errno)); #ifdef BGP @@ -576,17 +577,17 @@ void route6set(sessionidt s, struct in6_addr ip, int prefixlen, int add) req.rt.rtm_scope = RT_SCOPE_LINK; req.rt.rtm_type = RTN_UNICAST; - netlink_addattr(&req.nh, RTA_OIF, &tunidx, sizeof(int)); - netlink_addattr(&req.nh, RTA_DST, &ip, sizeof(ip)); + rtnetlink_addattr(&req.nh, RTA_OIF, &tunidx, sizeof(int)); + rtnetlink_addattr(&req.nh, RTA_DST, &ip, sizeof(ip)); metric = 1; - netlink_addattr(&req.nh, RTA_METRICS, &metric, sizeof(metric)); + rtnetlink_addattr(&req.nh, RTA_METRICS, &metric, sizeof(metric)); LOG(1, s, session[s].tunnel, "Route %s %s/%d\n", add ? "add" : "del", inet_ntop(AF_INET6, &ip, ipv6addr, INET6_ADDRSTRLEN), prefixlen); - if (netlink_send(&req.nh) < 0) + if (rtnetlink_send(&req.nh) < 0) LOG(0, 0, 0, "route6set() error in sending netlink message: %s\n", strerror(errno)); #ifdef BGP @@ -613,10 +614,10 @@ static void initnetlink(void) { struct sockaddr_nl nladdr; - nlfd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); - if (nlfd < 0) + rtnlfd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); + if (rtnlfd < 0) { - LOG(0, 0, 0, "Can't create netlink socket: %s\n", strerror(errno)); + LOG(0, 0, 0, "Can't create route netlink socket: %s\n", strerror(errno)); exit(1); } @@ -624,21 +625,23 @@ static void initnetlink(void) nladdr.nl_family = AF_NETLINK; nladdr.nl_pid = getpid(); - if (bind(nlfd, (struct sockaddr *)&nladdr, sizeof(nladdr)) < 0) + if (bind(rtnlfd, (struct sockaddr *)&nladdr, sizeof(nladdr)) < 0) { - LOG(0, 0, 0, "Can't bind netlink socket: %s\n", strerror(errno)); + LOG(0, 0, 0, "Can't bind route netlink socket: %s\n", strerror(errno)); exit(1); } } -static ssize_t 
netlink_send(struct nlmsghdr *nh) +// +// Send message to a netlink socket +static ssize_t netlink_send(int fd, int *seqnum, struct nlmsghdr *nh) { struct sockaddr_nl nladdr; struct iovec iov; struct msghdr msg; nh->nlmsg_pid = getpid(); - nh->nlmsg_seq = ++nlseqnum; + nh->nlmsg_seq = ++*seqnum; // set kernel address memset(&nladdr, 0, sizeof(nladdr)); @@ -647,10 +650,19 @@ static ssize_t netlink_send(struct nlmsghdr *nh) iov = (struct iovec){ (void *)nh, nh->nlmsg_len }; msg = (struct msghdr){ (void *)&nladdr, sizeof(nladdr), &iov, 1, NULL, 0, 0 }; - return sendmsg(nlfd, &msg, 0); + return sendmsg(fd, &msg, 0); } -static ssize_t netlink_recv(void *buf, ssize_t len) +// +// Send message to the route netlink socket +static ssize_t rtnetlink_send(struct nlmsghdr *nh) +{ + return netlink_send(rtnlfd, &rtnlseqnum, nh); +} + +// +// Receive a message from a netlink socket +static ssize_t netlink_recv(int fd, void *buf, ssize_t len) { struct sockaddr_nl nladdr; struct iovec iov; @@ -663,11 +675,52 @@ static ssize_t netlink_recv(void *buf, ssize_t len) iov = (struct iovec){ buf, len }; msg = (struct msghdr){ (void *)&nladdr, sizeof(nladdr), &iov, 1, NULL, 0, 0 }; - return recvmsg(nlfd, &msg, 0); + return recvmsg(fd, &msg, 0); } +// +// Receive a message from the route netlink socket +static ssize_t rtnetlink_recv(void *buf, ssize_t len) +{ + return netlink_recv(rtnlfd, buf, len); +} + +// +// Look ack netlink message for errors +static int netlink_handle_ack(struct nlmsghdr *nh, int min_initok_nlseqnum, char *tun_nl_phase_msg[]) +{ + if (nh->nlmsg_type == NLMSG_ERROR) + { + struct nlmsgerr *errmsg = NLMSG_DATA(nh); + if (errmsg->error) + { + if (errmsg->msg.nlmsg_seq < min_initok_nlseqnum) + { + LOG(0, 0, 0, "Got a fatal netlink error (while %s): %s\n", tun_nl_phase_msg[nh->nlmsg_seq], strerror(-errmsg->error)); + exit(1); + } + else + { + LOG(0, 0, 0, "For netlink request %d, got a netlink error: %s\n", errmsg->msg.nlmsg_type, strerror(-errmsg->error)); + errno = 
-errmsg->error; + return -1; + } + } + // else it's an ack + return 0; + } + else + { + LOG(1, 0, 0, "Got an unknown netlink message: type %d seq %d flags %d\n", nh->nlmsg_type, nh->nlmsg_seq, nh->nlmsg_flags); + errno = EIO; + return -1; + } +} + +// +// Add an attribute to a message for a route netlink socket /* adapted from iproute2 */ -static void netlink_addattr(struct nlmsghdr *nh, int type, const void *data, int alen) +static void rtnetlink_addattr(struct nlmsghdr *nh, int type, const void *data, int alen) { int len = RTA_LENGTH(alen); struct rtattr *rta; @@ -680,7 +733,7 @@ static void netlink_addattr(struct nlmsghdr *nh, int type, const void *data, int } // messages corresponding to different phases seq number -static char *tun_nl_phase_msg[] = { +static char *tun_rtnl_phase_msg[] = { "initialized", "getting tun interface index", "setting tun interface parameters", @@ -753,12 +806,12 @@ static void inittun(void) /* Bump up the qlen to deal with bursts from the network */ txqlen = 1000; - netlink_addattr(&req.nh, IFLA_TXQLEN, &txqlen, sizeof(txqlen)); + rtnetlink_addattr(&req.nh, IFLA_TXQLEN, &txqlen, sizeof(txqlen)); /* set MTU to modem MRU */ mtu = MRU; - netlink_addattr(&req.nh, IFLA_MTU, &mtu, sizeof(mtu)); + rtnetlink_addattr(&req.nh, IFLA_MTU, &mtu, sizeof(mtu)); - if (netlink_send(&req.nh) < 0) + if (rtnetlink_send(&req.nh) < 0) goto senderror; memset(&req, 0, sizeof(req)); @@ -778,8 +831,8 @@ static void inittun(void) for (i = 0; i < config->nbmultiaddress ; i++) { ip = config->iftun_n_address[i]; - netlink_addattr(&req.nh, IFA_LOCAL, &ip, sizeof(ip)); - if (netlink_send(&req.nh) < 0) + rtnetlink_addattr(&req.nh, IFA_LOCAL, &ip, sizeof(ip)); + if (rtnetlink_send(&req.nh) < 0) goto senderror; } } @@ -789,9 +842,9 @@ static void inittun(void) ip = config->iftun_address; else ip = 0x01010101; // 1.1.1.1 - netlink_addattr(&req.nh, IFA_LOCAL, &ip, sizeof(ip)); + rtnetlink_addattr(&req.nh, IFA_LOCAL, &ip, sizeof(ip)); - if (netlink_send(&req.nh) < 0) + if 
(rtnetlink_send(&req.nh) < 0) goto senderror; } @@ -817,9 +870,9 @@ static void inittun(void) ip6.s6_addr[0] = 0xFE; ip6.s6_addr[1] = 0x80; ip6.s6_addr[15] = 1; - netlink_addattr(&req.nh, IFA_LOCAL, &ip6, sizeof(ip6)); + rtnetlink_addattr(&req.nh, IFA_LOCAL, &ip6, sizeof(ip6)); - if (netlink_send(&req.nh) < 0) + if (rtnetlink_send(&req.nh) < 0) goto senderror; memset(&req, 0, sizeof(req)); @@ -836,9 +889,9 @@ static void inittun(void) // Global address is prefix::1 ip6 = config->ipv6_prefix; ip6.s6_addr[15] = 1; - netlink_addattr(&req.nh, IFA_LOCAL, &ip6, sizeof(ip6)); + rtnetlink_addattr(&req.nh, IFA_LOCAL, &ip6, sizeof(ip6)); - if (netlink_send(&req.nh) < 0) + if (rtnetlink_send(&req.nh) < 0) goto senderror; } @@ -847,15 +900,15 @@ static void inittun(void) req.nh.nlmsg_type = NLMSG_DONE; req.nh.nlmsg_len = NLMSG_LENGTH(0); - if (netlink_send(&req.nh) < 0) + if (rtnetlink_send(&req.nh) < 0) goto senderror; // if we get an error for seqnum < min_initok_nlseqnum, // we must exit as initialization went wrong if (config->ipv6_prefix.s6_addr[0]) - min_initok_nlseqnum = 5 + 1; // idx + if + addr + 2*addr6 + min_initok_rtnlseqnum = 5 + 1; // idx + if + addr + 2*addr6 else - min_initok_nlseqnum = 3 + 1; // idx + if + addr + min_initok_rtnlseqnum = 3 + 1; // idx + if + addr } return; @@ -4135,8 +4188,8 @@ static void mainloop(void) exit(1); } - LOG(4, 0, 0, "Beginning of main loop. clifd=%d, cluster_sockfd=%d, tunfd=%d, udpfd=%d, controlfd=%d, daefd=%d, nlfd=%d , udplacfd=%d, pppoefd=%d, pppoesessfd=%d\n", - clifd, cluster_sockfd, tunfd, udpfd[0], controlfd, daefd, nlfd, udplacfd, pppoediscfd, pppoesessfd); + LOG(4, 0, 0, "Beginning of main loop. 
clifd=%d, cluster_sockfd=%d, tunfd=%d, udpfd=%d, controlfd=%d, daefd=%d, rtnlfd=%d , udplacfd=%d, pppoefd=%d, pppoesessfd=%d\n", + clifd, cluster_sockfd, tunfd, udpfd[0], controlfd, daefd, rtnlfd, udplacfd, pppoediscfd, pppoesessfd); /* setup our fds to poll for input */ { @@ -4169,9 +4222,9 @@ static void mainloop(void) e.data.ptr = &d[i++]; epoll_ctl(epollfd, EPOLL_CTL_ADD, daefd, &e); - d[i].type = FD_TYPE_NETLINK; + d[i].type = FD_TYPE_RTNETLINK; e.data.ptr = &d[i++]; - epoll_ctl(epollfd, EPOLL_CTL_ADD, nlfd, &e); + epoll_ctl(epollfd, EPOLL_CTL_ADD, rtnlfd, &e); d[i].type = FD_TYPE_PPPOEDISC; e.data.ptr = &d[i++]; @@ -4334,27 +4387,11 @@ static void mainloop(void) break; #endif /* BGP */ - case FD_TYPE_NETLINK: + case FD_TYPE_RTNETLINK: { struct nlmsghdr *nh = (struct nlmsghdr *)p; - s = netlink_recv(p, size_bufp); - if (nh->nlmsg_type == NLMSG_ERROR) - { - struct nlmsgerr *errmsg = NLMSG_DATA(nh); - if (errmsg->error) - { - if (errmsg->msg.nlmsg_seq < min_initok_nlseqnum) - { - LOG(0, 0, 0, "Got a fatal netlink error (while %s): %s\n", tun_nl_phase_msg[nh->nlmsg_seq], strerror(-errmsg->error)); - exit(1); - } - else - LOG(0, 0, 0, "Got a netlink error: %s\n", strerror(-errmsg->error)); - } - // else it's a ack - } - else - LOG(1, 0, 0, "Got a unknown netlink message: type %d seq %d flags %d\n", nh->nlmsg_type, nh->nlmsg_seq, nh->nlmsg_flags); + s = rtnetlink_recv(p, size_bufp); + netlink_handle_ack(nh, min_initok_rtnlseqnum, tun_rtnl_phase_msg); n--; break; } diff --git a/l2tpns.h b/l2tpns.h index 6d815ff..aa0ef46 100644 --- a/l2tpns.h +++ b/l2tpns.h @@ -1037,7 +1037,7 @@ extern uint32_t last_id; extern struct Tstats *_statistics; extern in_addr_t my_address; extern int clifd; -extern int nlfd; +extern int rtnlfd; extern int epollfd; extern FILE *log_stream; @@ -1051,7 +1051,7 @@ struct event_data { FD_TYPE_DAE, FD_TYPE_RADIUS, FD_TYPE_BGP, - FD_TYPE_NETLINK, + FD_TYPE_RTNETLINK, FD_TYPE_PPPOEDISC, FD_TYPE_PPPOESESS } type; diff --git a/util.c b/util.c index 
c6a51c8..c9bc54e 100644 --- a/util.c +++ b/util.c @@ -120,7 +120,7 @@ pid_t fork_and_close() close(bgp_peers[i].sock); #endif /* BGP */ - if (nlfd != -1) close(nlfd); + if (rtnlfd != -1) close(rtnlfd); if (pppoediscfd != -1) close(pppoediscfd); if (pppoesessfd != -1) close(pppoesessfd); From 5db476bb6e91eeff515ab9ae0eefd5b0d2150982 Mon Sep 17 00:00:00 2001 From: Samuel Thibault Date: Tue, 18 Apr 2023 01:07:25 +0200 Subject: [PATCH 03/28] Add generic netlink infrastructure --- l2tpns.c | 88 +++++++++++++++++++++++++++++++++++++++++++++++++++++--- l2tpns.h | 1 + util.c | 1 + 3 files changed, 86 insertions(+), 4 deletions(-) diff --git a/l2tpns.c b/l2tpns.c index 05986ba..16748db 100644 --- a/l2tpns.c +++ b/l2tpns.c @@ -38,6 +38,7 @@ #include #include #include +#include #include "md5.h" #include "dhcp6.h" @@ -64,6 +65,7 @@ uint32_t call_serial_number = 0; // Globals configt *config = NULL; // all configuration int rtnlfd = -1; // route netlink socket +int genlfd = -1; // generic netlink socket int tunfd = -1; // tun interface file handle. (network device) int udpfd[MAX_UDPFD + 1] = INIT_TABUDPFD; // array UDP file handle + 1 for lac udp int udplacfd = -1; // UDP LAC file handle @@ -79,6 +81,7 @@ time_t basetime = 0; // base clock char hostname[MAXHOSTNAME] = ""; // us. static int tunidx; // ifr_ifindex of tun device int rtnlseqnum = 0; // route netlink sequence number +int genlseqnum = 0; // generic netlink sequence number int min_initok_rtnlseqnum = 0; // minimun seq number for messages after init is ok static int syslog_log = 0; // are we logging to syslog FILE *log_stream = 0; // file handle for direct logging (i.e. direct into file, not via syslog). 
@@ -241,8 +244,12 @@ struct Tringbuffer *ringbuffer = NULL; #endif static ssize_t rtnetlink_send(struct nlmsghdr *nh); -static int netlink_handle_ack(struct nlmsghdr *nh, int min_initok_nlseqnum, char *tun_nl_phase_msg[]); +static ssize_t genetlink_send(struct nlmsghdr *nh); +static ssize_t genetlink_recv(void *buf, ssize_t len); +static int netlink_handle_ack(struct nlmsghdr *nh, int gen, int min_initok_nlseqnum, char *tun_nl_phase_msg[]); static void rtnetlink_addattr(struct nlmsghdr *nh, int type, const void *data, int alen); +static void genetlink_addattr(struct nlmsghdr *nh, int type, const void *data, int alen); +static int genetlink_getattr(struct nlmsghdr *nh, int type, void *data, int alen); static void cache_ipmap(in_addr_t ip, sessionidt s); static void uncache_ipmap(in_addr_t ip); static void cache_ipv6map(struct in6_addr ip, int prefixlen, sessionidt s); @@ -630,6 +637,23 @@ static void initnetlink(void) LOG(0, 0, 0, "Can't bind route netlink socket: %s\n", strerror(errno)); exit(1); } + + genlfd = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC); + if (genlfd < 0) + { + LOG(0, 0, 0, "Can't create generic netlink socket: %s\n", strerror(errno)); + exit(1); + } + + memset(&nladdr, 0, sizeof(nladdr)); + nladdr.nl_family = AF_NETLINK; + nladdr.nl_pid = getpid(); + + if (bind(genlfd, (struct sockaddr *)&nladdr, sizeof(nladdr)) < 0) + { + LOG(0, 0, 0, "Can't bind generic netlink socket: %s\n", strerror(errno)); + exit(1); + } } // @@ -660,6 +684,13 @@ static ssize_t rtnetlink_send(struct nlmsghdr *nh) return netlink_send(rtnlfd, &rtnlseqnum, nh); } +// +// Send message to the generic netlink socket +static ssize_t genetlink_send(struct nlmsghdr *nh) +{ + return netlink_send(genlfd, &genlseqnum, nh); +} + // // Receive a message from a netlink socket static ssize_t netlink_recv(int fd, void *buf, ssize_t len) @@ -685,9 +716,16 @@ static ssize_t rtnetlink_recv(void *buf, ssize_t len) return netlink_recv(rtnlfd, buf, len); } +// +// Receive a message from the 
generic netlink socket +static ssize_t genetlink_recv(void *buf, ssize_t len) +{ + return netlink_recv(genlfd, buf, len); +} + // // Look ack netlink message for errors -static int netlink_handle_ack(struct nlmsghdr *nh, int min_initok_nlseqnum, char *tun_nl_phase_msg[]) +static int netlink_handle_ack(struct nlmsghdr *nh, int gen, int min_initok_nlseqnum, char *tun_nl_phase_msg[]) { if (nh->nlmsg_type == NLMSG_ERROR) { @@ -701,7 +739,13 @@ static int netlink_handle_ack(struct nlmsghdr *nh, int min_initok_nlseqnum, char } else { - LOG(0, 0, 0, "For netlink request %d, got a netlink error: %s\n", errmsg->msg.nlmsg_type, strerror(-errmsg->error)); + if (gen) + { + struct genlmsghdr *glh = NLMSG_DATA(&errmsg->msg); + LOG(0, 0, 0, "For generic netlink request %d on %d, got a netlink error: %s\n", glh->cmd, errmsg->msg.nlmsg_type, strerror(-errmsg->error)); + } + else + LOG(0, 0, 0, "For netlink request %d, got a netlink error: %s\n", errmsg->msg.nlmsg_type, strerror(-errmsg->error)); errno = -errmsg->error; return -1; } @@ -732,6 +776,42 @@ static void rtnetlink_addattr(struct nlmsghdr *nh, int type, const void *data, i nh->nlmsg_len = NLMSG_ALIGN(nh->nlmsg_len) + RTA_ALIGN(len); } +// +// Add an attribute to a message for a generic netlink socket +static void genetlink_addattr(struct nlmsghdr *nh, int type, const void *data, int alen) +{ + int len = NLA_HDRLEN + alen; + struct nlattr *nla; + + nla = (struct nlattr *)(((void *)nh) + NLMSG_ALIGN(nh->nlmsg_len)); + nla->nla_type = type; + nla->nla_len = len; + memcpy((char*)nla + NLA_HDRLEN, data, alen); + nh->nlmsg_len = NLMSG_ALIGN(nh->nlmsg_len) + NLA_ALIGN(len); +} + +// +// Find attribute from a message +static int genetlink_getattr(struct nlmsghdr *nh, int type, void *attr, int alen) +{ + char *glh = NLMSG_DATA(nh); + char *data = glh + GENL_HDRLEN; + char *data_end = data + nh->nlmsg_len - NLMSG_HDRLEN - GENL_HDRLEN; + struct nlattr *ah; + + for (ah = (void*) data; (char*) ah < data_end; ah = (void*) ((char *) ah + 
NLA_ALIGN(ah->nla_len))) + { + if ((ah->nla_type & NLA_TYPE_MASK) == type) + { + if (ah->nla_len != NLA_HDRLEN + alen) + LOG(0, 0, 0, "Erroneous attribute %d size\n", type); + memcpy(attr, ((char*) ah + NLA_HDRLEN), alen); + return 0; + } + } + return -1; +} + // messages corresponding to different phases seq number static char *tun_rtnl_phase_msg[] = { "initialized", @@ -4391,7 +4471,7 @@ static void mainloop(void) { struct nlmsghdr *nh = (struct nlmsghdr *)p; s = rtnetlink_recv(p, size_bufp); - netlink_handle_ack(nh, min_initok_rtnlseqnum, tun_rtnl_phase_msg); + netlink_handle_ack(nh, 0, min_initok_rtnlseqnum, tun_rtnl_phase_msg); n--; break; } diff --git a/l2tpns.h b/l2tpns.h index aa0ef46..8b1f121 100644 --- a/l2tpns.h +++ b/l2tpns.h @@ -1038,6 +1038,7 @@ extern struct Tstats *_statistics; extern in_addr_t my_address; extern int clifd; extern int rtnlfd; +extern int genlfd; extern int epollfd; extern FILE *log_stream; diff --git a/util.c b/util.c index c9bc54e..5c7c44b 100644 --- a/util.c +++ b/util.c @@ -121,6 +121,7 @@ pid_t fork_and_close() #endif /* BGP */ if (rtnlfd != -1) close(rtnlfd); + if (genlfd != -1) close(genlfd); if (pppoediscfd != -1) close(pppoediscfd); if (pppoesessfd != -1) close(pppoesessfd); From 1f4d79ce85d163944c4ccddaadb628d99f150a55 Mon Sep 17 00:00:00 2001 From: Samuel Thibault Date: Tue, 18 Apr 2023 01:05:04 +0200 Subject: [PATCH 04/28] Add L2TP kernel infrastructure This essentially exposes the kernel features, without using them yet. 
--- l2tpns.c | 463 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 463 insertions(+) diff --git a/l2tpns.c b/l2tpns.c index 16748db..658b87c 100644 --- a/l2tpns.c +++ b/l2tpns.c @@ -39,6 +39,15 @@ #include #include #include +#include +#include +#include +#include + +#ifndef PPPIOCBRIDGECHAN +#define PPPIOCBRIDGECHAN _IOW('t', 53, int) +#define PPPIOCUNBRIDGECHAN _IO('t', 54) +#endif #include "md5.h" #include "dhcp6.h" @@ -66,6 +75,7 @@ uint32_t call_serial_number = 0; configt *config = NULL; // all configuration int rtnlfd = -1; // route netlink socket int genlfd = -1; // generic netlink socket +int genl_l2tp_id = -1; // L2TP generic netlink ID int tunfd = -1; // tun interface file handle. (network device) int udpfd[MAX_UDPFD + 1] = INIT_TABUDPFD; // array UDP file handle + 1 for lac udp int udplacfd = -1; // UDP LAC file handle @@ -461,6 +471,373 @@ void random_data(uint8_t *buf, int len) buf[n++] = (rand() >> 4) & 0xff; } +// +// Create tunnel in kernel +static int create_kernel_tunnel(uint32_t tid, uint32_t peer_tid) +{ + struct { + struct nlmsghdr nh; + struct genlmsghdr glh; + char data[64]; + } req; + + if (genl_l2tp_id < 0) + { + errno = ENOSYS; + return -1; + } + + LOG(3, 0, tid, "Creating kernel tunnel from %u to %u\n", tid, peer_tid); + + memset(&req, 0, sizeof(req)); + + req.nh.nlmsg_type = genl_l2tp_id; + req.nh.nlmsg_flags = NLM_F_REQUEST|NLM_F_ACK; + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.glh)); + + req.glh.cmd = L2TP_CMD_TUNNEL_CREATE; + req.glh.version = L2TP_GENL_VERSION; + + uint32_t fd = udpfd[tunnel[tid].indexudp]; + genetlink_addattr(&req.nh, L2TP_ATTR_FD, &fd, sizeof(fd)); + genetlink_addattr(&req.nh, L2TP_ATTR_CONN_ID, &tid, sizeof(tid)); + genetlink_addattr(&req.nh, L2TP_ATTR_PEER_CONN_ID, &peer_tid, sizeof(peer_tid)); + uint8_t version = 2; + genetlink_addattr(&req.nh, L2TP_ATTR_PROTO_VERSION, &version, sizeof(version)); + uint16_t encap = L2TP_ENCAPTYPE_UDP; + genetlink_addattr(&req.nh, L2TP_ATTR_ENCAP_TYPE, 
&encap, sizeof(encap)); + + assert(req.nh.nlmsg_len < sizeof(req)); + + if (genetlink_send(&req.nh) < 0) + { + LOG(2, 0, tid, "Can't create tunnel %d to %d: %s\n", tid, peer_tid, strerror(errno)); + return -1; + } + + ssize_t size = genetlink_recv(&req, sizeof(req)); + if (size < 0) + { + LOG(1, 0, 0, "Can't receive answer for tunnel creation: %s\n", strerror(errno)); + return -1; + } + + if (netlink_handle_ack((struct nlmsghdr *)&req, 1, 0, NULL) < 0) + return -1; + + return 0; +} + +// +// Delete tunnel in kernel +static int delete_kernel_tunnel(uint32_t tid) +{ + struct { + struct nlmsghdr nh; + struct genlmsghdr glh; + char data[64]; + } req; + + if (genl_l2tp_id < 0) + { + errno = ENOSYS; + return -1; + } + + LOG(3, 0, tid, "Deleting kernel tunnel for %u\n", tid); + + memset(&req, 0, sizeof(req)); + + req.nh.nlmsg_type = genl_l2tp_id; + req.nh.nlmsg_flags = NLM_F_REQUEST|NLM_F_ACK; + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.glh)); + + req.glh.cmd = L2TP_CMD_TUNNEL_DELETE; + req.glh.version = L2TP_GENL_VERSION; + + genetlink_addattr(&req.nh, L2TP_ATTR_CONN_ID, &tid, sizeof(tid)); + + assert(req.nh.nlmsg_len < sizeof(req)); + + if (genetlink_send(&req.nh) < 0) + { + LOG(2, 0, tid, "Can't delete tunnel %d: %s\n", tid, strerror(errno)); + return -1; + } + + ssize_t size = genetlink_recv(&req, sizeof(req)); + if (size < 0) + { + LOG(1, 0, 0, "Can't receive answer for tunnel deletion: %s\n", strerror(errno)); + return -1; + } + if (netlink_handle_ack((struct nlmsghdr *)&req, 1, 0, NULL) < 0) + return -1; + + return 0; +} + +// +// Create session in kernel +static int create_kernel_session(uint32_t tid, uint32_t peer_tid, uint32_t sid, uint32_t peer_sid) +{ + struct { + struct nlmsghdr nh; + struct genlmsghdr glh; + char data[64]; + } req; + + if (genl_l2tp_id < 0) + { + errno = ENOSYS; + return -1; + } + + LOG(3, sid, tid, "Creating kernel session from %u:%u to %u:%u\n", tid, sid, peer_tid, peer_sid); + + memset(&req, 0, sizeof(req)); + + req.nh.nlmsg_type = 
genl_l2tp_id; + req.nh.nlmsg_flags = NLM_F_REQUEST|NLM_F_ACK; + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.glh)); + + req.glh.cmd = L2TP_CMD_SESSION_CREATE; + req.glh.version = L2TP_GENL_VERSION; + + genetlink_addattr(&req.nh, L2TP_ATTR_CONN_ID, &tid, sizeof(tid)); + genetlink_addattr(&req.nh, L2TP_ATTR_PEER_CONN_ID, &peer_tid, sizeof(peer_tid)); + genetlink_addattr(&req.nh, L2TP_ATTR_SESSION_ID, &sid, sizeof(sid)); + genetlink_addattr(&req.nh, L2TP_ATTR_PEER_SESSION_ID, &peer_sid, sizeof(peer_sid)); + uint16_t pwtype = L2TP_PWTYPE_PPP; + genetlink_addattr(&req.nh, L2TP_ATTR_PW_TYPE, &pwtype, sizeof(pwtype)); + + assert(req.nh.nlmsg_len < sizeof(req)); + + if (genetlink_send(&req.nh) < 0) + { + LOG(2, sid, tid, "Can't create session %d:%d to %d:%d: %s\n", tid, sid, peer_tid, peer_sid, strerror(errno)); + return -1; + } + + ssize_t size = genetlink_recv(&req, sizeof(req)); + if (size < 0) + { + LOG(1, 0, 0, "Can't receive answer for session creation: %s\n", strerror(errno)); + return -1; + } + if (netlink_handle_ack((struct nlmsghdr *)&req, 1, 0, NULL) < 0) + return -1; + + return 0; +} + +// +// Delete session in kernel +static int delete_kernel_session(uint32_t tid, uint32_t sid) +{ + struct { + struct nlmsghdr nh; + struct genlmsghdr glh; + char data[64]; + } req; + + if (genl_l2tp_id < 0) + { + errno = ENOSYS; + return -1; + } + + LOG(3, sid, tid, "Deleting kernel session for %u:%u\n", tid, sid); + + memset(&req, 0, sizeof(req)); + + req.nh.nlmsg_type = genl_l2tp_id; + req.nh.nlmsg_flags = NLM_F_REQUEST|NLM_F_ACK; + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.glh)); + + req.glh.cmd = L2TP_CMD_SESSION_DELETE; + req.glh.version = L2TP_GENL_VERSION; + + genetlink_addattr(&req.nh, L2TP_ATTR_CONN_ID, &tid, sizeof(tid)); + genetlink_addattr(&req.nh, L2TP_ATTR_SESSION_ID, &sid, sizeof(sid)); + + assert(req.nh.nlmsg_len < sizeof(req)); + + if (genetlink_send(&req.nh) < 0) + { + LOG(2, sid, tid, "Can't delete session %d:%d: %s\n", tid, sid, strerror(errno)); + return -1; 
+ } + + ssize_t size = genetlink_recv(&req, sizeof(req)); + if (size < 0) + { + LOG(1, 0, 0, "Can't receive answer for session deletion: %s\n", strerror(errno)); + return -1; + } + if (netlink_handle_ack((struct nlmsghdr *)&req, 1, 0, NULL) < 0) + return -1; + + return 0; +} + +// +// Create the kernel PPPoX socket +static int create_ppp_socket(int udp_fd, uint32_t tid, uint32_t peer_tid, uint32_t sid, uint32_t peer_sid, const struct sockaddr *dst, socklen_t addrlen) +{ + int pppox_fd; + int ret; + + if (genl_l2tp_id < 0) + return -1; + + LOG(3, sid, tid, "Creating PPPoL2TPsocket from %u:%u to %u:%u\n", tid, sid, peer_tid, peer_sid); + + pppox_fd = socket(AF_PPPOX, SOCK_DGRAM, PX_PROTO_OL2TP); + if (pppox_fd < 0) + { + LOG(2, sid, tid, "Can't create PPPoL2TP socket: %s\n", strerror(errno)); + return -1; + } + + struct sockaddr_pppol2tp sax; + memset(&sax, 0, sizeof(sax)); + + sax.sa_family = AF_PPPOX; + sax.sa_protocol = PX_PROTO_OL2TP; + sax.pppol2tp.fd = udp_fd; + memcpy(&sax.pppol2tp.addr, dst, addrlen); + sax.pppol2tp.s_tunnel = tid; + sax.pppol2tp.s_session = sid; + sax.pppol2tp.d_tunnel = peer_tid; + sax.pppol2tp.d_session = peer_sid; + + ret = connect(pppox_fd, (struct sockaddr *)&sax, sizeof(sax)); + if (ret < 0) + { + LOG(2, sid, tid, "Can't connect PPPoL2TP: %s\n", strerror(errno)); + close(pppox_fd); + return -1; + } + + return pppox_fd; +} + +// +// Get the kernel PPP channel +static int get_kernel_ppp_chan(sessionidt s, int pppox_fd) +{ + int ret; + int chindx; + + ret = ioctl(pppox_fd, PPPIOCGCHAN, &chindx); + if (ret < 0) + { + LOG(2, s, session[s].tunnel, "Can't get pppox_fd chan: %s\n", strerror(errno)); + return -1; + } + + return chindx; +} + +// +// Get the kernel PPP channel fd +static int create_kernel_ppp_chan(sessionidt s, int pppox_fd) +{ + int chindx = get_kernel_ppp_chan(s, pppox_fd); + int ret; + + int ppp_chan_fd = open("/dev/ppp", O_RDWR); + + LOG(3, s, session[s].tunnel, "Creating PPP channel\n"); + + ret = fcntl(ppp_chan_fd, F_GETFL, 
NULL); + if (ret < 0) + { + LOG(2, s, session[s].tunnel, "Can't get ppp chan flags: %s\n", strerror(errno)); + close(ppp_chan_fd); + return -1; + } + ret = fcntl(ppp_chan_fd, F_SETFL, ret | O_NONBLOCK); + if (ret < 0) + { + LOG(2, s, session[s].tunnel, "Can't set ppp chan flags: %s\n", strerror(errno)); + close(ppp_chan_fd); + return -1; + } + + ret = ioctl(ppp_chan_fd, PPPIOCATTCHAN, &chindx); + if (ret < 0) + { + LOG(2, s, session[s].tunnel, "Can't attach channel %d: %s\n", chindx, strerror(errno)); + close(ppp_chan_fd); + return -1; + } + + return ppp_chan_fd; +} + +// +// Create the kernel PPP interface +static int create_kernel_ppp_if(sessionidt s, int ppp_chan_fd, int *ifunit) +{ + int ppp_if_fd = open("/dev/ppp", O_RDWR); + int ret; + + LOG(3, s, session[s].tunnel, "Creating PPP interface\n"); + + ret = fcntl(ppp_if_fd, F_GETFL, NULL); + if (ret < 0) + { + LOG(2, s, session[s].tunnel, "Can't get ppp if flags: %s\n", strerror(errno)); + close(ppp_if_fd); + return -1; + } + ret = fcntl(ppp_if_fd, F_SETFL, ret | O_NONBLOCK); + if (ret < 0) + { + LOG(2, s, session[s].tunnel, "Can't set ppp if flags: %s\n", strerror(errno)); + close(ppp_if_fd); + return -1; + } + + ret = ioctl(ppp_if_fd, PPPIOCNEWUNIT, ifunit); + if (ret < 0) + { + LOG(2, s, session[s].tunnel, "Can't create ppp interface: %s\n", strerror(errno)); + close(ppp_if_fd); + return -1; + } + + ret = ioctl(ppp_chan_fd, PPPIOCCONNECT, ifunit); + if (ret < 0) + { + LOG(2, s, session[s].tunnel, "Can't attach channel to unit %d: %s\n", *ifunit, strerror(errno)); + close(ppp_if_fd); + return -1; + } + + return ppp_if_fd; +} + +// +// Bridge kernel channels to accelerate LAC +static int bridge_kernel_chans(sessionidt s, int pppox_fd, int pppox_fd2) +{ + int ppp_chan_fd = create_kernel_ppp_chan(s, pppox_fd); + int chindx2 = get_kernel_ppp_chan(s, pppox_fd2); + int ret; + + ret = ioctl(ppp_chan_fd, PPPIOCBRIDGECHAN, &chindx2); + close(ppp_chan_fd); + if (ret < 0) + { + LOG(2, s, session[s].tunnel, "Can't set LAC 
bridge: %s\n", strerror(errno)); + return -1; + } + return 0; +} + // Add a route // // This adds it to the routing table, advertises it @@ -615,6 +992,89 @@ void route6set(sessionidt s, struct in6_addr ip, int prefixlen, int add) return; } +// +// Get L2TP netlink id +static int16_t netlink_get_l2tp_id(void) +{ + struct { + struct nlmsghdr nh; + struct genlmsghdr glh; + char data[32]; + } req; + struct nlattr *ah; + int16_t ret; + + if (system("modprobe l2tp_ppp")) + LOG(3, 0, 0, "Can't modprobe l2tp_ppp: %s\n", strerror(errno)); + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_type = GENL_ID_CTRL; + req.nh.nlmsg_flags = NLM_F_REQUEST|NLM_F_ACK; + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.glh)); + + req.glh.cmd = CTRL_CMD_GETFAMILY; + req.glh.version = 1; + + genetlink_addattr(&req.nh, CTRL_ATTR_FAMILY_NAME, L2TP_GENL_NAME, sizeof(L2TP_GENL_NAME)); + + assert(req.nh.nlmsg_len < sizeof(req)); + + if (genetlink_send(&req.nh) < 0) + { + LOG(2, 0, 0, "Can't send request for l2tp netlink name: %s\n", strerror(errno)); + return -1; + } + + + ssize_t size = genetlink_recv(&req.nh, sizeof(req)); + if (size < 0) + { + LOG(2, 0, 0, "Can't receive answer for l2tp netlink name: %s\n", strerror(errno)); + return -1; + } + if (size < sizeof(req.nh)) + { + LOG(2, 0, 0, "Short answer for l2tp netlink name\n"); + return -1; + } + + if (req.nh.nlmsg_type != GENL_ID_CTRL) + { + LOG(2, 0, 0, "Unexpected answer type %d for l2tp netlink name.\n" + "Does your Linux kernel have the l2tp_netlink module available?\n", req.nh.nlmsg_type); + return -1; + } + if (size < NLMSG_HDRLEN + GENL_HDRLEN) + { + LOG(2, 0, 0, "Short answer for l2tp netlink name\n"); + return -1; + } + + size -= NLMSG_HDRLEN + GENL_HDRLEN; + ret = -1; + char *data = &req.data[0]; + for (ah = (void*) data; (char*) ah < data + size; ah = (void*) ((char *) ah + NLA_ALIGN(ah->nla_len))) + { + if ((ah->nla_type & NLA_TYPE_MASK) == CTRL_ATTR_FAMILY_ID) + { + if (ah->nla_len < NLA_HDRLEN + 2) + LOG(2, 0, 0, "Short netlink 
family ID for l2tp\n"); + ret = *(uint16_t*) ((char*) ah + NLA_HDRLEN); + break; + } + } + if (ret == -1) + LOG(2, 0, 0, "Did not get netlink family ID for l2tp\n"); + + size = genetlink_recv(&req, sizeof(req)); + if (size < 0) + LOG(2, 0, 0, "Can't receive ack for family ID: %s\n", strerror(errno)); + else + netlink_handle_ack((struct nlmsghdr *)&req, 1, 0, NULL); + + return ret; +} + // // Set up netlink socket static void initnetlink(void) @@ -654,6 +1114,9 @@ static void initnetlink(void) LOG(0, 0, 0, "Can't bind generic netlink socket: %s\n", strerror(errno)); exit(1); } + + genl_l2tp_id = netlink_get_l2tp_id(); + LOG(3, 0, 0, "gen l2tp id is %d\n", genl_l2tp_id); } // From b2bc6da8275b28368792ef56f4e477d9140027dd Mon Sep 17 00:00:00 2001 From: Samuel Thibault Date: Sun, 23 Apr 2023 23:01:05 +0200 Subject: [PATCH 05/28] Add support for deleting all kernel tunnels/sessions Unfortunately, tunnels and sessions can survive us, so we have to drop any tunnel/session left from a previous instance that might have crashed. 
--- l2tpns.c | 196 +++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 192 insertions(+), 4 deletions(-) diff --git a/l2tpns.c b/l2tpns.c index 658b87c..3e7c9d6 100644 --- a/l2tpns.c +++ b/l2tpns.c @@ -471,6 +471,132 @@ void random_data(uint8_t *buf, int len) buf[n++] = (rand() >> 4) & 0xff; } +// +// Clear all existing kernel items of a given type +static int delete_kernel_items(const char *name, int cmd, int id1, int id2, void (*delete_one)(uint32_t id1, uint32_t id2)) +{ + struct { + struct nlmsghdr nh; + struct genlmsghdr glh; + char data[8192]; + } req; + int seqnum; + + if (genl_l2tp_id < 0) + { + errno = ENOSYS; + return -1; + } + + LOG(3, 0, 0, "Deleting all kernel %ss\n", name); + + memset(&req, 0, sizeof(req)); + + req.nh.nlmsg_type = genl_l2tp_id; + req.nh.nlmsg_flags = NLM_F_REQUEST|NLM_F_ACK|NLM_F_DUMP; + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.glh)); + + req.glh.cmd = cmd; + req.glh.version = L2TP_GENL_VERSION; + + assert(req.nh.nlmsg_len < sizeof(req)); + + if (genetlink_send(&req.nh) < 0) + { + LOG(2, 0, 0, "Can't delete %ss: %s\n", name, strerror(errno)); + return -1; + } + seqnum = genlseqnum; + + /* 1 for receiving "done" */ + int nitems = 1; + int done = 0; + + while (done < nitems) + { + ssize_t size = genetlink_recv(&req, sizeof(req)); + if (size < 0) + { + LOG(2, 0, 0, "Can't receive answer for %s deletion: %s\n", name, strerror(errno)); + return -1; + } + + // Iterate over all answers + struct nlmsghdr *nh; + for (nh = &req.nh; size; nh = NLMSG_NEXT(nh, size)) + { + if (!NLMSG_OK(nh, size)) + { + LOG(2, 0, 0, "Short netlink answer: %d vs %zd\n", nh->nlmsg_len, size); + break; + } + + if (nh->nlmsg_type == NLMSG_NOOP) + { + // Ignore + continue; + } + + if (nh->nlmsg_type == NLMSG_DONE) + { + done++; + if (done < nitems) + LOG(3, 0, 0, "Done queueing, still %d/%d %ss deletion pending\n", done, nitems, name); + continue; + } + + if (nh->nlmsg_seq != seqnum) + { + // Consume acknoledgments of deletions. 
+ netlink_handle_ack(nh, 1, 0, NULL); + done++; + } + else + { + // Getting more items + if (nh->nlmsg_type != genl_l2tp_id) + { + LOG(2, 0, 0, "Unexpected generic netlink answer %d\n", req.nh.nlmsg_type); + continue; + } + + if (nh->nlmsg_len < NLMSG_HDRLEN + GENL_HDRLEN) + { + LOG(2, 0, 0, "Short answer for l2tp netlink name\n"); + continue; + } + + uint32_t ret; + if (genetlink_getattr(nh, id1, &ret, sizeof(ret)) != 0) + LOG(2, 0, 0, "Did not get %s ID\n", name); + else + { + if (!id2) + { + delete_one(ret, 0); + nitems++; + } + else + { + uint32_t ret2; + if (genetlink_getattr(nh, id2, &ret2, sizeof(ret2)) != 0) + LOG(2, 0, 0, "Did not get %s ID2\n", name); + else + { + // Queue deletion for this + delete_one(ret, ret2); + nitems++; + } + } + } + } + } + } + LOG(3, 0, 0, "Done deleting %ss\n", name); + + return 0; +} + // // Create tunnel in kernel static int create_kernel_tunnel(uint32_t tid, uint32_t peer_tid) @@ -529,8 +655,8 @@ static int create_kernel_tunnel(uint32_t tid, uint32_t peer_tid) } // -// Delete tunnel in kernel -static int delete_kernel_tunnel(uint32_t tid) +// Queue deleting tunnel in kernel +static int queue_delete_kernel_tunnel(uint32_t tid) { struct { struct nlmsghdr nh; @@ -565,6 +691,23 @@ static int delete_kernel_tunnel(uint32_t tid) return -1; } + return 0; +} + +// +// Delete tunnel in kernel +static int delete_kernel_tunnel(uint32_t tid) +{ + int ret = queue_delete_kernel_tunnel(tid); + if (ret < 0) + return -1; + + struct { + struct nlmsghdr nh; + struct genlmsghdr glh; + char data[64]; + } req; + ssize_t size = genetlink_recv(&req, sizeof(req)); if (size < 0) { @@ -577,6 +720,20 @@ static int delete_kernel_tunnel(uint32_t tid) return 0; } +// +// Clear all existing tunnels +// +// Unfortunately, tunnels survive us, so we have to drop any tunnel left from a +// previous instance that might have crashed. 
+static void delete_one_kernel_tunnel(uint32_t id1, uint32_t id2) +{ + queue_delete_kernel_tunnel(id1); +} +static void delete_kernel_tunnels(void) +{ + delete_kernel_items("tunnel", L2TP_CMD_TUNNEL_GET, L2TP_ATTR_CONN_ID, L2TP_ATTR_NONE, delete_one_kernel_tunnel); +} + // // Create session in kernel static int create_kernel_session(uint32_t tid, uint32_t peer_tid, uint32_t sid, uint32_t peer_sid) @@ -632,8 +789,8 @@ static int create_kernel_session(uint32_t tid, uint32_t peer_tid, uint32_t sid, } // -// Delete session in kernel -static int delete_kernel_session(uint32_t tid, uint32_t sid) +// Queue deleting session in kernel +static int queue_delete_kernel_session(uint32_t tid, uint32_t sid) { struct { struct nlmsghdr nh; @@ -669,6 +826,23 @@ static int delete_kernel_session(uint32_t tid, uint32_t sid) return -1; } + return 0; +} + +// +// Delete session in kernel +static int delete_kernel_session(uint32_t tid, uint32_t sid) +{ + int ret = queue_delete_kernel_session(tid, sid); + if (ret < 0) + return -1; + + struct { + struct nlmsghdr nh; + struct genlmsghdr glh; + char data[64]; + } req; + ssize_t size = genetlink_recv(&req, sizeof(req)); if (size < 0) { @@ -681,6 +855,20 @@ static int delete_kernel_session(uint32_t tid, uint32_t sid) return 0; } +// +// Clear all existing sessions +// +// Unfortunately, sessions survive us, so we have to drop any session left from a +// previous instance that might have crashed. 
+static void delete_one_kernel_session(uint32_t id1, uint32_t id2) +{ + queue_delete_kernel_session(id2, id1); +} +static void delete_kernel_sessions(void) +{ + delete_kernel_items("session", L2TP_CMD_SESSION_GET, L2TP_ATTR_SESSION_ID, L2TP_ATTR_CONN_ID, delete_one_kernel_session); +} + // // Create the kernel PPPoX socket static int create_ppp_socket(int udp_fd, uint32_t tid, uint32_t peer_tid, uint32_t sid, uint32_t peer_sid, const struct sockaddr *dst, socklen_t addrlen) From 5df78e9467896abe4105b4cb3987c147c0067fd1 Mon Sep 17 00:00:00 2001 From: Samuel Thibault Date: Sun, 23 Apr 2023 15:42:54 +0200 Subject: [PATCH 06/28] Separate out if configuration into setupif So we can later apply it to ppp interfaces. Better seen with diff -w --- l2tpns.c | 185 ++++++++++++++++++++++++++++--------------------------- 1 file changed, 95 insertions(+), 90 deletions(-) diff --git a/l2tpns.c b/l2tpns.c index 3e7c9d6..a80589d 100644 --- a/l2tpns.c +++ b/l2tpns.c @@ -253,6 +253,7 @@ struct Tstats *_statistics = NULL; struct Tringbuffer *ringbuffer = NULL; #endif +static int setupif(int ifidx, uint32_t mru, int config_addr); static ssize_t rtnetlink_send(struct nlmsghdr *nh); static ssize_t genetlink_send(struct nlmsghdr *nh); static ssize_t genetlink_recv(void *buf, ssize_t len); @@ -1510,41 +1511,51 @@ static void inittun(void) LOG(0, 0, 0, "Can't get tun interface index\n"); exit(1); } - + if (setupif(tunidx, MRU, 1) < 0) { - struct { - // interface setting - struct nlmsghdr nh; - union { - struct ifinfomsg ifinfo; - struct ifaddrmsg ifaddr; - } ifmsg; - char rtdata[32]; // 32 should be enough - } req; - uint32_t txqlen, mtu; - in_addr_t ip; + LOG(0, 0, 0, "Error while setting up tun device: %s\n", strerror(errno)); + exit(1); + } +} - memset(&req, 0, sizeof(req)); +// +// Set up an interface for serving as gateway +static int setupif(int ifidx, uint32_t mru, int config_addr) +{ + struct { + // interface setting + struct nlmsghdr nh; + union { + struct ifinfomsg ifinfo; + 
struct ifaddrmsg ifaddr; + } ifmsg; + char rtdata[32]; // 32 should be enough + } req; + uint32_t txqlen; + in_addr_t ip; - req.nh.nlmsg_type = RTM_NEWLINK; - req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_MULTI; - req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.ifmsg.ifinfo)); + memset(&req, 0, sizeof(req)); - req.ifmsg.ifinfo.ifi_family = AF_UNSPEC; - req.ifmsg.ifinfo.ifi_index = tunidx; - req.ifmsg.ifinfo.ifi_flags |= IFF_UP; // set interface up - req.ifmsg.ifinfo.ifi_change = IFF_UP; // only change this flag + req.nh.nlmsg_type = RTM_NEWLINK; + req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_MULTI; + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.ifmsg.ifinfo)); - /* Bump up the qlen to deal with bursts from the network */ - txqlen = 1000; - rtnetlink_addattr(&req.nh, IFLA_TXQLEN, &txqlen, sizeof(txqlen)); - /* set MTU to modem MRU */ - mtu = MRU; - rtnetlink_addattr(&req.nh, IFLA_MTU, &mtu, sizeof(mtu)); + req.ifmsg.ifinfo.ifi_family = AF_UNSPEC; + req.ifmsg.ifinfo.ifi_index = ifidx; + req.ifmsg.ifinfo.ifi_flags |= IFF_UP; // set interface up + req.ifmsg.ifinfo.ifi_change = IFF_UP; // only change this flag - if (rtnetlink_send(&req.nh) < 0) - goto senderror; + /* Bump up the qlen to deal with bursts from the network */ + txqlen = 1000; + rtnetlink_addattr(&req.nh, IFLA_TXQLEN, &txqlen, sizeof(txqlen)); + /* set MTU to modem MRU */ + rtnetlink_addattr(&req.nh, IFLA_MTU, &mru, sizeof(mru)); + if (rtnetlink_send(&req.nh) < 0) + return -1; + + if (config_addr) + { memset(&req, 0, sizeof(req)); req.nh.nlmsg_type = RTM_NEWADDR; @@ -1554,7 +1565,7 @@ static void inittun(void) req.ifmsg.ifaddr.ifa_family = AF_INET; req.ifmsg.ifaddr.ifa_prefixlen = 32; req.ifmsg.ifaddr.ifa_scope = RT_SCOPE_UNIVERSE; - req.ifmsg.ifaddr.ifa_index = tunidx; + req.ifmsg.ifaddr.ifa_index = ifidx; if (config->nbmultiaddress > 1) { @@ -1564,7 +1575,7 @@ static void inittun(void) ip = config->iftun_n_address[i]; rtnetlink_addattr(&req.nh, IFA_LOCAL, &ip, sizeof(ip)); if (rtnetlink_send(&req.nh) < 0) - goto 
senderror; + return -1; } } else @@ -1576,77 +1587,71 @@ static void inittun(void) rtnetlink_addattr(&req.nh, IFA_LOCAL, &ip, sizeof(ip)); if (rtnetlink_send(&req.nh) < 0) - goto senderror; + return -1; } + } - - - // Only setup IPv6 on the tun device if we have a configured prefix - if (config->ipv6_prefix.s6_addr[0]) { - struct in6_addr ip6; - - memset(&req, 0, sizeof(req)); - - req.nh.nlmsg_type = RTM_NEWADDR; - req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_REPLACE | NLM_F_MULTI; - req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.ifmsg.ifaddr)); - - req.ifmsg.ifaddr.ifa_family = AF_INET6; - req.ifmsg.ifaddr.ifa_prefixlen = 64; - req.ifmsg.ifaddr.ifa_scope = RT_SCOPE_LINK; - req.ifmsg.ifaddr.ifa_index = tunidx; - - // Link local address is FE80::1 - memset(&ip6, 0, sizeof(ip6)); - ip6.s6_addr[0] = 0xFE; - ip6.s6_addr[1] = 0x80; - ip6.s6_addr[15] = 1; - rtnetlink_addattr(&req.nh, IFA_LOCAL, &ip6, sizeof(ip6)); - - if (rtnetlink_send(&req.nh) < 0) - goto senderror; - - memset(&req, 0, sizeof(req)); - - req.nh.nlmsg_type = RTM_NEWADDR; - req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_REPLACE | NLM_F_MULTI; - req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.ifmsg.ifaddr)); - - req.ifmsg.ifaddr.ifa_family = AF_INET6; - req.ifmsg.ifaddr.ifa_prefixlen = 64; - req.ifmsg.ifaddr.ifa_scope = RT_SCOPE_UNIVERSE; - req.ifmsg.ifaddr.ifa_index = tunidx; - - // Global address is prefix::1 - ip6 = config->ipv6_prefix; - ip6.s6_addr[15] = 1; - rtnetlink_addattr(&req.nh, IFA_LOCAL, &ip6, sizeof(ip6)); - - if (rtnetlink_send(&req.nh) < 0) - goto senderror; - } + // Only setup IPv6 on the tun device if we have a configured prefix + if (config->ipv6_prefix.s6_addr[0]) { + struct in6_addr ip6; memset(&req, 0, sizeof(req)); - req.nh.nlmsg_type = NLMSG_DONE; - req.nh.nlmsg_len = NLMSG_LENGTH(0); + req.nh.nlmsg_type = RTM_NEWADDR; + req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_REPLACE | NLM_F_MULTI; + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.ifmsg.ifaddr)); + + 
req.ifmsg.ifaddr.ifa_family = AF_INET6; + req.ifmsg.ifaddr.ifa_prefixlen = 64; + req.ifmsg.ifaddr.ifa_scope = RT_SCOPE_LINK; + req.ifmsg.ifaddr.ifa_index = ifidx; + + // Link local address is FE80::1 + memset(&ip6, 0, sizeof(ip6)); + ip6.s6_addr[0] = 0xFE; + ip6.s6_addr[1] = 0x80; + ip6.s6_addr[15] = 1; + rtnetlink_addattr(&req.nh, IFA_LOCAL, &ip6, sizeof(ip6)); if (rtnetlink_send(&req.nh) < 0) - goto senderror; + return -1; - // if we get an error for seqnum < min_initok_nlseqnum, - // we must exit as initialization went wrong - if (config->ipv6_prefix.s6_addr[0]) - min_initok_rtnlseqnum = 5 + 1; // idx + if + addr + 2*addr6 - else - min_initok_rtnlseqnum = 3 + 1; // idx + if + addr + memset(&req, 0, sizeof(req)); + + req.nh.nlmsg_type = RTM_NEWADDR; + req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_REPLACE | NLM_F_MULTI; + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.ifmsg.ifaddr)); + + req.ifmsg.ifaddr.ifa_family = AF_INET6; + req.ifmsg.ifaddr.ifa_prefixlen = 64; + req.ifmsg.ifaddr.ifa_scope = RT_SCOPE_UNIVERSE; + req.ifmsg.ifaddr.ifa_index = ifidx; + + // Global address is prefix::1 + ip6 = config->ipv6_prefix; + ip6.s6_addr[15] = 1; + rtnetlink_addattr(&req.nh, IFA_LOCAL, &ip6, sizeof(ip6)); + + if (rtnetlink_send(&req.nh) < 0) + return -1; } - return; + memset(&req, 0, sizeof(req)); -senderror: - LOG(0, 0, 0, "Error while setting up tun device: %s\n", strerror(errno)); - exit(1); + req.nh.nlmsg_type = NLMSG_DONE; + req.nh.nlmsg_len = NLMSG_LENGTH(0); + + if (rtnetlink_send(&req.nh) < 0) + return -1; + + // if we get an error for seqnum < min_initok_nlseqnum, + // we must exit as initialization went wrong + if (config->ipv6_prefix.s6_addr[0]) + min_initok_rtnlseqnum = 5 + 1; // idx + if + addr + 2*addr6 + else + min_initok_rtnlseqnum = 3 + 1; // idx + if + addr + + return 0; } // set up LAC UDP ports From 79a08faea983aadf1bc49c60b325ac1e8cf3e1b6 Mon Sep 17 00:00:00 2001 From: Samuel Thibault Date: Sun, 23 Apr 2023 15:45:16 +0200 Subject: [PATCH 07/28] 
Generalize L2TP UDP socket creation So we can later use it to create per-tunnel L2TP sockets. --- l2tpns.c | 54 +++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 43 insertions(+), 11 deletions(-) diff --git a/l2tpns.c b/l2tpns.c index a80589d..4787d87 100644 --- a/l2tpns.c +++ b/l2tpns.c @@ -253,6 +253,8 @@ struct Tstats *_statistics = NULL; struct Tringbuffer *ringbuffer = NULL; #endif +static int initlacudp(int *pudpfd, in_addr_t ip_dest, uint16_t port_dest); +static int initudp(int * pudpfd, in_addr_t ip_bind, in_addr_t ip_dest, uint16_t port_dest); static int setupif(int ifidx, uint32_t mru, int config_addr); static ssize_t rtnetlink_send(struct nlmsghdr *nh); static ssize_t genetlink_send(struct nlmsghdr *nh); @@ -1655,7 +1657,7 @@ static int setupif(int ifidx, uint32_t mru, int config_addr) } // set up LAC UDP ports -static void initlacudp(void) +static int initlacudp(int *pudpfd, in_addr_t ip_dest, uint16_t port_dest) { int on = 1; struct sockaddr_in addr; @@ -1665,17 +1667,30 @@ static void initlacudp(void) addr.sin_family = AF_INET; addr.sin_port = htons(config->bind_portremotelns); addr.sin_addr.s_addr = config->bind_address_remotelns; - udplacfd = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP); - setsockopt(udplacfd, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)); + *pudpfd = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP); + setsockopt(*pudpfd, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)); { - int flags = fcntl(udplacfd, F_GETFL, 0); - fcntl(udplacfd, F_SETFL, flags | O_NONBLOCK); + int flags = fcntl(*pudpfd, F_GETFL, 0); + fcntl(*pudpfd, F_SETFL, flags | O_NONBLOCK); } - if (bind(udplacfd, (struct sockaddr *) &addr, sizeof(addr)) < 0) + if (bind(*pudpfd, (struct sockaddr *) &addr, sizeof(addr)) < 0) { LOG(0, 0, 0, "Error in UDP REMOTE LNS bind: %s\n", strerror(errno)); - exit(1); + close(*pudpfd); + return -1; } + if (ip_dest) + { + addr.sin_port = port_dest; + addr.sin_addr.s_addr = ip_dest; + if (connect(*pudpfd, (struct sockaddr *) &addr, 
sizeof(addr)) < 0) + { + LOG(2, 0, 0, "Error in UDP REMOTE LNS connect: %s\n", strerror(errno)); + close(*pudpfd); + return -1; + } + } + return 0; } // set up control ports @@ -1719,7 +1734,7 @@ static void initdae(void) } // set up UDP ports -static void initudp(int * pudpfd, in_addr_t ip_bind) +static int initudp(int * pudpfd, in_addr_t ip_bind, in_addr_t ip_dest, uint16_t port_dest) { int on = 1; struct sockaddr_in addr; @@ -1738,8 +1753,21 @@ static void initudp(int * pudpfd, in_addr_t ip_bind) if (bind((*pudpfd), (struct sockaddr *) &addr, sizeof(addr)) < 0) { LOG(0, 0, 0, "Error in UDP bind: %s\n", strerror(errno)); - exit(1); + close(*pudpfd); + return -1; } + if (ip_dest) + { + addr.sin_port = port_dest; + addr.sin_addr.s_addr = ip_dest; + if (connect((*pudpfd), (struct sockaddr *) &addr, sizeof(addr)) < 0) + { + LOG(2, 0, 0, "Error in UDP connect: %s\n", strerror(errno)); + close(*pudpfd); + return -1; + } + } + return 0; } // @@ -6061,8 +6089,12 @@ int main(int argc, char *argv[]) } config->nbudpfd = config->nbmultiaddress; for (i = 0; i < config->nbudpfd; i++) - initudp(&udpfd[i], config->bind_n_address[i]); - initlacudp(); + { + if (initudp(&udpfd[i], config->bind_n_address[i], 0, 0) < 0) + exit(1); + } + if (initlacudp(&udplacfd, 0, 0) < 0) + exit(1); config->indexlacudpfd = config->nbudpfd; udpfd[config->indexlacudpfd] = udplacfd; config->nbudpfd++; From 1db43fc51dd91e6944cfd1ee1a8dd8c69e2386be Mon Sep 17 00:00:00 2001 From: Samuel Thibault Date: Sun, 23 Apr 2023 15:48:02 +0200 Subject: [PATCH 08/28] Separate out PPP processing So we can later call it on frames obtained from kernel ppp interface. 
Better seen with diff -w --- l2tpns.c | 326 ++++++++++++++++++++++++++++--------------------------- 1 file changed, 167 insertions(+), 159 deletions(-) diff --git a/l2tpns.c b/l2tpns.c index 4787d87..4cd1a28 100644 --- a/l2tpns.c +++ b/l2tpns.c @@ -282,6 +282,7 @@ static void processcontrol(uint8_t *buf, int len, struct sockaddr_in *addr, int static tunnelidt new_tunnel(void); static void unhide_value(uint8_t *value, size_t len, uint16_t type, uint8_t *vector, size_t vec_len); static void bundleclear(bundleidt b); +static void processppp(sessionidt s, uint8_t *buf, int len, uint8_t *p, int l, struct sockaddr_in *addr, uint16_t indexudpfd); // return internal time (10ths since process startup), set f if given // as a side-effect sets time_now, and time_changed @@ -4179,180 +4180,187 @@ void processudp(uint8_t *buf, int len, struct sockaddr_in *addr, uint16_t indexu } } else - { // data - uint16_t proto; + // data + processppp(s, buf, len, p, l, addr, indexudpfd); +} - LOG_HEX(5, "Receive Tunnel Data", p, l); - if (l > 2 && p[0] == 0xFF && p[1] == 0x03) - { // HDLC address header, discard - p += 2; - l -= 2; - } - if (l < 2) - { - LOG(1, s, t, "Short ppp length %d\n", l); - STAT(tunnel_rx_errors); - return; - } - if (*p & 1) - { - proto = *p++; - l--; - } - else - { - proto = ntohs(*(uint16_t *) p); - p += 2; - l -= 2; - } +// +// process a ppp frame coming from tunnel +static void processppp(sessionidt s, uint8_t *buf, int len, uint8_t *p, int l, struct sockaddr_in *addr, uint16_t indexudpfd) +{ // data + int t = session[s].tunnel; + uint16_t proto; - if (session[s].forwardtosession) + LOG_HEX(5, "Receive Tunnel Data", p, l); + if (l > 2 && p[0] == 0xFF && p[1] == 0x03) + { // HDLC address header, discard + p += 2; + l -= 2; + } + if (l < 2) + { + LOG(1, s, t, "Short ppp length %d\n", l); + STAT(tunnel_rx_errors); + return; + } + if (*p & 1) + { + proto = *p++; + l--; + } + else + { + proto = ntohs(*(uint16_t *) p); + p += 2; + l -= 2; + } + + if 
(session[s].forwardtosession) + { + LOG(5, s, t, "Forwarding data session to session %u\n", session[s].forwardtosession); + // Forward to LAC/BAS or Remote LNS session + lac_session_forward(buf, len, s, proto, addr->sin_addr.s_addr, addr->sin_port, indexudpfd); + return; + } + else if (config->auth_tunnel_change_addr_src) + { + if (tunnel[t].ip != ntohl(addr->sin_addr.s_addr) && + tunnel[t].port == ntohs(addr->sin_port)) { - LOG(5, s, t, "Forwarding data session to session %u\n", session[s].forwardtosession); - // Forward to LAC/BAS or Remote LNS session - lac_session_forward(buf, len, s, proto, addr->sin_addr.s_addr, addr->sin_port, indexudpfd); - return; + // The remotes BAS are a clustered l2tpns server and the source IP has changed + LOG(5, s, t, "The tunnel IP source (%s) has changed by new IP (%s)\n", + fmtaddr(htonl(tunnel[t].ip), 0), fmtaddr(addr->sin_addr.s_addr, 0)); + + tunnel[t].ip = ntohl(addr->sin_addr.s_addr); } - else if (config->auth_tunnel_change_addr_src) + } + + if (s && !session[s].opened) // Is something wrong?? + { + if (!config->cluster_iam_master) { - if (tunnel[t].ip != ntohl(addr->sin_addr.s_addr) && - tunnel[t].port == ntohs(addr->sin_port)) - { - // The remotes BAS are a clustered l2tpns server and the source IP has changed - LOG(5, s, t, "The tunnel IP source (%s) has changed by new IP (%s)\n", - fmtaddr(htonl(tunnel[t].ip), 0), fmtaddr(addr->sin_addr.s_addr, 0)); - - tunnel[t].ip = ntohl(addr->sin_addr.s_addr); - } - } - - if (s && !session[s].opened) // Is something wrong?? - { - if (!config->cluster_iam_master) - { - // Pass it off to the master to deal with.. - master_forward_packet(buf, len, addr->sin_addr.s_addr, addr->sin_port, indexudpfd); - return; - } - - LOG(1, s, t, "UDP packet contains session which is not opened. Dropping packet.\n"); - STAT(tunnel_rx_errors); + // Pass it off to the master to deal with.. 
+ master_forward_packet(buf, len, addr->sin_addr.s_addr, addr->sin_port, indexudpfd); return; } - if (proto == PPPPAP) - { - session[s].last_packet = time_now; - if (!config->cluster_iam_master) { master_forward_packet(buf, len, addr->sin_addr.s_addr, addr->sin_port, indexudpfd); return; } - processpap(s, t, p, l); - } - else if (proto == PPPCHAP) - { - session[s].last_packet = time_now; - if (!config->cluster_iam_master) { master_forward_packet(buf, len, addr->sin_addr.s_addr, addr->sin_port, indexudpfd); return; } - processchap(s, t, p, l); - } - else if (proto == PPPLCP) - { - session[s].last_packet = time_now; - if (!config->cluster_iam_master) { master_forward_packet(buf, len, addr->sin_addr.s_addr, addr->sin_port, indexudpfd); return; } - processlcp(s, t, p, l); - } - else if (proto == PPPIPCP) - { - session[s].last_packet = time_now; - if (!config->cluster_iam_master) { master_forward_packet(buf, len, addr->sin_addr.s_addr, addr->sin_port, indexudpfd); return; } - processipcp(s, t, p, l); - } - else if (proto == PPPIPV6CP && config->ipv6_prefix.s6_addr[0]) - { - session[s].last_packet = time_now; - if (!config->cluster_iam_master) { master_forward_packet(buf, len, addr->sin_addr.s_addr, addr->sin_port, indexudpfd); return; } - processipv6cp(s, t, p, l); - } - else if (proto == PPPCCP) - { - session[s].last_packet = time_now; - if (!config->cluster_iam_master) { master_forward_packet(buf, len, addr->sin_addr.s_addr, addr->sin_port, indexudpfd); return; } - processccp(s, t, p, l); - } - else if (proto == PPPIP) - { - if (session[s].die) - { - LOG(4, s, t, "Session %u is closing. Don't process PPP packets\n", s); - return; // closing session, PPP not processed - } + LOG(1, s, t, "UDP packet contains session which is not opened. 
Dropping packet.\n"); + STAT(tunnel_rx_errors); + return; + } - session[s].last_packet = session[s].last_data = time_now; - if (session[s].walled_garden && !config->cluster_iam_master) + if (proto == PPPPAP) + { + session[s].last_packet = time_now; + if (!config->cluster_iam_master) { master_forward_packet(buf, len, addr->sin_addr.s_addr, addr->sin_port, indexudpfd); return; } + processpap(s, t, p, l); + } + else if (proto == PPPCHAP) + { + session[s].last_packet = time_now; + if (!config->cluster_iam_master) { master_forward_packet(buf, len, addr->sin_addr.s_addr, addr->sin_port, indexudpfd); return; } + processchap(s, t, p, l); + } + else if (proto == PPPLCP) + { + session[s].last_packet = time_now; + if (!config->cluster_iam_master) { master_forward_packet(buf, len, addr->sin_addr.s_addr, addr->sin_port, indexudpfd); return; } + processlcp(s, t, p, l); + } + else if (proto == PPPIPCP) + { + session[s].last_packet = time_now; + if (!config->cluster_iam_master) { master_forward_packet(buf, len, addr->sin_addr.s_addr, addr->sin_port, indexudpfd); return; } + processipcp(s, t, p, l); + } + else if (proto == PPPIPV6CP && config->ipv6_prefix.s6_addr[0]) + { + session[s].last_packet = time_now; + if (!config->cluster_iam_master) { master_forward_packet(buf, len, addr->sin_addr.s_addr, addr->sin_port, indexudpfd); return; } + processipv6cp(s, t, p, l); + } + else if (proto == PPPCCP) + { + session[s].last_packet = time_now; + if (!config->cluster_iam_master) { master_forward_packet(buf, len, addr->sin_addr.s_addr, addr->sin_port, indexudpfd); return; } + processccp(s, t, p, l); + } + else if (proto == PPPIP) + { + if (session[s].die) + { + LOG(4, s, t, "Session %u is closing. 
Don't process PPP packets\n", s); + return; // closing session, PPP not processed + } + + session[s].last_packet = session[s].last_data = time_now; + if (session[s].walled_garden && !config->cluster_iam_master) + { + master_forward_packet(buf, len, addr->sin_addr.s_addr, addr->sin_port, indexudpfd); + return; + } + + processipin(s, t, p, l); + } + else if (proto == PPPMP) + { + if (session[s].die) + { + LOG(4, s, t, "Session %u is closing. Don't process PPP packets\n", s); + return; // closing session, PPP not processed + } + + session[s].last_packet = session[s].last_data = time_now; + if (!config->cluster_iam_master) + { + // The fragments reconstruction is managed by the Master. + master_forward_packet(buf, len, addr->sin_addr.s_addr, addr->sin_port, indexudpfd); + return; + } + + processmpin(s, t, p, l); + } + else if (proto == PPPIPV6 && config->ipv6_prefix.s6_addr[0]) + { + if (session[s].die) + { + LOG(4, s, t, "Session %u is closing. Don't process PPP packets\n", s); + return; // closing session, PPP not processed + } + + session[s].last_packet = session[s].last_data = time_now; + if (session[s].walled_garden && !config->cluster_iam_master) + { + master_forward_packet(buf, len, addr->sin_addr.s_addr, addr->sin_port, indexudpfd); + return; + } + + if (!config->cluster_iam_master) + { + // Check if DhcpV6, IP dst: FF02::1:2, Src Port 0x0222 (546), Dst Port 0x0223 (547) + if (*(p + 6) == 17 && *(p + 24) == 0xFF && *(p + 25) == 2 && + *(uint32_t *)(p + 26) == 0 && *(uint32_t *)(p + 30) == 0 && + *(uint16_t *)(p + 34) == 0 && *(p + 36) == 0 && *(p + 37) == 1 && *(p + 38) == 0 && *(p + 39) == 2 && + *(p + 40) == 2 && *(p + 41) == 0x22 && *(p + 42) == 2 && *(p + 43) == 0x23) { + // DHCPV6 must be managed by the Master. master_forward_packet(buf, len, addr->sin_addr.s_addr, addr->sin_port, indexudpfd); return; } - - processipin(s, t, p, l); } - else if (proto == PPPMP) - { - if (session[s].die) - { - LOG(4, s, t, "Session %u is closing. 
Don't process PPP packets\n", s); - return; // closing session, PPP not processed - } - session[s].last_packet = session[s].last_data = time_now; - if (!config->cluster_iam_master) - { - // The fragments reconstruction is managed by the Master. - master_forward_packet(buf, len, addr->sin_addr.s_addr, addr->sin_port, indexudpfd); - return; - } - - processmpin(s, t, p, l); - } - else if (proto == PPPIPV6 && config->ipv6_prefix.s6_addr[0]) - { - if (session[s].die) - { - LOG(4, s, t, "Session %u is closing. Don't process PPP packets\n", s); - return; // closing session, PPP not processed - } - - session[s].last_packet = session[s].last_data = time_now; - if (session[s].walled_garden && !config->cluster_iam_master) - { - master_forward_packet(buf, len, addr->sin_addr.s_addr, addr->sin_port, indexudpfd); - return; - } - - if (!config->cluster_iam_master) - { - // Check if DhcpV6, IP dst: FF02::1:2, Src Port 0x0222 (546), Dst Port 0x0223 (547) - if (*(p + 6) == 17 && *(p + 24) == 0xFF && *(p + 25) == 2 && - *(uint32_t *)(p + 26) == 0 && *(uint32_t *)(p + 30) == 0 && - *(uint16_t *)(p + 34) == 0 && *(p + 36) == 0 && *(p + 37) == 1 && *(p + 38) == 0 && *(p + 39) == 2 && - *(p + 40) == 2 && *(p + 41) == 0x22 && *(p + 42) == 2 && *(p + 43) == 0x23) - { - // DHCPV6 must be managed by the Master. 
- master_forward_packet(buf, len, addr->sin_addr.s_addr, addr->sin_port, indexudpfd); - return; - } - } - - processipv6in(s, t, p, l); - } - else if (session[s].ppp.lcp == Opened) - { - session[s].last_packet = time_now; - if (!config->cluster_iam_master) { master_forward_packet(buf, len, addr->sin_addr.s_addr, addr->sin_port, indexudpfd); return; } - protoreject(s, t, p, l, proto); - } - else - { - LOG(2, s, t, "Unknown PPP protocol 0x%04X received in LCP %s state\n", - proto, ppp_state(session[s].ppp.lcp)); - } + processipv6in(s, t, p, l); + } + else if (session[s].ppp.lcp == Opened) + { + session[s].last_packet = time_now; + if (!config->cluster_iam_master) { master_forward_packet(buf, len, addr->sin_addr.s_addr, addr->sin_port, indexudpfd); return; } + protoreject(s, t, p, l, proto); + } + else + { + LOG(2, s, t, "Unknown PPP protocol 0x%04X received in LCP %s state\n", + proto, ppp_state(session[s].ppp.lcp)); } } From c7853de42816c24ca212ff26846a15547d021a01 Mon Sep 17 00:00:00 2001 From: Samuel Thibault Date: Sun, 23 Apr 2023 16:47:26 +0200 Subject: [PATCH 09/28] Consolidate stat update code into update_session_in_stat update_session_out_stat --- l2tplac.c | 16 ++-------------- l2tpns.c | 37 ++++++++++++++++++++++++++++--------- l2tpns.h | 2 ++ ppp.c | 20 +++----------------- pppoe.c | 36 ++++++------------------------------ 5 files changed, 41 insertions(+), 70 deletions(-) diff --git a/l2tplac.c b/l2tplac.c index 8be2261..0cec975 100644 --- a/l2tplac.c +++ b/l2tplac.c @@ -532,20 +532,8 @@ int lac_session_forward(uint8_t *buf, int len, sessionidt sess, uint16_t proto, if ((proto == PPPIP) || (proto == PPPMP) ||(proto == PPPIPV6 && config->ipv6_prefix.s6_addr[0])) { session[sess].last_packet = session[sess].last_data = time_now; - // Update STAT IN - increment_counter(&session[sess].cin, &session[sess].cin_wrap, len); - session[sess].cin_delta += len; - session[sess].pin++; - sess_local[sess].cin += len; - sess_local[sess].pin++; - - session[s].last_data = 
time_now; - // Update STAT OUT - increment_counter(&session[s].cout, &session[s].cout_wrap, len); // byte count - session[s].cout_delta += len; - session[s].pout++; - sess_local[s].cout += len; - sess_local[s].pout++; + update_session_in_stat(sess, 1, len); + update_session_out_stat(s, 1, len); } else session[sess].last_packet = time_now; diff --git a/l2tpns.c b/l2tpns.c index 4cd1a28..3e6d846 100644 --- a/l2tpns.c +++ b/l2tpns.c @@ -2276,15 +2276,34 @@ void processmpframe(sessionidt s, tunnelidt t, uint8_t *p, uint16_t l, uint8_t e } } -static void update_session_out_stat(sessionidt s, sessiont *sp, int len) +// +// Account for some incoming packets in the session statistics +void update_session_in_stat(sessionidt s, int packets, size_t len) { + sessiont *sp = &session[s]; + + increment_counter(&sp->cin, &sp->cin_wrap, len); // byte count + sp->cin_delta += len; + sp->pin += packets; + sp->last_data = time_now; + + sess_local[s].cin += len; // To send to master.. + sess_local[s].pin += packets; +} + +// +// Account for some outgoing packets in the session statistics +void update_session_out_stat(sessionidt s, int packets, size_t len) +{ + sessiont *sp = &session[s]; + increment_counter(&sp->cout, &sp->cout_wrap, len); // byte count sp->cout_delta += len; - sp->pout++; + sp->pout += packets; sp->last_data = time_now; sess_local[s].cout += len; // To send to master.. - sess_local[s].pout++; + sess_local[s].pout += packets; } // process outgoing (to tunnel) IP @@ -2493,7 +2512,7 @@ void processipout(uint8_t *buf, int len) tunnelsend(fragbuf, fraglen + (p-fragbuf), t); // send it... // statistics - update_session_out_stat(s, sp, fraglen); + update_session_out_stat(s, 1, fraglen); remain -= fraglen; while (remain > last_fraglen) @@ -2506,7 +2525,7 @@ void processipout(uint8_t *buf, int len) p = makeppp(fragbuf, sizeof(fragbuf), buf+(len - remain), fraglen, s, t, PPPIP, 0, bid, 0); if (!p) return; tunnelsend(fragbuf, fraglen + (p-fragbuf), t); // send it... 
- update_session_out_stat(s, sp, fraglen); + update_session_out_stat(s, 1, fraglen); remain -= fraglen; } // send the last fragment @@ -2518,7 +2537,7 @@ void processipout(uint8_t *buf, int len) p = makeppp(fragbuf, sizeof(fragbuf), buf+(len - remain), remain, s, t, PPPIP, 0, bid, MP_END); if (!p) return; tunnelsend(fragbuf, remain + (p-fragbuf), t); // send it... - update_session_out_stat(s, sp, remain); + update_session_out_stat(s, 1, remain); if (remain != last_fraglen) LOG(3, s, t, "PROCESSIPOUT ERROR REMAIN != LAST_FRAGLEN, %d != %d\n", remain, last_fraglen); } @@ -2529,7 +2548,7 @@ void processipout(uint8_t *buf, int len) if (!p) return; tunnelsend(fragbuf, len + (p-fragbuf), t); // send it... LOG(4, s, t, "MPPP: packet sent as one frame\n"); - update_session_out_stat(s, sp, len); + update_session_out_stat(s, 1, len); } } else @@ -2537,14 +2556,14 @@ void processipout(uint8_t *buf, int len) // Send it as one frame (NO MPPP Frame) uint8_t *p = opt_makeppp(buf, len, s, t, PPPIP, 0, 0, 0); tunnelsend(p, len + (buf-p), t); // send it... - update_session_out_stat(s, sp, len); + update_session_out_stat(s, 1, len); } } else { uint8_t *p = opt_makeppp(buf, len, s, t, PPPIP, 0, 0, 0); tunnelsend(p, len + (buf-p), t); // send it... 
- update_session_out_stat(s, sp, len); + update_session_out_stat(s, 1, len); } // Snooping this session, send it to intercept box diff --git a/l2tpns.h b/l2tpns.h index 8b1f121..c28eccc 100644 --- a/l2tpns.h +++ b/l2tpns.h @@ -979,6 +979,8 @@ void adjust_tcp6_mss(sessionidt s, tunnelidt t, uint8_t *buf, int len, uint8_t * void sendipcp(sessionidt s, tunnelidt t); void sendipv6cp(sessionidt s, tunnelidt t); void processudp(uint8_t *buf, int len, struct sockaddr_in *addr, uint16_t indexudpfd); +void update_session_in_stat(sessionidt s, int packets, size_t len); +void update_session_out_stat(sessionidt s, int packets, size_t len); void processipout(uint8_t *buf, int len); void snoop_send_packet(uint8_t *packet, uint16_t size, in_addr_t destination, uint16_t port); int find_filter(char const *name, size_t len); diff --git a/ppp.c b/ppp.c index 08c2ef0..36645e0 100644 --- a/ppp.c +++ b/ppp.c @@ -1722,12 +1722,7 @@ static void update_sessions_in_stat(sessionidt s, uint16_t l) bundleidt b = session[s].bundle; if (!b) { - increment_counter(&session[s].cin, &session[s].cin_wrap, l); - session[s].cin_delta += l; - session[s].pin++; - - sess_local[s].cin += l; - sess_local[s].pin++; + update_session_in_stat(s, 1, l); } else { @@ -1737,12 +1732,8 @@ static void update_sessions_in_stat(sessionidt s, uint16_t l) { l = frag[b].fragment[i].length; s = frag[b].fragment[i].sid; - increment_counter(&session[s].cin, &session[s].cin_wrap, l); - session[s].cin_delta += l; - session[s].pin++; + update_session_in_stat(s, 1, l); - sess_local[s].cin += l; - sess_local[s].pin++; if (i == end) return; i = (i + 1) & MAXFRAGNUM_MASK; @@ -2383,12 +2374,7 @@ void send_ipin(sessionidt s, uint8_t *buf, int len) } // Increment packet counters - increment_counter(&session[s].cin, &session[s].cin_wrap, len); - session[s].cin_delta += len; - session[s].pin++; - - sess_local[s].cin += len; - sess_local[s].pin++; + update_session_in_stat(s, 1, len); eth_tx += len; diff --git a/pppoe.c b/pppoe.c index 
935bd0f..c60f4fb 100644 --- a/pppoe.c +++ b/pppoe.c @@ -955,21 +955,9 @@ static void pppoe_forwardto_session_rmlns(uint8_t *pack, int size, sessionidt se if ((proto == PPPIP) || (proto == PPPMP) ||(proto == PPPIPV6 && config->ipv6_prefix.s6_addr[0])) { - session[sess].last_packet = session[sess].last_data = time_now; - // Update STAT IN - increment_counter(&session[sess].cin, &session[sess].cin_wrap, ll2tp); - session[sess].cin_delta += ll2tp; - session[sess].pin++; - sess_local[sess].cin += ll2tp; - sess_local[sess].pin++; - - session[s].last_data = time_now; - // Update STAT OUT - increment_counter(&session[s].cout, &session[s].cout_wrap, ll2tp); // byte count - session[s].cout_delta += ll2tp; - session[s].pout++; - sess_local[s].cout += ll2tp; - sess_local[s].pout++; + session[sess].last_packet = time_now; + update_session_in_stat(sess, 1, ll2tp); // IN stats: session sess + update_session_out_stat(s, 1, ll2tp); // OUT stats: session s } else session[sess].last_packet = time_now; @@ -1026,21 +1014,9 @@ void pppoe_forwardto_session_pppoe(uint8_t *pack, int size, sessionidt sess, uin if ((proto == PPPIP) || (proto == PPPMP) ||(proto == PPPIPV6 && config->ipv6_prefix.s6_addr[0])) { - session[sess].last_packet = session[sess].last_data = time_now; - // Update STAT IN - increment_counter(&session[sess].cin, &session[sess].cin_wrap, lpppoe); - session[sess].cin_delta += lpppoe; - session[sess].pin++; - sess_local[sess].cin += lpppoe; - sess_local[sess].pin++; - - session[s].last_data = time_now; - // Update STAT OUT - increment_counter(&session[s].cout, &session[s].cout_wrap, lpppoe); // byte count - session[s].cout_delta += lpppoe; - session[s].pout++; - sess_local[s].cout += lpppoe; - sess_local[s].pout++; + session[sess].last_packet = time_now; + update_session_in_stat(sess, 1, lpppoe); // IN stats: session sess + update_session_out_stat(s, 1, lpppoe); // OUT stats: session s } else session[sess].last_packet = time_now; From 8c04e26f3f0efcb790a458581d50b42f4c844e6c Mon Sep 17 00:00:00 2001 From: Samuel Thibault Date: Sun, 23 Apr 2023 18:47:44 +0200 Subject: [PATCH 
10/28] Consolidate adding/removing routes into routesset and routes6set. --- l2tpns.c | 186 +++++++++++++++++++++++-------------------------------- l2tpns.h | 1 + ppp.c | 13 +--- 3 files changed, 79 insertions(+), 121 deletions(-) diff --git a/l2tpns.c b/l2tpns.c index 3e6d846..eb18f4f 100644 --- a/l2tpns.c +++ b/l2tpns.c @@ -263,6 +263,7 @@ static int netlink_handle_ack(struct nlmsghdr *nh, int gen, int min_initok_nlseq static void rtnetlink_addattr(struct nlmsghdr *nh, int type, const void *data, int alen); static void genetlink_addattr(struct nlmsghdr *nh, int type, const void *data, int alen); static int genetlink_getattr(struct nlmsghdr *nh, int type, void *data, int alen); +static void routesset(sessionidt s, sessiont *sp, int add); static void cache_ipmap(in_addr_t ip, sessionidt s); static void uncache_ipmap(in_addr_t ip); static void cache_ipv6map(struct in6_addr ip, int prefixlen, sessionidt s); @@ -1115,6 +1116,51 @@ static void routeset(sessionidt s, in_addr_t ip, int prefixlen, in_addr_t gw, in } } +// +// Add or remove the routes for a session +static void routesset(sessionidt s, sessiont *sp, int add) +{ + int r; + int routed = 0; + + // add/remove routes... + for (r = 0; r < MAXROUTE && sp->route[r].ip; r++) + { + if ((sp->ip >> (32-sp->route[r].prefixlen)) == + (sp->route[r].ip >> (32-sp->route[r].prefixlen))) + routed++; + + routeset(s, sp->route[r].ip, sp->route[r].prefixlen, 0, add); + } + + // ...ip + if (sp->ip) + { + // Static IPs need to be routed if not already + // convered by a Framed-Route. Anything else is part + // of the IP address pool and is already routed, it + // just needs to be added to the IP cache. + // IPv6 route setup is done in ppp.c, when IPV6CP is acked. + if (sp->ip_pool_index == -1) // static ip + { + if (!routed) routeset(s, sp->ip, 0, 0, add); + } + else // It's part of the IP pool, add/remove it manually. 
+ { + if (add) + cache_ipmap(sp->ip, s); + else + uncache_ipmap(sp->ip); + } + } +} + +// Add an IPv6 route +// +// This adds it to the routing table, advertises it +// via BGP if enabled, and stuffs it into the +// 'sessionbyip' cache. +// void route6set(sessionidt s, struct in6_addr ip, int prefixlen, int add) { struct { @@ -1184,6 +1230,25 @@ void route6set(sessionidt s, struct in6_addr ip, int prefixlen, int add) return; } +// +// Add or remove the IPv6 routes for a session +void routes6set(sessionidt s, sessiont *sp, int add) +{ + int r; + + for (r = 0; r < MAXROUTE6 && sp->route6[r].ipv6prefixlen; r++) + { + route6set(s, sp->route6[r].ipv6route, sp->route6[r].ipv6prefixlen, add); + } + + if (sp->ipv6address.s6_addr[0]) + { + // Check if included in prefix + if (!add || sessionbyipv6(sp->ipv6address) != s) + route6set(s, sp->ipv6address, 128, add); + } +} + // // Get L2TP netlink id static int16_t netlink_get_l2tp_id(void) @@ -3001,38 +3066,27 @@ void sessionshutdown(sessionidt s, char const *reason, int cdn_result, int cdn_e if (session[s].ip) { // IP allocated, clear and unroute - int r; - int routed = 0; - for (r = 0; r < MAXROUTE && session[s].route[r].ip; r++) - { - if ((session[s].ip >> (32-session[s].route[r].prefixlen)) == - (session[s].route[r].ip >> (32-session[s].route[r].prefixlen))) - routed++; + if (del_routes) + routesset(s, &session[s], 0); - if (del_routes) routeset(s, session[s].route[r].ip, session[s].route[r].prefixlen, 0, 0); + int r; + for (r = 0; r < MAXROUTE && session[s].route[r].ip; r++) session[s].route[r].ip = 0; - } if (session[s].ip_pool_index == -1) // static ip - { - if (!routed && del_routes) routeset(s, session[s].ip, 0, 0, 0); session[s].ip = 0; - } else free_ip_address(s); + if (del_routes) + routes6set(s, &session[s], 0); + // unroute IPv6, if setup for (r = 0; r < MAXROUTE6 && session[s].route6[r].ipv6route.s6_addr[0] && session[s].route6[r].ipv6prefixlen; r++) { - if (del_routes) route6set(s, 
session[s].route6[r].ipv6route, session[s].route6[r].ipv6prefixlen, 0); memset(&session[s].route6[r], 0, sizeof(session[s].route6[r])); } - if (session[s].ipv6address.s6_addr[0] && del_routes) - { - route6set(s, session[s].ipv6address, 128, 0); - } - if (b) { // This session was part of a bundle @@ -3067,7 +3121,6 @@ void sessionshutdown(sessionidt s, char const *reason, int cdn_result, int cdn_e { sessionidt new_s = bundle[b].members[0]; - routed = 0; // Add the route for this session. for (r = 0; r < MAXROUTE && session[new_s].route[r].ip; r++) { @@ -6602,7 +6655,6 @@ int sessionsetup(sessionidt s, tunnelidt t) in_addr_t ip; char *user; sessionidt i; - int r; CSTAT(sessionsetup); @@ -6680,31 +6732,8 @@ int sessionsetup(sessionidt s, tunnelidt t) // no need to set a route for the same IP address of the bundle if (!session[s].bundle || (bundle[session[s].bundle].num_of_links == 1)) - { - int routed = 0; - // Add the route for this session. - for (r = 0; r < MAXROUTE && session[s].route[r].ip; r++) - { - if ((session[s].ip >> (32-session[s].route[r].prefixlen)) == - (session[s].route[r].ip >> (32-session[s].route[r].prefixlen))) - routed++; - - routeset(s, session[s].route[r].ip, session[s].route[r].prefixlen, 0, 1); - } - - // Static IPs need to be routed if not already - // convered by a Framed-Route. Anything else is part - // of the IP address pool and is already routed, it - // just needs to be added to the IP cache. - // IPv6 route setup is done in ppp.c, when IPV6CP is acked. - if (session[s].ip_pool_index == -1) // static ip - { - if (!routed) routeset(s, session[s].ip, 0, 0, 1); - } - else - cache_ipmap(session[s].ip, s); - } + routesset(s, &session[s], 1); sess_local[s].lcp_authtype = 0; // RADIUS authentication complete lcp_open(s, t); // transition to Network phase and send initial IPCP @@ -6767,80 +6796,19 @@ int load_session(sessionidt s, sessiont *new) // needs update if (newip) { - int routed = 0; - // remove old routes... 
- for (i = 0; i < MAXROUTE && session[s].route[i].ip; i++) - { - if ((session[s].ip >> (32-session[s].route[i].prefixlen)) == - (session[s].route[i].ip >> (32-session[s].route[i].prefixlen))) - routed++; - - routeset(s, session[s].route[i].ip, session[s].route[i].prefixlen, 0, 0); - } - - // ...ip - if (session[s].ip) - { - if (session[s].ip_pool_index == -1) // static IP - { - if (!routed) routeset(s, session[s].ip, 0, 0, 0); - } - else // It's part of the IP pool, remove it manually. - uncache_ipmap(session[s].ip); - } + routesset(s, &session[s], 0); // remove old IPV6 routes... - for (i = 0; i < MAXROUTE6 && session[s].route6[i].ipv6route.s6_addr[0] && session[s].route6[i].ipv6prefixlen; i++) - { - route6set(s, session[s].route6[i].ipv6route, session[s].route6[i].ipv6prefixlen, 0); - } - - if (session[s].ipv6address.s6_addr[0]) - { - route6set(s, session[s].ipv6address, 128, 0); - } - - routed = 0; + routes6set(s, &session[s], 0); // add new routes... - for (i = 0; i < MAXROUTE && new->route[i].ip; i++) - { - if ((new->ip >> (32-new->route[i].prefixlen)) == - (new->route[i].ip >> (32-new->route[i].prefixlen))) - routed++; - - routeset(s, new->route[i].ip, new->route[i].prefixlen, 0, 1); - } - - // ...ip - if (new->ip) - { - // If there's a new one, add it. 
- if (new->ip_pool_index == -1) - { - if (!routed) routeset(s, new->ip, 0, 0, 1); - } - else - cache_ipmap(new->ip, s); - } + routesset(s, new, 1); } // check v6 routing if (new->ppp.ipv6cp == Opened && session[s].ppp.ipv6cp != Opened) - { - for (i = 0; i < MAXROUTE6 && new->route6[i].ipv6prefixlen; i++) - { - route6set(s, new->route6[i].ipv6route, new->route6[i].ipv6prefixlen, 1); - } - } - - if (new->ipv6address.s6_addr[0] && new->ppp.ipv6cp == Opened && session[s].ppp.ipv6cp != Opened) - { - // Check if included in prefix - if (sessionbyipv6(new->ipv6address) != s) - route6set(s, new->ipv6address, 128, 1); - } + routes6set(s, new, 1); // check filters if (new->filter_in && (new->filter_in > MAXFILTER || !ip_filters[new->filter_in - 1].name[0])) diff --git a/l2tpns.h b/l2tpns.h index c28eccc..9220b9e 100644 --- a/l2tpns.h +++ b/l2tpns.h @@ -962,6 +962,7 @@ int rad_tunnel_pwdecode(uint8_t *pl2tpsecret, size_t *pl2tpsecretlen, const char clockt backoff(uint8_t try); void send_ipv6_ra(sessionidt s, tunnelidt t, struct in6_addr *ip); void route6set(sessionidt s, struct in6_addr ip, int prefixlen, int add); +void routes6set(sessionidt s, sessiont *sp, int add); sessionidt sessionbyip(in_addr_t ip); sessionidt sessionbyipv6(struct in6_addr ip); sessionidt sessionbyipv6new(struct in6_addr ip); diff --git a/ppp.c b/ppp.c index 36645e0..43c4213 100644 --- a/ppp.c +++ b/ppp.c @@ -1479,21 +1479,10 @@ void processipcp(sessionidt s, tunnelidt t, uint8_t *p, uint16_t l) static void ipv6cp_open(sessionidt s, tunnelidt t) { - int i; LOG(3, s, t, "IPV6CP: Opened\n"); change_state(s, ipv6cp, Opened); - for (i = 0; i < MAXROUTE6 && session[s].route6[i].ipv6prefixlen; i++) - { - route6set(s, session[s].route6[i].ipv6route, session[s].route6[i].ipv6prefixlen, 1); - } - - if (session[s].ipv6address.s6_addr[0]) - { - // Check if included in prefix - if (sessionbyipv6(session[s].ipv6address) != s) - route6set(s, session[s].ipv6address, 128, 1); - } + routes6set(s, &session[s], 1); // 
Send an initial RA send_ipv6_ra(s, t, NULL); From e53fccd36c5c3da010f72a4cfbc851a2926398aa Mon Sep 17 00:00:00 2001 From: Samuel Thibault Date: Thu, 4 Jan 2024 00:09:24 +0100 Subject: [PATCH 11/28] Add route for session-specific subprefix of the ipv6 prefix Otherwise with kernel acceleration we would only have the same /64 route on all ppp devices. We need separate /96 routes on each ppp device for packets of the /96 subprefix to be routed to the correct session. --- l2tpns.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/l2tpns.c b/l2tpns.c index eb18f4f..209d637 100644 --- a/l2tpns.c +++ b/l2tpns.c @@ -1247,6 +1247,15 @@ void routes6set(sessionidt s, sessiont *sp, int add) if (!add || sessionbyipv6(sp->ipv6address) != s) route6set(s, sp->ipv6address, 128, add); } + else + { + in_addr_t addr_ipv4 = htonl(sp->ip); // use sp: load_session passes a copy not yet stored in session[s] + struct in6_addr addr; + memset(&addr, 0, sizeof(addr)); + memcpy(&addr, &config->ipv6_prefix, 8); + memcpy(&addr.s6_addr[8], &addr_ipv4, 4); + route6set(s, addr, 96, add); + } } // From c3eb1be0b41d89ee700e0a431ff6163a63df8f45 Mon Sep 17 00:00:00 2001 From: Samuel Thibault Date: Wed, 17 May 2023 23:42:08 +0200 Subject: [PATCH 12/28] Separate out DHCP processing So we can later call it on UDP datagrams obtained from UDP socket. 
--- dhcp6.c | 53 ++++++++++++++++++++++++++++++++++++++++------------- dhcp6.h | 2 +- icmp.c | 2 +- l2tpns.h | 2 +- ppp.c | 2 +- 5 files changed, 44 insertions(+), 17 deletions(-) diff --git a/dhcp6.c b/dhcp6.c index cb145eb..4454f4e 100644 --- a/dhcp6.c +++ b/dhcp6.c @@ -29,7 +29,7 @@ static struct dhcp6_in_option list_option; static int dhcpv6_format_dns_search_name(const char *strdns, uint8_t *buffer); -static void dhcp6_send_reply(sessionidt s, tunnelidt t, struct in6_addr *ip6_src) +static void dhcp6_send_reply(sessionidt s, tunnelidt t, const struct in6_addr *ip6_src) { struct ip6_hdr *p_ip6_hdr; struct udphdr *p_udp; @@ -309,26 +309,22 @@ static char * get_msg_type(uint8_t type) } } -void dhcpv6_process(sessionidt s, tunnelidt t, uint8_t *p, uint16_t l) +void dhcpv6_process(sessionidt s, tunnelidt t, const struct in6_addr *addr, uint8_t *p, uint16_t l) { - struct ip6_hdr *p_ip6_hdr_in; - struct dhcp6_mess_hdr *p_mess_hdr; + struct dhcp6_mess_hdr *p_mess_hdr = (struct dhcp6_mess_hdr *) p; struct dhcp6_opt_h *p_opt; uint8_t *p_end; uint16_t len; CSTAT(dhcpv6_process); - p_ip6_hdr_in = (struct ip6_hdr *) p; - p_mess_hdr = (struct dhcp6_mess_hdr *) (p + 48); - LOG(3, s, t, "Got DHCPv6 message Type: %s(%d)\n", get_msg_type(p_mess_hdr->type), p_mess_hdr->type); if (!session[s].route6[0].ipv6route.s6_addr[0] || !session[s].route6[0].ipv6prefixlen) return; p_opt = (struct dhcp6_opt_h *) &p_mess_hdr[1]; - p_end = ((uint8_t *)p_ip6_hdr_in) + ntohs(p_ip6_hdr_in->ip6_plen) + sizeof(*p_ip6_hdr_in); + p_end = p + l; memset(&list_option, 0, sizeof(list_option)); list_option.p_mess_hdr = p_mess_hdr; while (((uint8_t *)p_opt) < p_end) @@ -413,7 +409,7 @@ void dhcpv6_process(sessionidt s, tunnelidt t, uint8_t *p, uint16_t l) return; } - dhcp6_send_reply(s, t, &p_ip6_hdr_in->ip6_src); + dhcp6_send_reply(s, t, addr); } break; @@ -458,8 +454,8 @@ void dhcpv6_process(sessionidt s, tunnelidt t, uint8_t *p, uint16_t l) return; } - dhcp6_send_reply(s, t, &p_ip6_hdr_in->ip6_src); - 
send_ipv6_ra(s, t, &p_ip6_hdr_in->ip6_src); // send a RA + dhcp6_send_reply(s, t, addr); + send_ipv6_ra(s, t, addr); // send a RA } break; @@ -489,7 +485,7 @@ void dhcpv6_process(sessionidt s, tunnelidt t, uint8_t *p, uint16_t l) return; } - dhcp6_send_reply(s, t, &p_ip6_hdr_in->ip6_src); + dhcp6_send_reply(s, t, addr); } break; @@ -501,7 +497,7 @@ void dhcpv6_process(sessionidt s, tunnelidt t, uint8_t *p, uint16_t l) return; } - dhcp6_send_reply(s, t, &p_ip6_hdr_in->ip6_src); + dhcp6_send_reply(s, t, addr); } break; @@ -527,6 +523,37 @@ void dhcpv6_process(sessionidt s, tunnelidt t, uint8_t *p, uint16_t l) return; } +void dhcpv6_process_from_ipv6(sessionidt s, tunnelidt t, uint8_t *p, uint16_t l) +{ + struct ip6_hdr *p_ip6_hdr_in = (struct ip6_hdr *) p; + struct in6_addr *addr = &p_ip6_hdr_in->ip6_src; + uint16_t ipv6_len = ntohs(p_ip6_hdr_in->ip6_plen); + + l -= sizeof(*p_ip6_hdr_in); + p += sizeof(*p_ip6_hdr_in); + + if (ipv6_len > l) + { + LOG(5, 0, 0, "bogus IPv6 packet size??\n"); + return; + } + + if (p_ip6_hdr_in->ip6_nxt != 17) + { + LOG(5, 0, 0, "not UDP DHCP packet??\n"); + return; + } + if (ipv6_len < sizeof(struct udphdr)) + { + LOG(5, 0, 0, "bogus IPv6 packet size for UDP??\n"); + return; + } + ipv6_len -= sizeof(struct udphdr); + p += sizeof(struct udphdr); + + dhcpv6_process(s, t, addr, p, ipv6_len); +} + static int dhcpv6_format_dns_search_name(const char *strdns, uint8_t *buffer) { int n = strlen(strdns); diff --git a/dhcp6.h b/dhcp6.h index 4b0eb19..54cd76d 100644 --- a/dhcp6.h +++ b/dhcp6.h @@ -212,7 +212,7 @@ struct dhcp6_opt_ia_prefix { } __attribute__((packed)); // dhcp6.c -void dhcpv6_process(uint16_t s, uint16_t t, uint8_t *p, uint16_t l); +void dhcpv6_process_from_ipv6(uint16_t s, uint16_t t, uint8_t *p, uint16_t l); void dhcpv6_init(void); #endif /* __DHCP6_H__ */ diff --git a/icmp.c b/icmp.c index 54690eb..168eb5f 100644 --- a/icmp.c +++ b/icmp.c @@ -96,7 +96,7 @@ struct nd_opt_rdnss_info_l2tpns struct in6_addr nd_opt_rdnssi[0]; }; -void 
send_ipv6_ra(sessionidt s, tunnelidt t, struct in6_addr *ip) +void send_ipv6_ra(sessionidt s, tunnelidt t, const struct in6_addr *ip) { struct nd_opt_prefix_info *pinfo; struct ip6_hdr *p_ip6_hdr; diff --git a/l2tpns.h b/l2tpns.h index 9220b9e..80cccf5 100644 --- a/l2tpns.h +++ b/l2tpns.h @@ -960,7 +960,7 @@ int rad_tunnel_pwdecode(uint8_t *pl2tpsecret, size_t *pl2tpsecretlen, const char // l2tpns.c clockt backoff(uint8_t try); -void send_ipv6_ra(sessionidt s, tunnelidt t, struct in6_addr *ip); +void send_ipv6_ra(sessionidt s, tunnelidt t, const struct in6_addr *ip); void route6set(sessionidt s, struct in6_addr ip, int prefixlen, int add); void routes6set(sessionidt s, sessiont *sp, int add); sessionidt sessionbyip(in_addr_t ip); diff --git a/ppp.c b/ppp.c index 43c4213..1f5791f 100644 --- a/ppp.c +++ b/ppp.c @@ -2286,7 +2286,7 @@ void processipv6in(sessionidt s, tunnelidt t, uint8_t *p, uint16_t l) *(uint16_t *)(p + 34) == 0 && *(p + 36) == 0 && *(p + 37) == 1 && *(p + 38) == 0 && *(p + 39) == 2 && *(p + 40) == 2 && *(p + 41) == 0x22 && *(p + 42) == 2 && *(p + 43) == 0x23) { - dhcpv6_process(s, t, p, l); + dhcpv6_process_from_ipv6(s, t, p, l); return; } From 13d7080ac19a5ac762022131e541ce27fc716c18 Mon Sep 17 00:00:00 2001 From: Samuel Thibault Date: Thu, 18 May 2023 00:38:19 +0200 Subject: [PATCH 13/28] Add recvfromto6 --- l2tpns.c | 4 +-- util.c | 93 +++++++++++++++++++++++++++++++++++++++++++------------- util.h | 5 ++- 3 files changed, 78 insertions(+), 24 deletions(-) diff --git a/l2tpns.c b/l2tpns.c index 209d637..1fa32a0 100644 --- a/l2tpns.c +++ b/l2tpns.c @@ -5205,14 +5205,14 @@ static void mainloop(void) case FD_TYPE_CONTROL: // nsctl commands alen = sizeof(addr); - s = recvfromto(controlfd, p, size_bufp, MSG_WAITALL, (struct sockaddr *) &addr, &alen, &local); + s = recvfromto(controlfd, p, size_bufp, MSG_WAITALL, (struct sockaddr *) &addr, &alen, &local, NULL); if (s > 0) processcontrol(p, s, &addr, alen, &local); n--; break; case FD_TYPE_DAE: // DAE 
requests alen = sizeof(addr); - s = recvfromto(daefd, p, size_bufp, MSG_WAITALL, (struct sockaddr *) &addr, &alen, &local); + s = recvfromto(daefd, p, size_bufp, MSG_WAITALL, (struct sockaddr *) &addr, &alen, &local, NULL); if (s > 0) processdae(p, s, &addr, alen, &local); n--; break; diff --git a/util.c b/util.c index 5c7c44b..6e50fce 100644 --- a/util.c +++ b/util.c @@ -1,9 +1,11 @@ /* Misc util functions */ +#define _GNU_SOURCE #include #include #include #include +#include #include #include #include @@ -132,41 +134,90 @@ pid_t fork_and_close() return pid; } +static ssize_t recvfromtox(int s, void *buf, size_t len, int flags, + struct sockaddr *from, socklen_t *fromlen, char *cbuf, size_t cbuflen, struct msghdr *msg) +{ + ssize_t r; + struct iovec vec; + + memset(msg, 0, sizeof(*msg)); + msg->msg_name = from; + msg->msg_namelen = *fromlen; + + vec.iov_base = buf; + vec.iov_len = len; + msg->msg_iov = &vec; + msg->msg_iovlen = 1; + msg->msg_flags = 0; + + msg->msg_control = cbuf; + msg->msg_controllen = cbuflen; + + if ((r = recvmsg(s, msg, flags)) < 0) + return r; + + if (fromlen) + *fromlen = msg->msg_namelen; + + return r; +} + ssize_t recvfromto(int s, void *buf, size_t len, int flags, - struct sockaddr *from, socklen_t *fromlen, struct in_addr *toaddr) + struct sockaddr *from, socklen_t *fromlen, struct in_addr *toaddr, int *ifidx) { ssize_t r; struct msghdr msg; struct cmsghdr *cmsg; - struct iovec vec; - char cbuf[128]; + char cbuf[BUFSIZ]; - memset(&msg, 0, sizeof(msg)); - msg.msg_name = from; - msg.msg_namelen = *fromlen; - - vec.iov_base = buf; - vec.iov_len = len; - msg.msg_iov = &vec; - msg.msg_iovlen = 1; - msg.msg_flags = 0; - - msg.msg_control = cbuf; - msg.msg_controllen = sizeof(cbuf); - - if ((r = recvmsg(s, &msg, flags)) < 0) + if ((r = recvfromtox(s, buf, len, flags, from, fromlen, cbuf, sizeof(cbuf), &msg)) < 0) return r; - if (fromlen) - *fromlen = msg.msg_namelen; - memset(toaddr, 0, sizeof(*toaddr)); + if (toaddr) + memset(toaddr, 0, 
sizeof(*toaddr)); + if (ifidx) + *ifidx = -1; for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) { if (cmsg->cmsg_level == SOL_IP && cmsg->cmsg_type == IP_PKTINFO) { struct in_pktinfo *i = (struct in_pktinfo *) CMSG_DATA(cmsg); - memcpy(toaddr, &i->ipi_addr, sizeof(*toaddr)); + if (toaddr) + memcpy(toaddr, &i->ipi_addr, sizeof(*toaddr)); + if (ifidx) + *ifidx = i->ipi_ifindex; + break; + } + } + + return r; +} + +ssize_t recvfromto6(int s, void *buf, size_t len, int flags, + struct sockaddr *from, socklen_t *fromlen, struct in6_addr *toaddr, int *ifidx) +{ + ssize_t r; + struct msghdr msg; + struct cmsghdr *cmsg; + char cbuf[BUFSIZ]; + + if ((r = recvfromtox(s, buf, len, flags, from, fromlen, cbuf, sizeof(cbuf), &msg)) < 0) + return r; + + if (toaddr) + memset(toaddr, 0, sizeof(*toaddr)); + if (ifidx) + *ifidx = -1; + for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) + { + if (cmsg->cmsg_level == SOL_IPV6 && cmsg->cmsg_type == IPV6_PKTINFO) + { + struct in6_pktinfo *i = (struct in6_pktinfo *) CMSG_DATA(cmsg); + if (toaddr) + memcpy(toaddr, &i->ipi6_addr, sizeof(*toaddr)); + if (ifidx) + *ifidx = i->ipi6_ifindex; break; } } diff --git a/util.h b/util.h index 332c6f8..88ebeaf 100644 --- a/util.h +++ b/util.h @@ -10,6 +10,9 @@ ssize_t sendtofrom(int s, void const *buf, size_t len, int flags, struct sockaddr const *to, socklen_t tolen, struct in_addr const *from); ssize_t recvfromto(int s, void *buf, size_t len, int flags, - struct sockaddr *from, socklen_t *fromlen, struct in_addr *toaddr); + struct sockaddr *from, socklen_t *fromlen, struct in_addr *toaddr, int *ifidx); + +ssize_t recvfromto6(int s, void *buf, size_t len, int flags, + struct sockaddr *from, socklen_t *fromlen, struct in6_addr *toaddr, int *ifidx); #endif /* __UTIL_H__ */ From a9e18411d36b0d73ff5db05598ded388a13211a1 Mon Sep 17 00:00:00 2001 From: Samuel Thibault Date: Sun, 23 Apr 2023 13:50:04 +0200 Subject: [PATCH 14/28] Add L2TP offloading support Fixes #13 
Also-by: Dominique Martinet --- cluster.c | 9 +- docs/manpages/startup-config.5 | 6 + docs/src/man/startup-config.5.md | 4 + etc/startup-config.default | 3 + garden.c | 2 + l2tpns.c | 684 +++++++++++++++++++++++++++++-- l2tpns.h | 30 +- util.c | 12 + 8 files changed, 715 insertions(+), 35 deletions(-) diff --git a/cluster.c b/cluster.c index d68918a..ddfc1fe 100644 --- a/cluster.c +++ b/cluster.c @@ -1364,14 +1364,7 @@ static int cluster_recv_tunnel(int more, uint8_t *p) } } - memcpy(&tunnel[more], p, sizeof(tunnel[more]) ); - - // - // Clear tunnel control messages. These are dynamically allocated. - // If we get unlucky, this may cause the tunnel to drop! - // - tunnel[more].controls = tunnel[more].controle = NULL; - tunnel[more].controlc = 0; + load_tunnel(more, (tunnelt *) p); // Copy tunnel into tunnel table.. LOG(5, 0, more, "Received tunnel update\n"); diff --git a/docs/manpages/startup-config.5 b/docs/manpages/startup-config.5 index e039c15..e1babbd 100644 --- a/docs/manpages/startup-config.5 +++ b/docs/manpages/startup-config.5 @@ -412,6 +412,12 @@ connected users. .PP Number of token buckets to allocate for throttling. Each throttled session requires two buckets (in and out). +.PP +\f[B]kernel_accel\f[R] (boolean) +.PP +Determines whether or not to enable kernel acceleration. +Note that only one l2tpns instance can use it per network namespace, +otherwise they will step on each other. .SS DHCPv6 And IPv6 SETTINGS .PP \f[B]dhcp6_preferred_lifetime\f[R] (int) diff --git a/docs/src/man/startup-config.5.md b/docs/src/man/startup-config.5.md index 885470a..60176e3 100644 --- a/docs/src/man/startup-config.5.md +++ b/docs/src/man/startup-config.5.md @@ -251,6 +251,10 @@ The following `variables` may be set: Number of token buckets to allocate for throttling. Each throttled session requires two buckets (in and out). +**kernel\_accel** (boolean) + + Determines whether or not to enable kernel acceleration. 
Note that only one l2tpns instance can use it per network namespace, otherwise they will step on each other. + ## DHCPv6 And IPv6 SETTINGS **dhcp6\_preferred\_lifetime** (int) diff --git a/etc/startup-config.default b/etc/startup-config.default index 2249454..3b3f223 100644 --- a/etc/startup-config.default +++ b/etc/startup-config.default @@ -146,3 +146,6 @@ set ppp_keepalive yes # Walled garden #load plugin "garden" + +# Kernel acceleration, enable on no more than one instance! +#set kernel_accel yes diff --git a/garden.c b/garden.c index 0e6880c..bbfca34 100644 --- a/garden.c +++ b/garden.c @@ -238,6 +238,8 @@ int garden_session(sessiont *s, int flag, char *newuser) s->walled_garden = 0; + // TODO: try to enable kernel acceleration with switch_kernel_accel(s); + if (flag != F_CLEANUP) { /* OK, we're up! */ diff --git a/l2tpns.c b/l2tpns.c index 1fa32a0..54efdf0 100644 --- a/l2tpns.c +++ b/l2tpns.c @@ -49,6 +49,8 @@ #define PPPIOCUNBRIDGECHAN _IO('t', 54) #endif +#define PPP_IF_PREFIX "ppp" + #include "md5.h" #include "dhcp6.h" #include "l2tpns.h" @@ -68,6 +70,13 @@ #include "pppoe.h" #include "dhcp6.h" +#ifdef HAVE_EPOLL +# include +#else +# define FAKE_EPOLL_IMPLEMENTATION /* include the functions */ +# include "fake_epoll.h" +#endif + char * Vendor_name = "Linux L2TPNS"; uint32_t call_serial_number = 0; @@ -215,6 +224,7 @@ config_descriptt config_values[] = { CONFIG("primary_ipv6_dns", default_ipv6_dns1, IPv6), CONFIG("secondary_ipv6_dns", default_ipv6_dns2, IPv6), CONFIG("default_ipv6_domain_list", default_ipv6_domain_list, STRING), + CONFIG("kernel_accel", kernel_accel, BOOL), { NULL, 0, 0, 0 } }; @@ -240,6 +250,7 @@ static sessiont shut_acct[8192]; static sessionidt shut_acct_n = 0; tunnelt *tunnel = NULL; // Array of tunnel structures. +tunnellocalt *tunn_local = NULL; // Array of local per-tunnel structures. bundlet *bundle = NULL; // Array of bundle structures. fragmentationt *frag = NULL; // Array of fragmentation structures. 
sessiont *session = NULL; // Array of session structures. @@ -612,12 +623,23 @@ static int create_kernel_tunnel(uint32_t tid, uint32_t peer_tid) char data[64]; } req; + if (!config->kernel_accel) + { + /* Disabled */ + errno = EPERM; + return -1; + } + if (genl_l2tp_id < 0) { errno = ENOSYS; return -1; } + if (tunn_local[tid].l2tp_fd >= 0) + /* Already set up */ + return 0; + LOG(3, 0, tid, "Creating kernel tunnel from %u to %u\n", tid, peer_tid); memset(&req, 0, sizeof(req)); @@ -629,7 +651,11 @@ static int create_kernel_tunnel(uint32_t tid, uint32_t peer_tid) req.glh.cmd = L2TP_CMD_TUNNEL_CREATE; req.glh.version = L2TP_GENL_VERSION; - uint32_t fd = udpfd[tunnel[tid].indexudp]; + int fd; + if (initudp(&fd, config->bind_n_address[tunnel[tid].indexudp], + htonl(tunnel[tid].ip), htons(tunnel[tid].port)) < 0) + return -1; + genetlink_addattr(&req.nh, L2TP_ATTR_FD, &fd, sizeof(fd)); genetlink_addattr(&req.nh, L2TP_ATTR_CONN_ID, &tid, sizeof(tid)); genetlink_addattr(&req.nh, L2TP_ATTR_PEER_CONN_ID, &peer_tid, sizeof(peer_tid)); @@ -643,6 +669,7 @@ static int create_kernel_tunnel(uint32_t tid, uint32_t peer_tid) if (genetlink_send(&req.nh) < 0) { LOG(2, 0, tid, "Can't create tunnel %d to %d: %s\n", tid, peer_tid, strerror(errno)); + close(fd); return -1; } @@ -650,12 +677,48 @@ static int create_kernel_tunnel(uint32_t tid, uint32_t peer_tid) if (size < 0) { LOG(1, 0, 0, "Can't receive answer for tunnel creation: %s\n", strerror(errno)); + close(fd); return -1; } if (netlink_handle_ack((struct nlmsghdr *)&req, 1, 0, NULL) < 0) + { + close(fd); + return -1; + } + + struct epoll_event e; + static struct event_data d1[MAXTUNNEL]; + e.events = EPOLLIN; + d1[tid].type = FD_TYPE_L2TP; + d1[tid].index = tid; + e.data.ptr = &d1[tid]; + + epoll_ctl(epollfd, EPOLL_CTL_ADD, fd, &e); + + tunn_local[tid].l2tp_fd = fd; + return 0; +} + +// +// Update remote address of kernel tunnel +static int update_kernel_tunnel(sessionidt s, tunnelidt t) +{ + if (tunn_local[t].l2tp_fd < 0) return 
-1; + struct sockaddr_in tunneladdr; + memset(&tunneladdr, 0, sizeof(tunneladdr)); + tunneladdr.sin_family = AF_INET; + tunneladdr.sin_addr.s_addr = htonl(tunnel[t].ip); + tunneladdr.sin_port = htons(tunnel[t].port); + + int ret = connect(tunn_local[t].l2tp_fd, (struct sockaddr *)&tunneladdr, sizeof(tunneladdr)); + if (ret < 0) + { + LOG(2, s, t, "Can't switch tunnel UDP socket: %s\n", strerror(errno)); + return -1; + } return 0; } @@ -719,9 +782,17 @@ static int delete_kernel_tunnel(uint32_t tid) LOG(1, 0, 0, "Can't receive answer for tunnel deletion: %s\n", strerror(errno)); return -1; } + if (netlink_handle_ack((struct nlmsghdr *)&req, 1, 0, NULL) < 0) return -1; + if (tunn_local[tid].l2tp_fd >= 0) + { + epoll_ctl(epollfd, EPOLL_CTL_DEL, tunn_local[tid].l2tp_fd, NULL); + close(tunn_local[tid].l2tp_fd); + tunn_local[tid].l2tp_fd = -1; + } + return 0; } @@ -755,6 +826,13 @@ static int create_kernel_session(uint32_t tid, uint32_t peer_tid, uint32_t sid, return -1; } + if (tunn_local[tid].l2tp_fd < 0) + { + /* Didn't create kernel tunnel first */ + errno = ENOENT; + return -1; + } + LOG(3, sid, tid, "Creating kernel session from %u:%u to %u:%u\n", tid, sid, peer_tid, peer_sid); memset(&req, 0, sizeof(req)); @@ -916,6 +994,37 @@ static int create_ppp_socket(int udp_fd, uint32_t tid, uint32_t peer_tid, uint32 return pppox_fd; } +// +// Create the kernel session and PPPoX socket for this session +static int create_kernel_pppox(sessionidt s) +{ + tunnelidt t = session[s].tunnel; + + if (tunn_local[t].l2tp_fd < 0) + /* Tunnel not set up yet */ + return -1; + + tunnelidt tfar = tunnel[t].far; + sessionidt sfar = session[s].far; + + LOG(3, s, t, "Creating kernel-accelerated pppox socket from %u:%u to %u:%u\n", t, s, tfar, sfar); + + if (create_kernel_session(t, tfar, s, sfar) < 0) + return -1; + + struct sockaddr_in tunneladdr; + memset(&tunneladdr, 0, sizeof(tunneladdr)); + tunneladdr.sin_family = AF_INET; + tunneladdr.sin_addr.s_addr = htonl(tunnel[t].ip); + 
tunneladdr.sin_port = htons(tunnel[t].port); + + int pppox_fd = create_ppp_socket(tunn_local[t].l2tp_fd, t, tfar, s, sfar, (struct sockaddr *) &tunneladdr, sizeof(tunneladdr)); + if (pppox_fd < 0) + return -1; + + return pppox_fd; +} + // // Get the kernel PPP channel static int get_kernel_ppp_chan(sessionidt s, int pppox_fd) @@ -1013,6 +1122,257 @@ static int create_kernel_ppp_if(sessionidt s, int ppp_chan_fd, int *ifunit) return ppp_if_fd; } +// +// Tell whether we can try to enable PPP acceleration +static int can_kernel_accel(sessionidt s) +{ + if (!config->kernel_accel) + /* Disabled */ + return 0; + + if (session[s].bundle) + /* MPPP not supported yet */ + return 0; + + if (session[s].forwardtosession) + /* Forwarding not supported yet */ + return 0; + + if (session[s].throttle_in || session[s].throttle_out) + /* Throttling not supported */ + return 0; + + if (session[s].filter_in || session[s].filter_out) + /* Filtering not supported */ + return 0; + + if (session[s].snoop_ip) + /* Snooping not supported */ + return 0; + + if (session[s].walled_garden) + /* Walled garden not supported */ + return 0; + + /* Looks ok! 
*/ + return 1; +} + +// +// Create the kernel PPP acceleration +static int create_kernel_accel(sessionidt s) +{ + tunnelidt t = session[s].tunnel; + + if (sess_local[s].ppp_if_idx) + /* Already set up */ + return 0; + + if (!can_kernel_accel(s)) + return -1; + + int pppox_fd = create_kernel_pppox(s); + if (pppox_fd < 0) + return -1; + + int ppp_chan_fd = create_kernel_ppp_chan(s, pppox_fd); + if (ppp_chan_fd < 0) + goto err_pppox_fd; + + int ifunit = -1; + int ppp_if_fd = create_kernel_ppp_if(s, ppp_chan_fd, &ifunit); + if (ppp_if_fd < 0) + goto err_chan_fd; + + struct ifreq ifr; + memset(&ifr, 0, sizeof(ifr)); + snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), PPP_IF_PREFIX"%u", ifunit); + if (ioctl(tunn_local[t].l2tp_fd, SIOCGIFINDEX, &ifr) < 0) + { + LOG(2, s, t, "Can't get if index of %s: %s\n", ifr.ifr_name, strerror(errno)); + goto err_if_fd; + } + + if (setupif(ifr.ifr_ifindex, session[s].mru, 0)) + { + LOG(2, s, t, "Can't configure %s: %s\n", ifr.ifr_name, strerror(errno)); + goto err_if_fd; + } + + struct epoll_event e; + e.events = EPOLLIN; + + static struct event_data d1[MAXSESSION]; + d1[s].type = FD_TYPE_PPPOX; + d1[s].index = s; + e.data.ptr = &d1[s]; + + epoll_ctl(epollfd, EPOLL_CTL_ADD, pppox_fd, &e); + + static struct event_data d2[MAXSESSION]; + d2[s].type = FD_TYPE_PPP_CHAN; + d2[s].index = s; + e.data.ptr = &d2[s]; + + epoll_ctl(epollfd, EPOLL_CTL_ADD, ppp_chan_fd, &e); + + static struct event_data d3[MAXSESSION]; + d3[s].type = FD_TYPE_PPP_IF; + d3[s].index = s; + e.data.ptr = &d3[s]; + + epoll_ctl(epollfd, EPOLL_CTL_ADD, ppp_if_fd, &e); + + sess_local[s].pppox_fd = pppox_fd; + sess_local[s].ppp_chan_fd = ppp_chan_fd; + sess_local[s].ppp_if_fd = ppp_if_fd; + sess_local[s].ppp_if_idx = ifr.ifr_ifindex; + + memset(&sess_local[s].last_stats, 0, sizeof(sess_local[s].last_stats)); + + return 0; + +err_if_fd: + close(ppp_if_fd); +err_chan_fd: + close(ppp_chan_fd); +err_pppox_fd: + close(pppox_fd); + return -1; +} + +// +// Delete the kernel PPP 
acceleration +static int delete_kernel_accel(sessionidt s) +{ + if (!sess_local[s].ppp_if_idx) + /* Already stopped */ + return 0; + + LOG(3, s, session[s].tunnel, "Stopping kernel-accelerated support for %u:%u\n", session[s].tunnel, s); + + sess_local[s].ppp_if_idx = 0; + + ioctl(sess_local[s].ppp_chan_fd, PPPIOCDISCONN); + + epoll_ctl(epollfd, EPOLL_CTL_DEL, sess_local[s].ppp_if_fd, NULL); + close(sess_local[s].ppp_if_fd); + sess_local[s].ppp_if_fd = -1; + + epoll_ctl(epollfd, EPOLL_CTL_DEL, sess_local[s].ppp_chan_fd, NULL); + close(sess_local[s].ppp_chan_fd); + sess_local[s].ppp_chan_fd = -1; + + epoll_ctl(epollfd, EPOLL_CTL_DEL, sess_local[s].pppox_fd, NULL); + close(sess_local[s].pppox_fd); + sess_local[s].pppox_fd = -1; + + delete_kernel_session(session[s].tunnel, s); + + return 0; +} + +// +// Enable (set=1) or disable (set=0) kernel PPP acceleration +// This basically calls create/delete_kernel_accel, but also updates routes +static void set_kernel_accel(sessionidt s, int set) +{ + if (set && !can_kernel_accel(s)) + /* Still cannot enable it */ + return; + + tunnelidt t = session[s].tunnel; + if (set && tunnel[t].state == TUNNELUNDEF) + /* We don't know the tunnel yet */ + return; + + routesset(s, &session[s], 0); + if (session[s].ppp.ipv6cp == Opened) + routes6set(s, &session[s], 0); + + if (set) + { + create_kernel_tunnel(t, tunnel[t].far); + create_kernel_accel(s); + } + else + delete_kernel_accel(s); + + routesset(s, &session[s], 1); + if (session[s].ppp.ipv6cp == Opened) + routes6set(s, &session[s], 1); +} + +// +// Try to enable/disable PPP acceleration as allowed +// This is typically called when switching a parameter that changes whether +// acceleration is allowed, e.g. 
snoop +void switch_kernel_accel(sessionidt s) +{ + if (!sess_local[s].ppp_if_idx) + { + /* Acceleration disabled */ + + if (!can_kernel_accel(s)) + /* Still cannot enable it */ + return; + + /* Try to enable */ + set_kernel_accel(s, 1); + } + else + { + /* Acceleration enabled */ + + if (can_kernel_accel(s)) + /* Still allowed to enable it */ + return; + + /* Has to disable it */ + set_kernel_accel(s, 0); + } +} + +// +// Get traffic statistics from kernel and apply to our counters +static void apply_kernel_stats(sessionidt s) +{ + tunnelidt t = session[s].tunnel; + + if (session[s].tunnel == T_FREE) + /* It is free */ + return; + + if (!sess_local[s].ppp_if_idx) + /* It does not have kernel acceleration */ + return; + + struct pppol2tp_ioc_stats stats, *last_stats = &sess_local[s].last_stats; + int ret = ioctl(sess_local[s].pppox_fd, PPPIOCGL2TPSTATS, &stats); + if (ret < 0) + { + LOG(3, s, t, "Can't get stats with PPPIOCGL2TPSTATS: %s\n", strerror(errno)); + return; + } + + /* Some trafic from peer went through kernel, notice it */ + if (stats.rx_packets - last_stats->rx_packets) + session[s].last_packet = time_now; + + update_session_out_stat(s, + stats.tx_packets - last_stats->tx_packets, + stats.tx_bytes - last_stats->tx_bytes); + // stats.tx_errors + update_session_in_stat(s, + stats.rx_packets - last_stats->rx_packets, + stats.rx_bytes - last_stats->rx_bytes); + // stats.rx_seq_discards + // stats.rx_oos_packets + // stats.rx_errors + + *last_stats = stats; +} + // // Bridge kernel channels to accelerate LAC static int bridge_kernel_chans(sessionidt s, int pppox_fd, int pppox_fd2) @@ -1031,6 +1391,21 @@ static int bridge_kernel_chans(sessionidt s, int pppox_fd, int pppox_fd2) return 0; } +// Get interface idx for session +static int session_if_idx(sessionidt s) +{ + if (s != 0) + { + int idx = sess_local[s].ppp_if_idx; + if (idx > 0) + // Kernel-accelerated interface + return idx; + } + + // Software interface + return tunidx; +} + // Add a route // // This 
adds it to the routing table, advertises it @@ -1075,7 +1450,8 @@ static void routeset(sessionidt s, in_addr_t ip, int prefixlen, in_addr_t gw, in req.rt.rtm_scope = RT_SCOPE_LINK; req.rt.rtm_type = RTN_UNICAST; - rtnetlink_addattr(&req.nh, RTA_OIF, &tunidx, sizeof(int)); + int idx = session_if_idx(s); + rtnetlink_addattr(&req.nh, RTA_OIF, &idx, sizeof(idx)); n_ip = htonl(ip); rtnetlink_addattr(&req.nh, RTA_DST, &n_ip, sizeof(n_ip)); if (gw) @@ -1199,7 +1575,8 @@ void route6set(sessionidt s, struct in6_addr ip, int prefixlen, int add) req.rt.rtm_scope = RT_SCOPE_LINK; req.rt.rtm_type = RTN_UNICAST; - rtnetlink_addattr(&req.nh, RTA_OIF, &tunidx, sizeof(int)); + int idx = session_if_idx(s); + rtnetlink_addattr(&req.nh, RTA_OIF, &idx, sizeof(idx)); rtnetlink_addattr(&req.nh, RTA_DST, &ip, sizeof(ip)); metric = 1; rtnetlink_addattr(&req.nh, RTA_METRICS, &metric, sizeof(metric)); @@ -1383,6 +1760,12 @@ static void initnetlink(void) genl_l2tp_id = netlink_get_l2tp_id(); LOG(3, 0, 0, "gen l2tp id is %d\n", genl_l2tp_id); + + if (config->kernel_accel) + { + delete_kernel_sessions(); + delete_kernel_tunnels(); + } } // @@ -2985,6 +3368,8 @@ void throttle_session(sessionidt s, int rate_in, int rate_out) session[s].throttle_out = rate_out; } + + switch_kernel_accel(s); } // add/remove filters from session (-1 = no change) @@ -3023,6 +3408,8 @@ void filter_session(sessionidt s, int filter_in, int filter_out) session[s].filter_out = filter_out; } + + switch_kernel_accel(s); } // start tidy shutdown of session @@ -3162,9 +3549,11 @@ void sessionshutdown(sessionidt s, char const *reason, int cdn_result, int cdn_e } cluster_send_bundle(b); - } + } } + delete_kernel_accel(s); + if (session[s].throttle_in || session[s].throttle_out) // Unthrottle if throttled. 
throttle_session(s, 0, 0); @@ -3265,8 +3654,13 @@ void sendipv6cp(sessionidt s, tunnelidt t) static void sessionclear(sessionidt s) { + delete_kernel_accel(s); + memset(&session[s], 0, sizeof(session[s])); memset(&sess_local[s], 0, sizeof(sess_local[s])); + sess_local[s].pppox_fd = -1; + sess_local[s].ppp_chan_fd = -1; + sess_local[s].ppp_if_fd = -1; memset(&cli_session_actions[s], 0, sizeof(cli_session_actions[s])); session[s].tunnel = T_FREE; // Mark it as free. @@ -3312,7 +3706,17 @@ void sessionkill(sessionidt s, char *reason) static void tunnelclear(tunnelidt t) { if (!t) return; + + if (tunn_local[t].l2tp_fd >= 0) + { + epoll_ctl(epollfd, EPOLL_CTL_DEL, tunn_local[t].l2tp_fd, NULL); + close(tunn_local[t].l2tp_fd); + } + memset(&tunnel[t], 0, sizeof(tunnel[t])); + memset(&tunn_local[t], 0, sizeof(tunn_local[t])); + tunn_local[t].l2tp_fd = -1; + tunnel[t].state = TUNNELFREE; } @@ -3347,6 +3751,8 @@ static void tunnelkill(tunnelidt t, char *reason) if (session[s].tunnel == t) sessionkill(s, reason); + delete_kernel_tunnel(t); + // free tunnel tunnelclear(t); LOG(1, 0, t, "Kill tunnel %u: %s\n", t, reason); @@ -3374,6 +3780,8 @@ static void tunnelshutdown(tunnelidt t, char *reason, int result, int error, cha if (session[s].tunnel == t) sessionshutdown(s, reason, CDN_NONE, TERM_ADMIN_RESET); + delete_kernel_tunnel(t); + tunnel[t].state = TUNNELDIE; tunnel[t].die = TIME + 700; // Clean up in 70 seconds cluster_send_tunnel(t); @@ -4097,6 +4505,7 @@ void processudp(uint8_t *buf, int len, struct sockaddr_in *addr, uint16_t indexu // check authenticator if (memcmp(hash, recvchalresponse, 16) == 0) { + create_kernel_tunnel(t, tunnel[t].far); LOG(3, s, t, "sending SCCCN to REMOTE LNS\n"); controlt *c = controlnew(3); // sending SCCCN controls(c, 7, config->multi_n_hostname[tunnel[t].indexudp][0]?config->multi_n_hostname[tunnel[t].indexudp]:hostname, 1); // host name @@ -4122,6 +4531,7 @@ void processudp(uint8_t *buf, int len, struct sockaddr_in *addr, uint16_t indexu 
LOG(3, s, t, "Received SCCN\n"); tunnel[t].state = TUNNELOPEN; tunnel[t].lastrec = time_now; + create_kernel_tunnel(t, tunnel[t].far); controlnull(t); // ack break; case 4: // StopCCN @@ -4303,18 +4713,6 @@ static void processppp(sessionidt s, uint8_t *buf, int len, uint8_t *p, int l, s lac_session_forward(buf, len, s, proto, addr->sin_addr.s_addr, addr->sin_port, indexudpfd); return; } - else if (config->auth_tunnel_change_addr_src) - { - if (tunnel[t].ip != ntohl(addr->sin_addr.s_addr) && - tunnel[t].port == ntohs(addr->sin_port)) - { - // The remotes BAS are a clustered l2tpns server and the source IP has changed - LOG(5, s, t, "The tunnel IP source (%s) has changed by new IP (%s)\n", - fmtaddr(htonl(tunnel[t].ip), 0), fmtaddr(addr->sin_addr.s_addr, 0)); - - tunnel[t].ip = ntohl(addr->sin_addr.s_addr); - } - } if (s && !session[s].opened) // Is something wrong?? { @@ -4330,6 +4728,22 @@ static void processppp(sessionidt s, uint8_t *buf, int len, uint8_t *p, int l, s return; } + if (config->auth_tunnel_change_addr_src) + { + if (tunnel[t].ip != ntohl(addr->sin_addr.s_addr) && + tunnel[t].port == ntohs(addr->sin_port)) + { + // The remotes BAS are a clustered l2tpns server and the source IP has changed + LOG(2, s, t, "The tunnel IP source (%s) has changed by new IP (%s)\n", + fmtaddr(htonl(tunnel[t].ip), 0), fmtaddr(addr->sin_addr.s_addr, 0)); + + tunnel[t].ip = ntohl(addr->sin_addr.s_addr); + + update_kernel_tunnel(s, t); + cluster_send_tunnel(t); + } + } + if (proto == PPPPAP) { session[s].last_packet = time_now; @@ -4445,6 +4859,31 @@ static void processppp(sessionidt s, uint8_t *buf, int len, uint8_t *p, int l, s } } +static void processppp_from_kernel(sessionidt s, uint8_t *p, int l, struct sockaddr_in *addr) +{ + tunnelidt t = session[s].tunnel; + int indexudpfd = tunnel[t].indexudp; + struct sockaddr_in defaddr; + + /* Create L2TP header */ + uint16_t *w = (uint16_t *)p - 3; + w[0] = htons(0x0002); /* L2TP data */ + w[1] = htons(t); + w[2] = htons(s); + + 
if (!addr) + { + /* This is coming from the kernel socket, so it's coming from the address it is bound to */ + memset(&defaddr, 0, sizeof(defaddr)); + defaddr.sin_family = AF_INET; + defaddr.sin_addr.s_addr = htonl(tunnel[t].ip); + defaddr.sin_port = htons(tunnel[t].port); + addr = &defaddr; + } + + processppp(s, (uint8_t *) w, l + 6, p, l, addr, indexudpfd); +} + // read and process packet on tun // (i.e. this routine writes to buf[-8]). static void processtun(uint8_t * buf, int len) @@ -4817,6 +5256,7 @@ static void regular_cleanups(double period) LOG(2, s, session[s].tunnel, "Unsnooping session by CLI\n"); session[s].snoop_ip = 0; session[s].snoop_port = 0; + switch_kernel_accel(s); s_actions++; send++; } @@ -4828,6 +5268,7 @@ static void regular_cleanups(double period) session[s].snoop_ip = cli_session_actions[s].snoop_ip; session[s].snoop_port = cli_session_actions[s].snoop_port; + switch_kernel_accel(s); s_actions++; send++; } @@ -5006,14 +5447,7 @@ static int still_busy(void) return 0; } -#ifdef HAVE_EPOLL -# include -#else -# define FAKE_EPOLL_IMPLEMENTATION /* include the functions */ -# include "fake_epoll.h" -#endif - -// the base set of fds polled: cli, cluster, tun, udp (MAX_UDPFD), control, dae, netlink, udplac, pppoedisc, pppoesess +// the base set of fds polled: cli, cluster, tun, udp (MAX_UDPFD), control, dae, netlink, udplac, pppoedisc, pppoesess, kernel ppp #define BASE_FDS (9 + MAX_UDPFD) // additional polled fds @@ -5023,6 +5457,13 @@ static int still_busy(void) # define EXTRA_FDS 0 #endif +#define L2TP_FDS MAXTUNNEL +#define PPPOX_FDS MAXSESSION +#define PPP_CHAN_FDS MAXSESSION +#define PPP_IF_FDS MAXSESSION + +#define MAX_FDS (BASE_FDS + RADIUS_FDS + EXTRA_FDS + L2TP_FDS + PPPOX_FDS + PPP_CHAN_FDS + PPP_IF_FDS) + // main loop - gets packets on tun or udp and processes them static void mainloop(void) { @@ -5032,7 +5473,7 @@ static void mainloop(void) // and the forwarded pppoe session int size_bufp = sizeof(buf) - 32; clockt next_cluster_ping 
= 0; // send initial ping immediately - struct epoll_event events[BASE_FDS + RADIUS_FDS + EXTRA_FDS]; + struct epoll_event events[MAX_FDS]; int maxevent = sizeof(events)/sizeof(*events); if ((epollfd = epoll_create(maxevent)) < 0) @@ -5249,6 +5690,103 @@ static void mainloop(void) break; } + case FD_TYPE_L2TP: + { + tunnelidt tid = d->index; + if (events[i].events & EPOLLHUP) + { + /* Acceleration tunnel got destroyed... Disable it on our side. */ + LOG(1, 0, tid, "L2tp socket got closed!! Disabling kernel acceleration for this tunnel. Are you running two l2tpns instances in the same network namespace?\n"); + + sessionidt sid; + for (sid = 1; sid <= config->cluster_highest_sessionid ; ++sid) + if (session[sid].tunnel == tid) + set_kernel_accel(sid, 0); + + delete_kernel_tunnel(tid); + } + else + { + alen = sizeof(addr); + s = recvfrom(tunn_local[tid].l2tp_fd, p, size_bufp, 0, (void *) &addr, &alen); + if (s < 0) + { + LOG(1, 0, tid, "Error on l2tp socket: %s\n", strerror(errno)); + } + else + processudp(p, s, &addr, tunnel[tid].indexudp); + } + n--; + break; + } + + case FD_TYPE_PPPOX: + { + sessionidt sid = d->index; + tunnelidt tid = session[sid].tunnel; + alen = sizeof(addr); + s = recvfrom(sess_local[sid].pppox_fd, p, size_bufp, 0, (void *) &addr, &alen); + if (s < 0) + { + LOG(1, sid, tid, "Error on pppox socket: %s\n", strerror(errno)); + set_kernel_accel(sid, 0); + } + else if (s == 0) + { + LOG(1, sid, tid, "EOF on pppox socket\n"); + set_kernel_accel(sid, 0); + } + else + { + LOG(3, sid, tid, "Got frame on pppox socket?? 
%02x %02x %02x %02x\n", p[0], p[1], p[2], p[3]); + processppp_from_kernel(sid, p, s, &addr); + } + n--; + break; + } + + case FD_TYPE_PPP_CHAN: + { + sessionidt sid = d->index; + tunnelidt tid = session[sid].tunnel; + s = read(sess_local[sid].ppp_chan_fd, p, size_bufp); + if (s < 0) + { + LOG(1, sid, tid, "Error on ppp channel: %s\n", strerror(errno)); + set_kernel_accel(sid, 0); + } + else if (s == 0) + { + LOG(1, sid, tid, "EOF on ppp channel\n"); + set_kernel_accel(sid, 0); + } + else + processppp_from_kernel(sid, p, s, NULL); + n--; + break; + } + + case FD_TYPE_PPP_IF: + { + sessionidt sid = d->index; + tunnelidt tid = session[sid].tunnel; + s = read(sess_local[sid].ppp_if_fd, p, size_bufp); + if (s < 0) + { + LOG(1, sid, tid, "Error on ppp if: %s\n", strerror(errno)); + set_kernel_accel(sid, 0); + } + else if (s == 0) + { + LOG(1, sid, tid, "EOF on ppp if\n"); + set_kernel_accel(sid, 0); + } + else + processppp_from_kernel(sid, p, s, NULL); + n--; + break; + } + default: LOG(0, 0, 0, "Unexpected fd type returned from epoll_wait: %d\n", d->type); } @@ -5370,6 +5908,12 @@ static void mainloop(void) struct param_timer p = { time_now }; run_plugins(PLUGIN_TIMER, &p); } + + sessionidt s; + for (s = 1; s <= config->cluster_highest_sessionid ; ++s) + { + apply_kernel_stats(s); + } } // Runs on every machine (master and slaves). 
@@ -5579,6 +6123,11 @@ static void initdata(int optdebug, char *optconfig) LOG(0, 0, 0, "Error doing malloc for tunnels: %s\n", strerror(errno)); exit(1); } + if (!(tunn_local = shared_malloc(sizeof(tunnellocalt) * MAXTUNNEL))) + { + LOG(0, 0, 0, "Error doing malloc for tunn_local: %s\n", strerror(errno)); + exit(1); + } if (!(bundle = shared_malloc(sizeof(bundlet) * MAXBUNDLE))) { LOG(0, 0, 0, "Error doing malloc for bundles: %s\n", strerror(errno)); @@ -5635,6 +6184,10 @@ static void initdata(int optdebug, char *optconfig) memset(cli_tunnel_actions, 0, sizeof(struct cli_tunnel_actions) * MAXSESSION); memset(tunnel, 0, sizeof(tunnelt) * MAXTUNNEL); + memset(tunn_local, 0, sizeof(tunnellocalt) * MAXTUNNEL); + for (i = 0; i < MAXTUNNEL; i++) { + tunn_local[i].l2tp_fd = -1; + } memset(bundle, 0, sizeof(bundlet) * MAXBUNDLE); memset(session, 0, sizeof(sessiont) * MAXSESSION); memset(radius, 0, sizeof(radiust) * MAXRADIUS); @@ -5645,6 +6198,9 @@ static void initdata(int optdebug, char *optconfig) { session[i].next = i + 1; session[i].tunnel = T_UNDEF; // mark it as not filled in. 
+ sess_local[i].pppox_fd = -1; + sess_local[i].ppp_chan_fd = -1; + sess_local[i].ppp_if_fd = -1; } session[MAXSESSION - 1].next = 0; sessionfree = 1; @@ -6125,7 +6681,13 @@ int main(int argc, char *argv[]) rlim.rlim_max = RLIM_INFINITY; // Remove the maximum core size if (setrlimit(RLIMIT_CORE, &rlim) < 0) - LOG(0, 0, 0, "Can't set ulimit: %s\n", strerror(errno)); + LOG(0, 0, 0, "Can't set core ulimit: %s\n", strerror(errno)); + + rlim.rlim_cur = MAX_FDS; + rlim.rlim_max = MAX_FDS; + // Lift the maximum file open limit + if (setrlimit(RLIMIT_NOFILE, &rlim) < 0) + LOG(0, 0, 0, "Can't set nofile ulimit: %s\n", strerror(errno)); // Make core dumps go to /tmp if(chdir("/tmp")) LOG(0, 0, 0, "Error chdir /tmp: %s\n", strerror(errno)); @@ -6640,6 +7202,12 @@ static void update_config() LOG(0, 0, 0, "Can't write to PID file %s: %s\n", config->pid_file, strerror(errno)); } } + + for (i = 1; i <= config->cluster_highest_sessionid ; ++i) + { + if (session[i].ppp.lcp == Opened) + switch_kernel_accel(i); + } } static void read_config_file() @@ -6739,6 +7307,8 @@ int sessionsetup(sessionidt s, tunnelidt t) } } + create_kernel_accel(s); + // no need to set a route for the same IP address of the bundle if (!session[s].bundle || (bundle[session[s].bundle].num_of_links == 1)) // Add the route for this session. @@ -6768,6 +7338,52 @@ int sessionsetup(sessionidt s, tunnelidt t) return 1; // RADIUS OK and IP allocated, done... } +// +// This tunnel just got dropped on us by the master or something. +// Make sure our tables up up to date... +// +int load_tunnel(tunnelidt t, tunnelt *new) +{ + int dropped = 0; + int ip_changed = 0; + + if (tunnel[t].state != TUNNELFREE && new->state == TUNNELFREE) + dropped = 1; + + // if already connected, check if IP changed + if (tunn_local[t].l2tp_fd >= 0 && (tunnel[t].ip != new->ip || tunnel[t].port != new->port)) + ip_changed = 1; + + memcpy(&tunnel[t], new, sizeof(tunnel[t]) ); + + // + // Clear tunnel control messages. 
These are dynamically allocated. + // If we get unlucky, this may cause the tunnel to drop! + // + tunnel[t].controls = tunnel[t].controle = NULL; + tunnel[t].controlc = 0; + + if (tunnel[t].state == TUNNELFREE) + { + if (dropped) + delete_kernel_tunnel(t); + } + else + { + create_kernel_tunnel(t, tunnel[t].far); + + if (ip_changed) { + LOG(2, 0, t, "Updating tunnel IP from heartbeat\n"); + update_kernel_tunnel(0, t); + } + + if (t > config->cluster_highest_tunnelid) // Maintain this in the slave too. + config->cluster_highest_tunnelid = t; + } + + return 1; +} + // // This session just got dropped on us by the master or something. // Make sure our tables up up to date... @@ -6776,6 +7392,7 @@ int load_session(sessionidt s, sessiont *new) { int i; int newip = 0; + int newsession = 0; // Sanity checks. if (new->ip_pool_index >= MAXIPPOOL || @@ -6791,6 +7408,11 @@ int load_session(sessionidt s, sessiont *new) // loading the new session. // + if (new->tunnel != session[s].tunnel || + new->far != session[s].far) + // This is a new session + newsession = 1; + session[s].tunnel = new->tunnel; // For logging in cache_ipmap // See if routes/ip cache need updating @@ -6810,7 +7432,14 @@ int load_session(sessionidt s, sessiont *new) // remove old IPV6 routes... routes6set(s, &session[s], 0); + } + if (newsession) + // The session changed, drop existing kernel acceleration + delete_kernel_accel(s); + + if (newip) + { // add new routes... routesset(s, new, 1); } @@ -6854,6 +7483,9 @@ int load_session(sessionidt s, sessiont *new) if (new->ip_pool_index != -1) fix_address_pool(s); + // and try to enable kernel acceleration + switch_kernel_accel(s); + return 1; } diff --git a/l2tpns.h b/l2tpns.h index 80cccf5..21b5d1b 100644 --- a/l2tpns.h +++ b/l2tpns.h @@ -13,6 +13,9 @@ #include #include #include +#include +#include +#include #include #define VERSION "2.4.1" @@ -446,6 +449,18 @@ typedef struct uint32_t jitteravg; // time in milliseconds of the last fragment. 
uint64_t prev_time; + + // l2tp PPPoL2TP socket + int pppox_fd; + struct pppol2tp_ioc_stats last_stats; + + // ppp channel + int ppp_chan_fd; + + // ppp interface + int ppp_if_fd; + // ppp interface index + int ppp_if_idx; } sessionlocalt; // session flags @@ -479,6 +494,12 @@ typedef struct } tunnelt; +typedef struct +{ + int l2tp_fd; // kernel acceleration UDP socket +} +tunnellocalt; + // 164 bytes per radius session typedef struct // outstanding RADIUS requests { @@ -821,6 +842,7 @@ typedef struct uint32_t dhcp6_server_duid; // DUID of dhcpv6 server (see rfc3315) uint32_t dns6_lifetime; // RDNSS lifetime default 1200 (see rfc6106, rfc4861) (MaxRtrAdvInterval <= Lifetime <= 2*MaxRtrAdvInterval) char default_ipv6_domain_list[255]; + int kernel_accel; // Enable kernel-accelerated support } configt; enum config_typet { INT, STRING, UNSIGNED_LONG, SHORT, BOOL, IPv4, IPv6 }; @@ -1008,6 +1030,7 @@ int sessionsetup(sessionidt s, tunnelidt t); int run_plugins(int plugin_type, void *data); void rebuild_address_pool(void); void throttle_session(sessionidt s, int rate_in, int rate_out); +int load_tunnel(tunnelidt, tunnelt *); int load_session(sessionidt, sessiont *); void become_master(void); // We're the master; kick off any required master initializations. 
@@ -1028,6 +1051,7 @@ extern tunnelt *tunnel; extern bundlet *bundle; extern sessiont *session; extern sessionlocalt *sess_local; +extern tunnellocalt *tunn_local; extern ippoolt *ip_address_pool; #define sessionfree (session[0].next) @@ -1057,7 +1081,11 @@ struct event_data { FD_TYPE_BGP, FD_TYPE_RTNETLINK, FD_TYPE_PPPOEDISC, - FD_TYPE_PPPOESESS + FD_TYPE_PPPOESESS, + FD_TYPE_L2TP, + FD_TYPE_PPPOX, + FD_TYPE_PPP_CHAN, + FD_TYPE_PPP_IF, } type; int index; // for RADIUS, BGP, UDP }; diff --git a/util.c b/util.c index 6e50fce..d2f5445 100644 --- a/util.c +++ b/util.c @@ -127,6 +127,18 @@ pid_t fork_and_close() if (pppoediscfd != -1) close(pppoediscfd); if (pppoesessfd != -1) close(pppoesessfd); + for (i = 0; i <= config->cluster_highest_tunnelid; i++) + { + if (tunn_local[i].l2tp_fd >= 0) close(tunn_local[i].l2tp_fd); + } + + for (i = 0; i <= config->cluster_highest_sessionid; i++) + { + if (sess_local[i].pppox_fd >= 0) close(sess_local[i].pppox_fd); + if (sess_local[i].ppp_chan_fd >= 0) close(sess_local[i].ppp_chan_fd); + if (sess_local[i].ppp_if_fd >= 0) close(sess_local[i].ppp_if_fd); + } + if (snoopfd != -1) close(snoopfd); if (rand_fd != -1) close(rand_fd); if (epollfd != -1) close(epollfd); From 5dcbd68b75a83850fdbafe52a43c0325ec4bddf8 Mon Sep 17 00:00:00 2001 From: Samuel Thibault Date: Mon, 8 May 2023 03:12:45 +0200 Subject: [PATCH 15/28] Add DHCPv6 support with acceleration --- dhcp6.c | 137 ++++++++++++++++++++++++++++++++++++++++++++++++++++++- dhcp6.h | 7 +++ l2tpns.c | 23 ++++++++-- l2tpns.h | 1 + util.c | 1 + 5 files changed, 164 insertions(+), 5 deletions(-) diff --git a/dhcp6.c b/dhcp6.c index 4454f4e..ffb738d 100644 --- a/dhcp6.c +++ b/dhcp6.c @@ -4,13 +4,21 @@ * GPL licenced */ +#define _GNU_SOURCE #include #include #include +#include +#include +#include #include "dhcp6.h" #include "l2tpns.h" #include "ipv6_u.h" +#include "cluster.h" +#include "util.h" + +int dhcpv6fd; struct dhcp6_in_option { @@ -538,7 +546,7 @@ void 
dhcpv6_process_from_ipv6(sessionidt s, tunnelidt t, uint8_t *p, uint16_t l) return; } - if (p_ip6_hdr_in->ip6_nxt != 17) + if (p_ip6_hdr_in->ip6_nxt != IPPROTO_UDP) { LOG(5, 0, 0, "not UDP DHCP packet??\n"); return; @@ -597,6 +605,8 @@ static int dhcpv6_format_dns_search_name(const char *strdns, uint8_t *buffer) void dhcpv6_init(void) { uint32_t id; + int on = 1; + struct sockaddr_in6 addr; dhcp6_local_serverid.opt_hdr.code = htons(D6_OPT_SERVERID); dhcp6_local_serverid.opt_hdr.len = htons(4 + sizeof(id)); @@ -609,4 +619,129 @@ void dhcpv6_init(void) id = htobe32(0xFDFDFAFA); memcpy(dhcp6_local_serverid.duid.u.ll.addr, &id, sizeof(id)); + + dhcpv6fd = socket(AF_INET6, SOCK_DGRAM, IPPROTO_UDP); + if (dhcpv6fd < 0) + LOG(1, 0, 0, "DHCPv6: could not create UDP socket: %s\n", strerror(errno)); + +#ifdef SO_REUSEPORT + if (setsockopt(dhcpv6fd, SOL_SOCKET, SO_REUSEPORT, &on, sizeof(on)) < 0) + LOG(1, 0, 0, "DHCPv6: could not set reusing port: %s\n", strerror(errno)); +#endif + + if (setsockopt(dhcpv6fd, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)) < 0) + LOG(1, 0, 0, "DHCPv6: could not set reusing address: %s\n", strerror(errno)); + +#ifdef IPV6_RECVPKTINFO + if (setsockopt(dhcpv6fd, SOL_IPV6, IPV6_RECVPKTINFO, &on, sizeof(on)) < 0) + LOG(1, 0, 0, "DHCPv6: could not request pktinfo: %s\n", strerror(errno)); +#else + if (setsockopt(dhcpv6fd, SOL_IPV6, IPV6_PKTINFO, &on, sizeof(on)) < 0) + LOG(1, 0, 0, "DHCPv6: could not request pktinfo: %s\n", strerror(errno)); +#endif +#ifdef IPV6_V6ONLY + if (setsockopt(dhcpv6fd, SOL_IPV6, IPV6_V6ONLY, &on, sizeof(on)) < 0) + LOG(1, 0, 0, "DHCPv6: could not set v6only: %s\n", strerror(errno)); +#endif + + memset(&addr, 0, sizeof(addr)); + addr.sin6_family = AF_INET6; + addr.sin6_port = htons(DHCP6_SERVER_PORT); + if (bind(dhcpv6fd, (struct sockaddr *) &addr, sizeof(addr)) < 0) + LOG(1, 0, 0, "DHCPv6: could not bind to DHCPv6 server port\n"); +} + +// +// A new ppp interface was created, watch for DHCPv6 on it +void dhcpv6_listen(int 
ifidx) +{ + struct ipv6_mreq mreq; + + memset(&mreq, 0, sizeof(mreq)); + mreq.ipv6mr_interface = ifidx; + inet_pton(AF_INET6, DHCP6_SERVER_ADDRESS, &mreq.ipv6mr_multiaddr); + if (setsockopt(dhcpv6fd, SOL_IPV6, IPV6_JOIN_GROUP, &mreq, sizeof(mreq)) < 0) + LOG(2, 0, 0, "DHCPv6: could not join DHCPv6 group: %s\n", strerror(errno)); +} + +// +// A DHCPv6 request was received on a ppp interface, receive it +void dhcpv6_process_from_kernel(uint8_t *p, size_t size_bufp) +{ + struct sockaddr_storage fromaddr; + struct sockaddr_in6 *sin6; + socklen_t fromlen = sizeof(fromaddr); + struct in6_addr toaddr; + int ifidx; + int r, s, t; + + r = recvfromto6(dhcpv6fd, p, size_bufp, 0, (struct sockaddr *) &fromaddr, &fromlen, &toaddr, &ifidx); + if (r < 0) + { + static time_t lastwarn; + time_t now = time(NULL); + if (now > lastwarn) + { + LOG(5, 0, 0, "DHCPV6: reception error: %s\n", strerror(errno)); + lastwarn = now; + } + return; + } + LOG(5, 0, 0, "Got packet on DHCP socket on if %d\n", ifidx); + + if (fromaddr.ss_family != AF_INET6) + { + LOG(5, 0, 0, "DHCPV6: got strange family %d\n", fromaddr.ss_family); + return; + } + sin6 = (struct sockaddr_in6 *) &fromaddr; + + if (ntohs(sin6->sin6_port) != DHCP6_CLIENT_PORT) + { + LOG(5, 0, 0, "DHCPV6: got strange client port %d\n", ntohs(sin6->sin6_port)); + return; + } + + for (s = 1; s < MAXSESSION; s++) + { + if (sess_local[s].ppp_if_idx != ifidx) + continue; + + t = session[s].tunnel; + + if (config->cluster_iam_master) + dhcpv6_process(s, t, &sin6->sin6_addr, p, r); + else + { + // DHCPV6 must be managed by the Master. 
+ + // Fake UDPv6 header + struct udphdr *udp = (struct udphdr *)p - 1; + udp->source = sin6->sin6_port; + udp->dest = htons(DHCP6_SERVER_PORT); + udp->len = htons(sizeof(*udp) + r); /* wire format is big-endian, like source/dest */ + // udp->check is not checked by Master anyway + r += sizeof(*udp); + + struct ip6_hdr *ip6 = (struct ip6_hdr *)udp - 1; + ip6->ip6_flow = htonl(0x60000000); /* version 6 in the top nibble, zero traffic class and flow label */ + ip6->ip6_plen = htons(r); + ip6->ip6_nxt = IPPROTO_UDP; + ip6->ip6_hlim = 255; + memcpy(&ip6->ip6_src, &sin6->sin6_addr, sizeof(sin6->sin6_addr)); + memcpy(&ip6->ip6_dst, &toaddr, sizeof(toaddr)); + r += sizeof(*ip6); + + uint16_t *w = (uint16_t *)ip6 - 4; + w[0] = htons(0x0002); /* L2TP data*/ + w[1] = htons(t); + w[2] = htons(s); + w[3] = htons(PPPIPV6); /* PPP protocol */ + r += 8; + + master_forward_packet((uint8_t *) w, r, htonl(tunnel[t].ip), htons(tunnel[t].port), tunnel[t].indexudp); + } + + break; + } } diff --git a/dhcp6.h b/dhcp6.h index 54cd76d..38fa445 100644 --- a/dhcp6.h +++ b/dhcp6.h @@ -7,6 +7,10 @@ #ifndef __DHCP6_H__ #define __DHCP6_H__ +#define DHCP6_CLIENT_PORT 546 +#define DHCP6_SERVER_PORT 547 +#define DHCP6_SERVER_ADDRESS "ff02::1:2" + #define DHCP6_SOLICIT 1 #define DHCP6_ADVERTISE 2 #define DHCP6_REQUEST 3 @@ -212,7 +216,10 @@ struct dhcp6_opt_ia_prefix { } __attribute__((packed)); // dhcp6.c +extern int dhcpv6fd; void dhcpv6_process_from_ipv6(uint16_t s, uint16_t t, uint8_t *p, uint16_t l); void dhcpv6_init(void); +void dhcpv6_listen(int ifidx); +void dhcpv6_process_from_kernel(uint8_t *p, size_t size_bufp); #endif /* __DHCP6_H__ */ diff --git a/l2tpns.c b/l2tpns.c index 54efdf0..d33acc5 100644 --- a/l2tpns.c +++ b/l2tpns.c @@ -1228,6 +1228,8 @@ static int create_kernel_accel(sessionidt s) sess_local[s].ppp_if_fd = ppp_if_fd; sess_local[s].ppp_if_idx = ifr.ifr_ifindex; + dhcpv6_listen(ifr.ifr_ifindex); + memset(&sess_local[s].last_stats, 0, sizeof(sess_local[s].last_stats)); return 0; @@ -5447,8 +5449,8 @@ static int still_busy(void) return 0; } -// the base set of fds polled: cli, cluster, tun, udp (MAX_UDPFD), 
control, dae, netlink, udplac, pppoedisc, pppoesess, kernel ppp -#define BASE_FDS (9 + MAX_UDPFD) +// the base set of fds polled: cli, cluster, tun, udp (MAX_UDPFD), control, dae, netlink, udplac, pppoedisc, pppoesess, dhcpv6 +#define BASE_FDS (10 + MAX_UDPFD) // additional polled fds #ifdef BGP @@ -5464,14 +5466,17 @@ static int still_busy(void) #define MAX_FDS (BASE_FDS + RADIUS_FDS + EXTRA_FDS + L2TP_FDS + PPPOX_FDS + PPP_CHAN_FDS + PPP_IF_FDS) +// for the header of the forwarded MPPP/DHCP packet (see C_MPPP_FORWARD) +#define SLACK 56 + // main loop - gets packets on tun or udp and processes them static void mainloop(void) { int i, j; uint8_t buf[65536]; - uint8_t *p = buf + 32; // for the header of the forwarded MPPP packet (see C_MPPP_FORWARD) + uint8_t *p = buf + SLACK; // for the header of the forwarded MPPP packet (see C_MPPP_FORWARD) // and the forwarded pppoe session - int size_bufp = sizeof(buf) - 32; + int size_bufp = sizeof(buf) - SLACK; clockt next_cluster_ping = 0; // send initial ping immediately struct epoll_event events[MAX_FDS]; int maxevent = sizeof(events)/sizeof(*events); @@ -5528,6 +5533,10 @@ static void mainloop(void) e.data.ptr = &d[i++]; epoll_ctl(epollfd, EPOLL_CTL_ADD, pppoesessfd, &e); + d[i].type = FD_TYPE_DHCPV6; + e.data.ptr = &d[i++]; + epoll_ctl(epollfd, EPOLL_CTL_ADD, dhcpv6fd, &e); + for (j = 0; j < config->nbudpfd; j++) { d[i].type = FD_TYPE_UDP; @@ -5787,6 +5796,12 @@ static void mainloop(void) break; } + case FD_TYPE_DHCPV6: + { + dhcpv6_process_from_kernel(p, size_bufp); + break; + } + default: LOG(0, 0, 0, "Unexpected fd type returned from epoll_wait: %d\n", d->type); } diff --git a/l2tpns.h b/l2tpns.h index 21b5d1b..5ddd538 100644 --- a/l2tpns.h +++ b/l2tpns.h @@ -1086,6 +1086,7 @@ struct event_data { FD_TYPE_PPPOX, FD_TYPE_PPP_CHAN, FD_TYPE_PPP_IF, + FD_TYPE_DHCPV6, } type; int index; // for RADIUS, BGP, UDP }; diff --git a/util.c b/util.c index d2f5445..41730a2 100644 --- a/util.c +++ b/util.c @@ -139,6 +139,7 @@ pid_t 
fork_and_close() if (sess_local[i].ppp_if_fd >= 0) close(sess_local[i].ppp_if_fd); } + if (dhcpv6fd != -1) close(dhcpv6fd); if (snoopfd != -1) close(snoopfd); if (rand_fd != -1) close(rand_fd); if (epollfd != -1) close(epollfd); From 58d038dc0b09af653090d0f43dede584efddf9a3 Mon Sep 17 00:00:00 2001 From: Samuel Thibault Date: Thu, 18 May 2023 00:40:31 +0200 Subject: [PATCH 16/28] Add ICMPv6 support with acceleration --- icmp.c | 100 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ l2tpns.c | 16 +++++++-- l2tpns.h | 5 +++ util.c | 1 + 4 files changed, 120 insertions(+), 2 deletions(-) diff --git a/icmp.c b/icmp.c index 168eb5f..3e45e58 100644 --- a/icmp.c +++ b/icmp.c @@ -6,13 +6,62 @@ #include #include #include +#include +#include #include "dhcp6.h" #include "l2tpns.h" #include "ipv6_u.h" +#include "util.h" + +int icmpv6fd; static uint16_t _checksum(uint8_t *addr, int count); +void icmpv6_init(void) +{ + int on = 1, check = 2; + struct icmp6_filter filter; + + icmpv6fd = socket(AF_INET6, SOCK_RAW, IPPROTO_ICMPV6); + if (icmpv6fd < 0) + LOG(1, 0, 0, "ICMPv6: could not create socket: %s\n", strerror(errno)); + +#ifdef IPV6_RECVPKTINFO + if (setsockopt(icmpv6fd, SOL_IPV6, IPV6_RECVPKTINFO, &on, sizeof(on)) < 0) + LOG(1, 0, 0, "ICMPv6: could not request pktinfo: %s\n", strerror(errno)); +#else + if (setsockopt(icmpv6fd, SOL_IPV6, IPV6_PKTINFO, &on, sizeof(on)) < 0) + LOG(1, 0, 0, "ICMPv6: could not request pktinfo: %s\n", strerror(errno)); +#endif + +#ifdef __linux__ + if (setsockopt(icmpv6fd, SOL_RAW, IPV6_CHECKSUM, &check, sizeof(check)) < 0) +#else + if (setsockopt(icmpv6fd, SOL_IPV6, IPV6_CHECKSUM, &check, sizeof(check)) < 0) +#endif + LOG(1, 0, 0, "ICMPv6: could not request for checking checksums: %s\n", strerror(errno)); + + ICMP6_FILTER_SETBLOCKALL(&filter); + ICMP6_FILTER_SETPASS(ND_ROUTER_SOLICIT, &filter); + + if (setsockopt(icmpv6fd, SOL_ICMPV6, ICMP6_FILTER, &filter, sizeof(filter)) < 0) + LOG(1, 0, 0, "ICMPv6: could not set filter for RS: %s\n", 
strerror(errno)); +} + +// +// A new ppp interface was created, watch for ICMPv6 RS on it +void icmpv6_listen(int ifidx) +{ + struct ipv6_mreq mreq; + + memset(&mreq, 0, sizeof(mreq)); + mreq.ipv6mr_interface = ifidx; + inet_pton(AF_INET6, "ff02::2", &mreq.ipv6mr_multiaddr); + if (setsockopt(icmpv6fd, SOL_IPV6, IPV6_JOIN_GROUP, &mreq, sizeof(mreq)) < 0) + LOG(2, 0, 0, "ICMPv6: could not join all routers group: %s\n", strerror(errno)); +} + void host_unreachable(in_addr_t destination, uint16_t id, in_addr_t source, uint8_t *packet, int packet_len) { char buf[128] = {0}; @@ -203,3 +252,54 @@ void send_ipv6_ra(sessionidt s, tunnelidt t, const struct in6_addr *ip) tunnelsend(b, l + (((uint8_t *) p_ip6_hdr)-b), t); // send it... return; } + +// +// An ICMPv6 request was received on a ppp interface, receive it +void icmpv6_process_from_kernel(uint8_t *p, size_t size_bufp) +{ + struct sockaddr_storage fromaddr; + struct sockaddr_in6 *sin6; + socklen_t fromlen = sizeof(fromaddr); + struct in6_addr toaddr; + int ifidx; + int r, s, t; + + r = recvfromto6(icmpv6fd, p, size_bufp, 0, (struct sockaddr *) &fromaddr, &fromlen, &toaddr, &ifidx); + if (r < 0) + { + static time_t lastwarn; + time_t now = time(NULL); + if (now > lastwarn) + { + LOG(5, 0, 0, "ICMPV6: reception error: %s\n", strerror(errno)); + lastwarn = now; + } + return; + } + LOG(5, 0, 0, "Got packet on ICMP socket on if %d\n", ifidx); + + if (fromaddr.ss_family != AF_INET6) + { + LOG(5, 0, 0, "ICMPV6: got strange family %d\n", fromaddr.ss_family); + return; + } + sin6 = (struct sockaddr_in6 *) &fromaddr; + + if (*p != ND_ROUTER_SOLICIT) + { + LOG(5, 0, 0, "ICMPV6: not router sollicitation??: %d\n", *p); + return; + } + + for (s = 1; s < MAXSESSION; s++) + { + if (sess_local[s].ppp_if_idx != ifidx) + continue; + + t = session[s].tunnel; + + send_ipv6_ra(s, t, &sin6->sin6_addr); + + break; + } +} diff --git a/l2tpns.c b/l2tpns.c index d33acc5..33a2bb1 100644 --- a/l2tpns.c +++ b/l2tpns.c @@ -1229,6 +1229,7 @@ static 
int create_kernel_accel(sessionidt s) sess_local[s].ppp_if_idx = ifr.ifr_ifindex; dhcpv6_listen(ifr.ifr_ifindex); + icmpv6_listen(ifr.ifr_ifindex); memset(&sess_local[s].last_stats, 0, sizeof(sess_local[s].last_stats)); @@ -5449,8 +5450,8 @@ static int still_busy(void) return 0; } -// the base set of fds polled: cli, cluster, tun, udp (MAX_UDPFD), control, dae, netlink, udplac, pppoedisc, pppoesess, dhcpv6 -#define BASE_FDS (10 + MAX_UDPFD) +// the base set of fds polled: cli, cluster, tun, udp (MAX_UDPFD), control, dae, netlink, udplac, pppoedisc, pppoesess, dhcpv6, icmpv6 +#define BASE_FDS (11 + MAX_UDPFD) // additional polled fds #ifdef BGP @@ -5537,6 +5538,10 @@ static void mainloop(void) e.data.ptr = &d[i++]; epoll_ctl(epollfd, EPOLL_CTL_ADD, dhcpv6fd, &e); + d[i].type = FD_TYPE_ICMPV6; + e.data.ptr = &d[i++]; + epoll_ctl(epollfd, EPOLL_CTL_ADD, icmpv6fd, &e); + for (j = 0; j < config->nbudpfd; j++) { d[i].type = FD_TYPE_UDP; @@ -5802,6 +5807,12 @@ static void mainloop(void) break; } + case FD_TYPE_ICMPV6: + { + icmpv6_process_from_kernel(p, size_bufp); + break; + } + default: LOG(0, 0, 0, "Unexpected fd type returned from epoll_wait: %d\n", d->type); } @@ -6774,6 +6785,7 @@ int main(int argc, char *argv[]) initrad(); initippool(); dhcpv6_init(); + icmpv6_init(); // seed prng { diff --git a/l2tpns.h b/l2tpns.h index 5ddd538..8b5468f 100644 --- a/l2tpns.h +++ b/l2tpns.h @@ -1044,6 +1044,10 @@ int cli_arg_help(struct cli_def *cli, int cr_ok, char *entry, ...); // icmp.c +extern int icmpv6fd; +void icmpv6_init(void); +void icmpv6_listen(int ifidx); +void icmpv6_process_from_kernel(uint8_t *p, size_t size_bufp); void host_unreachable(in_addr_t destination, uint16_t id, in_addr_t source, uint8_t *packet, int packet_len); @@ -1087,6 +1091,7 @@ struct event_data { FD_TYPE_PPP_CHAN, FD_TYPE_PPP_IF, FD_TYPE_DHCPV6, + FD_TYPE_ICMPV6, } type; int index; // for RADIUS, BGP, UDP }; diff --git a/util.c b/util.c index 41730a2..6829203 100644 --- a/util.c +++ b/util.c @@ 
-140,6 +140,7 @@ pid_t fork_and_close() } if (dhcpv6fd != -1) close(dhcpv6fd); + if (icmpv6fd != -1) close(icmpv6fd); if (snoopfd != -1) close(snoopfd); if (rand_fd != -1) close(rand_fd); if (epollfd != -1) close(epollfd); From 1b9c50df98469218ea6cf2432ed05e540dc5c03d Mon Sep 17 00:00:00 2001 From: Samuel Thibault Date: Sat, 27 May 2023 12:39:01 +0200 Subject: [PATCH 17/28] Comment on memory needs for igmp6 subscriptions --- docs/manpages/startup-config.5 | 8 ++++++++ docs/src/man/startup-config.5.md | 6 +++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/docs/manpages/startup-config.5 b/docs/manpages/startup-config.5 index e1babbd..62002d1 100644 --- a/docs/manpages/startup-config.5 +++ b/docs/manpages/startup-config.5 @@ -418,6 +418,14 @@ Each throttled session requires two buckets (in and out). Determines whether or not to enable kernel acceleration. Note that only one l2tpns instance can use it per network namespace, otherwise they will step on each other. +Also, if you have a lot of clients (e.g.\ at least a hundred), listening +for DHCPv6 and RS requires a lot of igmp6 subscriptions, so tuning sysctl +may be needed, such as +.PP +sysctl net.core.optmem_max=1048576 +.PP +otherwise the logs will mention failures to subscribe due to lack of +memory. .SS DHCPv6 And IPv6 SETTINGS .PP \f[B]dhcp6_preferred_lifetime\f[R] (int) diff --git a/docs/src/man/startup-config.5.md b/docs/src/man/startup-config.5.md index 60176e3..1ca67a2 100644 --- a/docs/src/man/startup-config.5.md +++ b/docs/src/man/startup-config.5.md @@ -253,7 +253,11 @@ The following `variables` may be set: **kernel\_accel** (boolean) - Determines whether or not to enable kernel acceleration. Note that only one l2tpns instance can use it per network namespace, otherwise they will step on each other. + Determines whether or not to enable kernel acceleration. Note that only one l2tpns instance can use it per network namespace, otherwise they will step on each other. 
Also, if you have a lot of clients (e.g. at least a hundred), listening for DHCPv6 and RS requires a lot of igmp6 subscriptions, so tuning sysctl may be needed, such as + +sysctl net.core.optmem\_max=1048576 + +otherwise the logs will mention failures to subscribe due to lack of memory. ## DHCPv6 And IPv6 SETTINGS From c8c197bf3a5c10039556b154d086d09814908ebb Mon Sep 17 00:00:00 2001 From: Dominique Martinet Date: Wed, 2 Aug 2023 19:23:13 +0900 Subject: [PATCH 18/28] processppp: fix IP log when tunnel IP source change fmtaddr formats the string to a static buffer that must be selected manually, but this log message was re-using the same buffer. Use another index for 2nd IP so we get to know which is what. Fixes: 2429969bd04d ("Add L2TP offloading support") --- l2tpns.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/l2tpns.c b/l2tpns.c index 33a2bb1..572443f 100644 --- a/l2tpns.c +++ b/l2tpns.c @@ -4738,7 +4738,7 @@ static void processppp(sessionidt s, uint8_t *buf, int len, uint8_t *p, int l, s { // The remotes BAS are a clustered l2tpns server and the source IP has changed LOG(2, s, t, "The tunnel IP source (%s) has changed by new IP (%s)\n", - fmtaddr(htonl(tunnel[t].ip), 0), fmtaddr(addr->sin_addr.s_addr, 0)); + fmtaddr(htonl(tunnel[t].ip), 0), fmtaddr(addr->sin_addr.s_addr, 1)); tunnel[t].ip = ntohl(addr->sin_addr.s_addr); From 72fca9b8f0dac6c6a2d25bd78031f7a2684164fc Mon Sep 17 00:00:00 2001 From: Dominique Martinet Date: Mon, 21 Aug 2023 20:29:15 +0900 Subject: [PATCH 19/28] create/update_kernel_tunnel: add some debug logs --- l2tpns.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/l2tpns.c b/l2tpns.c index 572443f..34afaf7 100644 --- a/l2tpns.c +++ b/l2tpns.c @@ -640,7 +640,7 @@ static int create_kernel_tunnel(uint32_t tid, uint32_t peer_tid) /* Already set up */ return 0; - LOG(3, 0, tid, "Creating kernel tunnel from %u to %u\n", tid, + LOG(2, 0, tid, "Creating kernel tunnel from %u to %u\n", tid, 
peer_tid); memset(&req, 0, sizeof(req)); @@ -704,8 +704,10 @@ static int create_kernel_tunnel(uint32_t tid, uint32_t peer_tid) // Update remote address of kernel tunnel static int update_kernel_tunnel(sessionidt s, tunnelidt t) { - if (tunn_local[t].l2tp_fd < 0) + if (tunn_local[t].l2tp_fd < 0) { + LOG(2, s, t, "IP change was requested for tunnel before it is connected\n"); return -1; + } struct sockaddr_in tunneladdr; memset(&tunneladdr, 0, sizeof(tunneladdr)); From 60b78506e84293640c24ee0ab11d5be163f9c63a Mon Sep 17 00:00:00 2001 From: Samuel Thibault Date: Sun, 12 Nov 2023 01:27:54 +0100 Subject: [PATCH 20/28] Document that MSS clamping needs to be done by Linux --- etc/startup-config.default | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/etc/startup-config.default b/etc/startup-config.default index 3b3f223..82bb1c8 100644 --- a/etc/startup-config.default +++ b/etc/startup-config.default @@ -149,3 +149,12 @@ set ppp_keepalive yes # Kernel acceleration, enable on no more than one instance! 
#set kernel_accel yes +# +# You will probably want to also enable MSS clamping, which l2tpns won't be able to do any more: +# iptables -A FORWARD -p tcp --tcp-flags SYN,RST SYN -j TCPMSS --clamp-mss-to-pmtu +# ip6tables -A FORWARD -p tcp --tcp-flags SYN,RST SYN -j TCPMSS --clamp-mss-to-pmtu +# or +# nft add rule inet filter forward tcp flags syn tcp option maxseg size set rt mtu +# +# and allow dhcpv6 traffic: +# iptables -A INPUT -i ppp+ -p udp --sport 546 --dport 547 -j ACCEPT From f00ec1c6e49fc77fc78a229467a670e9d0a6638b Mon Sep 17 00:00:00 2001 From: Samuel Thibault Date: Sun, 3 Dec 2023 22:49:52 +0100 Subject: [PATCH 21/28] Note that one probably wants to increase memory for igmp6 --- etc/startup-config.default | 3 +++ 1 file changed, 3 insertions(+) diff --git a/etc/startup-config.default b/etc/startup-config.default index 82bb1c8..496522e 100644 --- a/etc/startup-config.default +++ b/etc/startup-config.default @@ -158,3 +158,6 @@ set ppp_keepalive yes # # and allow dhcpv6 traffic: # iptables -A INPUT -i ppp+ -p udp --sport 546 --dport 547 -j ACCEPT +# +# and increase the memory available for igmp6 for DHCPv6 and RS: +# sysctl net.core.optmem_max=10485760 From 0ac498d7d37fa27032021b193ab0a9d7ae17f227 Mon Sep 17 00:00:00 2001 From: Samuel Thibault Date: Thu, 18 Jan 2024 02:00:21 +0100 Subject: [PATCH 22/28] cli: Show acceleration interface name And allow selecting a session through it --- cli.c | 17 ++++++++++++++++- l2tpns.c | 6 ++++-- l2tpns.h | 6 +++++- 3 files changed, 25 insertions(+), 4 deletions(-) diff --git a/cli.c b/cli.c index b0049ac..e3cea88 100644 --- a/cli.c +++ b/cli.c @@ -451,10 +451,23 @@ static int cmd_show_session(struct cli_def *cli, const char *command, char **arg for (i = 0; i < argc; i++) { unsigned int s, b_in, b_out, r; + int ifunit = -1; + + if (!strncmp(argv[i], PPP_IF_PREFIX, strlen(PPP_IF_PREFIX))) + { + char *start = argv[i]+strlen(PPP_IF_PREFIX); + char *end; + long res = strtol(start, &end, 10); + if (end != start && !*end) + 
ifunit = res; + } for (s = 0; s < MAXSESSION; s++) - if (!strcmp(argv[i], session[s].user)) + { + if ((ifunit >= 0 && sess_local[s].ppp_if_unit == ifunit) + || !strcmp(argv[i], session[s].user)) break; + } if (s >= MAXSESSION) { s = atoi(argv[i]); @@ -473,6 +486,8 @@ static int cmd_show_session(struct cli_def *cli, const char *command, char **arg cli_print(cli, "\tRemote ID:\t%d", session[s].far); if (session[s].bundle) cli_print(cli, "\tBundle ID:\t%d (%d)", session[s].bundle, bundle[session[s].bundle].num_of_links); + if (sess_local[s].ppp_if_unit >= 0) + cli_print(cli, "\tInterface:\tppp%d", sess_local[s].ppp_if_unit); cli_print(cli, "\tPPP Phase:\t%s", ppp_phase(session[s].ppp.phase)); switch (session[s].ppp.phase) { diff --git a/l2tpns.c b/l2tpns.c index 34afaf7..378e6fe 100644 --- a/l2tpns.c +++ b/l2tpns.c @@ -49,8 +49,6 @@ #define PPPIOCUNBRIDGECHAN _IO('t', 54) #endif -#define PPP_IF_PREFIX "ppp" - #include "md5.h" #include "dhcp6.h" #include "l2tpns.h" @@ -1228,6 +1226,7 @@ static int create_kernel_accel(sessionidt s) sess_local[s].pppox_fd = pppox_fd; sess_local[s].ppp_chan_fd = ppp_chan_fd; sess_local[s].ppp_if_fd = ppp_if_fd; + sess_local[s].ppp_if_unit = ifunit; sess_local[s].ppp_if_idx = ifr.ifr_ifindex; dhcpv6_listen(ifr.ifr_ifindex); @@ -1256,6 +1255,7 @@ static int delete_kernel_accel(sessionidt s) LOG(3, s, session[s].tunnel, "Stopping kernel-accelerated support for %u:%u\n", session[s].tunnel, s); + sess_local[s].ppp_if_unit = -1; sess_local[s].ppp_if_idx = 0; ioctl(sess_local[s].ppp_chan_fd, PPPIOCDISCONN); @@ -3666,6 +3666,7 @@ static void sessionclear(sessionidt s) sess_local[s].pppox_fd = -1; sess_local[s].ppp_chan_fd = -1; sess_local[s].ppp_if_fd = -1; + sess_local[s].ppp_if_unit = -1; memset(&cli_session_actions[s], 0, sizeof(cli_session_actions[s])); session[s].tunnel = T_FREE; // Mark it as free. 
@@ -6229,6 +6230,7 @@ static void initdata(int optdebug, char *optconfig) sess_local[i].pppox_fd = -1; sess_local[i].ppp_chan_fd = -1; sess_local[i].ppp_if_fd = -1; + sess_local[i].ppp_if_unit = -1; } session[MAXSESSION - 1].next = 0; sessionfree = 1; diff --git a/l2tpns.h b/l2tpns.h index 8b5468f..33e9f9a 100644 --- a/l2tpns.h +++ b/l2tpns.h @@ -31,6 +31,8 @@ // Tunnel Id reserved for pppoe #define TUNNEL_ID_PPPOE 1 +#define PPP_IF_PREFIX "ppp" + #define RADIUS_SHIFT 6 #define RADIUS_FDS (1 << RADIUS_SHIFT) #define RADIUS_MASK ((1 << RADIUS_SHIFT) - 1) @@ -459,7 +461,9 @@ typedef struct // ppp interface int ppp_if_fd; - // ppp interface index + // ppp interface number (ppp%d) + int ppp_if_unit; + // ppp interface index (for rtnetlink etc.) int ppp_if_idx; } sessionlocalt; From a22295d80415ecc1c5dd0576fe024ab626ab8c0f Mon Sep 17 00:00:00 2001 From: Samuel Thibault Date: Sun, 21 Jan 2024 02:45:44 +0100 Subject: [PATCH 23/28] Fasten quitting l2tp game Drop routes as quickly as possible to lose as few packets as possible in the meanwhile. --- cluster.c | 24 +++++--------- l2tpns.c | 96 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ l2tpns.h | 1 + 3 files changed, 105 insertions(+), 16 deletions(-) diff --git a/cluster.c b/cluster.c index ddfc1fe..d4a7a8a 100644 --- a/cluster.c +++ b/cluster.c @@ -435,8 +435,7 @@ static void send_heartbeat(int seq, uint8_t *data, int size) if (size > sizeof(past_hearts[0].data)) { LOG(0, 0, 0, "Tried to heartbeat something larger than the maximum packet!\n"); - kill(0, SIGTERM); - exit(1); + crash(); } i = seq % HB_HISTORY_SIZE; past_hearts[i].seq = seq; @@ -903,8 +902,7 @@ static int hb_add_type(uint8_t **p, int type, int id) break; default: LOG(0, 0, 0, "Found an invalid type in heart queue! (%d)\n", type); - kill(0, SIGTERM); - exit(1); + crash(); } return 0; } @@ -951,8 +949,7 @@ void cluster_heartbeat() if (p > (buff + sizeof(buff))) { // Did we somehow manage to overun the buffer? 
LOG(0, 0, 0, "FATAL: Overran the heartbeat buffer! This is fatal. Exiting. (size %d)\n", (int) (p - buff)); - kill(0, SIGTERM); - exit(1); + crash(); } // @@ -1011,8 +1008,7 @@ void cluster_heartbeat() // Did we do something wrong? if (p > (buff + sizeof(buff))) { // Did we somehow manage to overun the buffer? LOG(0, 0, 0, "Overran the heartbeat buffer now! This is fatal. Exiting. (size %d)\n", (int) (p - buff)); - kill(0, SIGTERM); - exit(1); + crash(); } LOG(4, 0, 0, "Sending v%d heartbeat #%d, change #%" PRIu64 " with %d changes " @@ -1960,8 +1956,7 @@ static int cluster_process_heartbeat(uint8_t *data, int size, int more, uint8_t LOG(0, 0, 0, "They've seen more state changes (%" PRIu64 " vs my %" PRIu64 ") so I'm gone!\n", h->table_version, config->cluster_table_version); - kill(0, SIGTERM); - exit(1); + crash(); } if (h->table_version < config->cluster_table_version) @@ -1969,8 +1964,7 @@ static int cluster_process_heartbeat(uint8_t *data, int size, int more, uint8_t if (basetime > h->basetime) { LOG(0, 0, 0, "They're an older master than me so I'm gone!\n"); - kill(0, SIGTERM); - exit(1); + crash(); } if (basetime < h->basetime) @@ -1978,8 +1972,7 @@ static int cluster_process_heartbeat(uint8_t *data, int size, int more, uint8_t if (my_address < addr) { // Tie breaker. LOG(0, 0, 0, "They're a higher IP address than me, so I'm gone!\n"); - kill(0, SIGTERM); - exit(1); + crash(); } // @@ -2366,8 +2359,7 @@ int processcluster(uint8_t *data, int size, in_addr_t addr) } LOG(0, 0, 0, "Received a valid C_KILL: I'm going to die now.\n"); - kill(0, SIGTERM); - exit(0); // Lets be paranoid; + crash(); return -1; // Just signalling the compiler. 
case C_HEARTBEAT: diff --git a/l2tpns.c b/l2tpns.c index 378e6fe..e5e877a 100644 --- a/l2tpns.c +++ b/l2tpns.c @@ -2119,6 +2119,66 @@ static int setupif(int ifidx, uint32_t mru, int config_addr) return 0; } +// +// Quickly drop the gateway from the interface +static int disableif(int ifidx) +{ + struct { + // interface setting + struct nlmsghdr nh; + union { + struct ifinfomsg ifinfo; + struct ifaddrmsg ifaddr; + } ifmsg; + char rtdata[32]; // 32 should be enough + } req; + in_addr_t ip; + + memset(&req, 0, sizeof(req)); + + req.nh.nlmsg_type = RTM_DELADDR; + req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_MULTI; + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.ifmsg.ifaddr)); + + req.ifmsg.ifaddr.ifa_family = AF_INET; + req.ifmsg.ifaddr.ifa_prefixlen = 32; + req.ifmsg.ifaddr.ifa_scope = RT_SCOPE_UNIVERSE; + req.ifmsg.ifaddr.ifa_index = ifidx; + + if (config->nbmultiaddress > 1) + { + int i; + for (i = 0; i < config->nbmultiaddress ; i++) + { + ip = config->iftun_n_address[i]; + rtnetlink_addattr(&req.nh, IFA_LOCAL, &ip, sizeof(ip)); + if (rtnetlink_send(&req.nh) < 0) + return -1; + } + } + else + { + if (config->iftun_address) + ip = config->iftun_address; + else + ip = 0x01010101; // 1.1.1.1 + rtnetlink_addattr(&req.nh, IFA_LOCAL, &ip, sizeof(ip)); + + if (rtnetlink_send(&req.nh) < 0) + return -1; + } + + memset(&req, 0, sizeof(req)); + + req.nh.nlmsg_type = NLMSG_DONE; + req.nh.nlmsg_len = NLMSG_LENGTH(0); + + if (rtnetlink_send(&req.nh) < 0) + return -1; + + return 0; +} + // set up LAC UDP ports static int initlacudp(int *pudpfd, in_addr_t ip_dest, uint16_t port_dest) { @@ -3821,6 +3881,33 @@ static void tunnelshutdown(tunnelidt t, char *reason, int result, int error, cha } } +static void drop_routes(void) +{ + unsigned i; + + LOG(1, 0, 0, "Disabling receiving l2tp\n"); + // Disable receiving l2tp traffic first since we don't forward to master any more + disableif(tunidx); + LOG(1, 0, 0, "Dropping routes\n"); + // Disable receiving Internet traffic + for (i = 1; i <= 
config->cluster_highest_sessionid ; ++i) + { + routesset(i, &session[i], 0); + routes6set(i, &session[i], 0); + } +} + +// +// We ended up in an odd state, better stop here as quickly as possible before +// causing trouble to the rest of the cluster +// +void crash(void) +{ + kill(0, SIGTERM); + drop_routes(); + exit(1); +} + // read and process packet on tunnel (UDP) void processudp(uint8_t *buf, int len, struct sockaddr_in *addr, uint16_t indexudpfd) { @@ -6017,6 +6104,7 @@ static void mainloop(void) } } } + LOG(1, 0, 0, "Leaving...\n"); // Are we the master and shutting down?? if (config->cluster_iam_master) @@ -6032,6 +6120,14 @@ static void mainloop(void) // // Important!!! We MUST not process any packets past this point! + // + + // + // Now drop routes as quickly as possible to lose as few packets as + // possible in the meanwhile + // + drop_routes(); + LOG(1, 0, 0, "Shutdown complete\n"); } diff --git a/l2tpns.h b/l2tpns.h index 33e9f9a..7c5ad72 100644 --- a/l2tpns.h +++ b/l2tpns.h @@ -1037,6 +1037,7 @@ void throttle_session(sessionidt s, int rate_in, int rate_out); int load_tunnel(tunnelidt, tunnelt *); int load_session(sessionidt, sessiont *); void become_master(void); // We're the master; kick off any required master initializations. +void crash(void); // We messed up. Die. // cli.c From a957ff08ee42feb1cb2594073b802be0ea6d16f1 Mon Sep 17 00:00:00 2001 From: Samuel Thibault Date: Sun, 21 Jan 2024 03:09:43 +0100 Subject: [PATCH 24/28] Throttle switching kernel acceleration Creating/destroying interfaces etc. does take some time. When e.g. receiving a lot of sessions as new slave, we don't want to stay stuck creating hundreds of interfaces while we are already receiving control messages that we have to forward to master not too late. Switching kernel acceleration can wait a bit most of the time. 
--- l2tpns.c | 59 ++++++++++++++++++++++++++++++++++++++++++++++---------- l2tpns.h | 3 +++ 2 files changed, 52 insertions(+), 10 deletions(-) diff --git a/l2tpns.c b/l2tpns.c index e5e877a..04ad4e0 100644 --- a/l2tpns.c +++ b/l2tpns.c @@ -137,6 +137,8 @@ static char time_now_string[64] = {0}; // Current time as a string. static int time_changed = 0; // time_now changed char main_quit = 0; // True if we're in the process of exiting. static char main_reload = 0; // Re-load pending +#define MAX_KERNEL_SWITCHES 20 // Maximum number of kernel switches per 1/10th second +static int kernel_switches = 0; // How many kernel switches we performed since last cleanup linked_list *loaded_plugins; linked_list *plugins[MAX_PLUGIN_TYPES]; @@ -1280,7 +1282,8 @@ static int delete_kernel_accel(sessionidt s) // // Enable (set=1) or disable (set=0) kernel PPP acceleration // This basically calls create/delete_kernel_accel, but also updates routes -static void set_kernel_accel(sessionidt s, int set) +// If now is 0, we may delay this if we have already made a lot of switches since last cleanup +static void set_kernel_accel(sessionidt s, int set, int now) { if (set && !can_kernel_accel(s)) /* Still cannot enable it */ @@ -1291,6 +1294,16 @@ static void set_kernel_accel(sessionidt s, int set) /* We don't know the tunnel yet */ return; + if (set && !now && kernel_switches >= MAX_KERNEL_SWITCHES) + { + // We already performed many switches, throttle a bit by just + // marking as pending + sess_local[s].needs_switch = 1; + return; + } + kernel_switches++; + sess_local[s].needs_switch = 0; + routesset(s, &session[s], 0); if (session[s].ppp.ipv6cp == Opened) routes6set(s, &session[s], 0); @@ -1323,7 +1336,7 @@ void switch_kernel_accel(sessionidt s) return; /* Try to enable */ - set_kernel_accel(s, 1); + set_kernel_accel(s, 1, 0); } else { @@ -1334,7 +1347,7 @@ void switch_kernel_accel(sessionidt s) return; /* Has to disable it */ - set_kernel_accel(s, 0); + set_kernel_accel(s, 0, 1); } } @@ 
-5805,7 +5818,7 @@ static void mainloop(void) sessionidt sid; for (sid = 1; sid <= config->cluster_highest_sessionid ; ++sid) if (session[sid].tunnel == tid) - set_kernel_accel(sid, 0); + set_kernel_accel(sid, 0, 1); delete_kernel_tunnel(tid); } @@ -5833,12 +5846,12 @@ static void mainloop(void) if (s < 0) { LOG(1, sid, tid, "Error on pppox socket: %s\n", strerror(errno)); - set_kernel_accel(sid, 0); + set_kernel_accel(sid, 0, 1); } else if (s == 0) { LOG(1, sid, tid, "EOF on pppox socket\n"); - set_kernel_accel(sid, 0); + set_kernel_accel(sid, 0, 1); } else { @@ -5857,12 +5870,12 @@ static void mainloop(void) if (s < 0) { LOG(1, sid, tid, "Error on ppp channel: %s\n", strerror(errno)); - set_kernel_accel(sid, 0); + set_kernel_accel(sid, 0, 1); } else if (s == 0) { LOG(1, sid, tid, "EOF on ppp channel\n"); - set_kernel_accel(sid, 0); + set_kernel_accel(sid, 0, 1); } else processppp_from_kernel(sid, p, s, NULL); @@ -5878,12 +5891,12 @@ static void mainloop(void) if (s < 0) { LOG(1, sid, tid, "Error on ppp if: %s\n", strerror(errno)); - set_kernel_accel(sid, 0); + set_kernel_accel(sid, 0, 1); } else if (s == 0) { LOG(1, sid, tid, "EOF on ppp if\n"); - set_kernel_accel(sid, 0); + set_kernel_accel(sid, 0, 1); } else processppp_from_kernel(sid, p, s, NULL); @@ -6051,6 +6064,32 @@ static void mainloop(void) next_cluster_ping = TIME + config->cluster_hb_interval; } + // Handle trying to enable kernel accel + { + static double last_switch = 0; + double this_switch; + double diff; + + TIME = now(&this_switch); + diff = this_switch - last_switch; + + // Run during idle time (after we've handled + // all incoming packets) or every 1/10th sec + if (!more || diff > 0.1) + { + kernel_switches = 0; + + for (i = 1; i <= config->cluster_highest_sessionid; i++) + { + // Delayed kernel switch + if (session[i].ppp.lcp == Opened && sess_local[i].needs_switch) + set_kernel_accel(i, can_kernel_accel(i), 0); + } + + last_switch = this_switch; + } + } + if (!config->cluster_iam_master) 
continue; diff --git a/l2tpns.h b/l2tpns.h index 7c5ad72..59852b9 100644 --- a/l2tpns.h +++ b/l2tpns.h @@ -452,6 +452,9 @@ typedef struct // time in milliseconds of the last fragment. uint64_t prev_time; + // Pending kernel switch + int needs_switch; + // l2tp PPPoL2TP socket int pppox_fd; struct pppol2tp_ioc_stats last_stats; From 38bfd3f738cc4967779d67d940f57f89eddf43db Mon Sep 17 00:00:00 2001 From: Samuel Thibault Date: Sat, 3 Feb 2024 18:34:43 +0100 Subject: [PATCH 25/28] Add L2TP bridging offloading support --- l2tpns.c | 186 +++++++++++++++++++++++++++++++++++++++++++------------ l2tpns.h | 1 + 2 files changed, 148 insertions(+), 39 deletions(-) diff --git a/l2tpns.c b/l2tpns.c index 04ad4e0..fa7cc2d 100644 --- a/l2tpns.c +++ b/l2tpns.c @@ -652,9 +652,19 @@ static int create_kernel_tunnel(uint32_t tid, uint32_t peer_tid) req.glh.version = L2TP_GENL_VERSION; int fd; - if (initudp(&fd, config->bind_n_address[tunnel[tid].indexudp], - htonl(tunnel[tid].ip), htons(tunnel[tid].port)) < 0) - return -1; + if (tunnel[tid].indexudp == config->indexlacudpfd) + { + /* tunnel as LAC */ + if (initlacudp(&fd, htonl(tunnel[tid].ip), htons(tunnel[tid].port)) < 0) + return -1; + } + else + { + /* tunnel as LNS */ + if (initudp(&fd, config->bind_n_address[tunnel[tid].indexudp], + htonl(tunnel[tid].ip), htons(tunnel[tid].port)) < 0) + return -1; + } genetlink_addattr(&req.nh, L2TP_ATTR_FD, &fd, sizeof(fd)); genetlink_addattr(&req.nh, L2TP_ATTR_CONN_ID, &tid, sizeof(tid)); @@ -1136,10 +1146,6 @@ static int can_kernel_accel(sessionidt s) /* MPPP not supported yet */ return 0; - if (session[s].forwardtosession) - /* Forwarding not supported yet */ - return 0; - if (session[s].throttle_in || session[s].throttle_out) /* Throttling not supported */ return 0; @@ -1247,11 +1253,92 @@ err_pppox_fd: return -1; } +// +// Create the kernel PPP accelerated bridge +int create_kernel_bridge(sessionidt s, sessionidt fwds) +{ + static int kernel_cant = 0; + + tunnelidt t = session[s].tunnel; + 
+ if (fwds == s) + /* Meaningless! */ + return -1; + + if (kernel_cant) + /* We have seen that kernel can't do it anyway */ + return -1; + + if (sess_local[s].pppox_fd >= 0) + /* Already set up */ + return 0; + + if (!can_kernel_accel(s) || !can_kernel_accel(fwds)) + return -1; + + int pppox_fd = create_kernel_pppox(s); + if (pppox_fd < 0) + return -1; + + int fwd_pppox_fd = create_kernel_pppox(fwds); + if (fwd_pppox_fd < 0) + goto err_pppox_fd; + + LOG(3, s, t, "Starting kernel-accelerated bridge between %u and %u\n", s, fwds); + + int ppp_chan_fd = create_kernel_ppp_chan(s, pppox_fd); + if (ppp_chan_fd < 0) + goto err_fwd_pppox_fd; + + int fwd_idx = get_kernel_ppp_chan(fwds, fwd_pppox_fd); + + int ret = ioctl(ppp_chan_fd, PPPIOCBRIDGECHAN, &fwd_idx); + close(ppp_chan_fd); + if (ret < 0) { + if (errno == ENOTTY) + /* Not supported by kernel */ + kernel_cant = 1; + + LOG(2, s, session[s].tunnel, "Can't set LAC bridge: %s\n", strerror(errno)); + goto err_fwd_pppox_fd; + } + + struct epoll_event e; + e.events = EPOLLIN; + + static struct event_data d1[MAXSESSION]; + d1[s].type = FD_TYPE_PPPOX; + d1[s].index = s; + e.data.ptr = &d1[s]; + + epoll_ctl(epollfd, EPOLL_CTL_ADD, pppox_fd, &e); + + d1[fwds].type = FD_TYPE_PPPOX; + d1[fwds].index = fwds; + e.data.ptr = &d1[fwds]; + + epoll_ctl(epollfd, EPOLL_CTL_ADD, fwd_pppox_fd, &e); + + sess_local[s].pppox_fd = pppox_fd; + sess_local[fwds].pppox_fd = fwd_pppox_fd; + + memset(&sess_local[s].last_stats, 0, sizeof(sess_local[s].last_stats)); + memset(&sess_local[fwds].last_stats, 0, sizeof(sess_local[fwds].last_stats)); + + return 0; + +err_fwd_pppox_fd: + close(fwd_pppox_fd); +err_pppox_fd: + close(pppox_fd); + return -1; +} + // // Delete the kernel PPP acceleration static int delete_kernel_accel(sessionidt s) { - if (!sess_local[s].ppp_if_idx) + if (sess_local[s].pppox_fd < 0) /* Already stopped */ return 0; @@ -1260,15 +1347,22 @@ static int delete_kernel_accel(sessionidt s) sess_local[s].ppp_if_unit = -1; 
sess_local[s].ppp_if_idx = 0; - ioctl(sess_local[s].ppp_chan_fd, PPPIOCDISCONN); + if (sess_local[s].ppp_chan_fd >= 0) + ioctl(sess_local[s].ppp_chan_fd, PPPIOCDISCONN); - epoll_ctl(epollfd, EPOLL_CTL_DEL, sess_local[s].ppp_if_fd, NULL); - close(sess_local[s].ppp_if_fd); - sess_local[s].ppp_if_fd = -1; + if (sess_local[s].ppp_if_fd >= 0) + { + epoll_ctl(epollfd, EPOLL_CTL_DEL, sess_local[s].ppp_if_fd, NULL); + close(sess_local[s].ppp_if_fd); + sess_local[s].ppp_if_fd = -1; + } - epoll_ctl(epollfd, EPOLL_CTL_DEL, sess_local[s].ppp_chan_fd, NULL); - close(sess_local[s].ppp_chan_fd); - sess_local[s].ppp_chan_fd = -1; + if (sess_local[s].ppp_chan_fd >= 0) + { + epoll_ctl(epollfd, EPOLL_CTL_DEL, sess_local[s].ppp_chan_fd, NULL); + close(sess_local[s].ppp_chan_fd); + sess_local[s].ppp_chan_fd = -1; + } epoll_ctl(epollfd, EPOLL_CTL_DEL, sess_local[s].pppox_fd, NULL); close(sess_local[s].pppox_fd); @@ -1290,9 +1384,26 @@ static void set_kernel_accel(sessionidt s, int set, int now) return; tunnelidt t = session[s].tunnel; - if (set && tunnel[t].state == TUNNELUNDEF) - /* We don't know the tunnel yet */ - return; + sessionidt fwds = session[s].forwardtosession; + + if (set) + { + if (tunnel[t].state == TUNNELUNDEF) + /* We don't know the tunnel yet */ + return; + + if (fwds) + { + if (session[fwds].forwardtosession != s) + /* We don't know the other session yet */ + return; + + tunnelidt fwdt = session[fwds].tunnel; + if (tunnel[fwdt].state == TUNNELUNDEF) + /* We don't know the tunnel yet */ + return; + } + } if (set && !now && kernel_switches >= MAX_KERNEL_SWITCHES) { @@ -1311,10 +1422,22 @@ static void set_kernel_accel(sessionidt s, int set, int now) if (set) { create_kernel_tunnel(t, tunnel[t].far); - create_kernel_accel(s); + + if (fwds) + { + tunnelidt fwdt = session[fwds].tunnel; + create_kernel_tunnel(fwdt, tunnel[fwdt].far); + create_kernel_bridge(s, fwds); + } + else + create_kernel_accel(s); } else + { delete_kernel_accel(s); + if (fwds) + 
delete_kernel_accel(fwds); + } routesset(s, &session[s], 1); if (session[s].ppp.ipv6cp == Opened) @@ -1327,7 +1450,7 @@ static void set_kernel_accel(sessionidt s, int set, int now) // acceleration is allowed, e.g. snoop void switch_kernel_accel(sessionidt s) { - if (!sess_local[s].ppp_if_idx) + if (sess_local[s].pppox_fd < 0) { /* Acceleration disabled */ @@ -1361,7 +1484,7 @@ static void apply_kernel_stats(sessionidt s) /* It is free */ return; - if (!sess_local[s].ppp_if_idx) + if (sess_local[s].pppox_fd < 0) /* It does not have kernel acceleration */ return; @@ -1391,24 +1514,6 @@ static void apply_kernel_stats(sessionidt s) *last_stats = stats; } -// -// Bridge kernel channels to accelerate LAC -static int bridge_kernel_chans(sessionidt s, int pppox_fd, int pppox_fd2) -{ - int ppp_chan_fd = create_kernel_ppp_chan(s, pppox_fd); - int chindx2 = get_kernel_ppp_chan(s, pppox_fd2); - int ret; - - ret = ioctl(ppp_chan_fd, PPPIOCBRIDGECHAN, &chindx2); - close(ppp_chan_fd); - if (ret < 0) - { - LOG(2, s, session[s].tunnel, "Can't set LAC bridge: %s\n", strerror(errno)); - return -1; - } - return 0; -} - // Get interface idx for session static int session_if_idx(sessionidt s) { @@ -4722,6 +4827,9 @@ void processudp(uint8_t *buf, int len, struct sockaddr_in *addr, uint16_t indexu session[s].far = asession; session[s].last_packet = session[s].last_data = time_now; + // Now we have the far session number, we can try to enable accelerated forward + create_kernel_bridge(s, session[s].forwardtosession); + control32(c, 19, 1, 1); // Framing Type control32(c, 24, 10000000, 1); // Tx Connect Speed controladd(c, asession, t); // send the message diff --git a/l2tpns.h b/l2tpns.h index 59852b9..74c6a02 100644 --- a/l2tpns.h +++ b/l2tpns.h @@ -1039,6 +1039,7 @@ void rebuild_address_pool(void); void throttle_session(sessionidt s, int rate_in, int rate_out); int load_tunnel(tunnelidt, tunnelt *); int load_session(sessionidt, sessiont *); +int create_kernel_bridge(sessionidt s, 
sessionidt fwds); void become_master(void); // We're the master; kick off any required master initializations. void crash(void); // We messed up. Die. From 3add0afa3809479ef2181645ee0e3260c5314a4b Mon Sep 17 00:00:00 2001 From: Samuel Thibault Date: Sun, 4 Feb 2024 02:26:28 +0100 Subject: [PATCH 26/28] Log from which IPs odd UDP packets come from --- l2tpns.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/l2tpns.c b/l2tpns.c index fa7cc2d..2d5f348 100644 --- a/l2tpns.c +++ b/l2tpns.c @@ -4937,7 +4937,7 @@ static void processppp(sessionidt s, uint8_t *buf, int len, uint8_t *p, int l, s return; } - LOG(1, s, t, "UDP packet contains session which is not opened. Dropping packet.\n"); + LOG(1, s, t, "UDP packet from %s contains session which is not opened. Dropping packet.\n", fmtaddr(addr->sin_addr.s_addr, 0)); STAT(tunnel_rx_errors); return; } From 3ab80a9d6630daff2985115efdcec728d897bfa7 Mon Sep 17 00:00:00 2001 From: Samuel Thibault Date: Tue, 6 Feb 2024 20:06:58 +0100 Subject: [PATCH 27/28] IPV6CP: suggest an interface identifier option Some peers seem to be sending no such option. The rfc says in that case to try to send an option with a suggested value. Don't insist on it however if the peer still doesn't send any. That can fix IPv6 for some peers. --- l2tpns.h | 3 +++ ppp.c | 42 ++++++++++++++++++++++++------------------ 2 files changed, 27 insertions(+), 18 deletions(-) diff --git a/l2tpns.h b/l2tpns.h index 74c6a02..3e7f4a6 100644 --- a/l2tpns.h +++ b/l2tpns.h @@ -441,6 +441,9 @@ typedef struct // last LCP Echo time_t last_echo; + // Whether we tried to suggest the IPV6CP identifier option. 
+ int tried_identifier; + // last unsolicited RA sent to user time_t last_ra; diff --git a/ppp.c b/ppp.c index 1f5791f..7f5a5d2 100644 --- a/ppp.c +++ b/ppp.c @@ -1560,6 +1560,17 @@ void processipv6cp(sessionidt s, tunnelidt t, uint8_t *p, uint16_t l) int gotip = 0; uint32_t ident[2]; + if (session[s].ipv6address.s6_addr[0]) + { + // LSB 64bits of assigned IPv6 address to user (see RADIUS attribute Framed-IPv6-Address) + memcpy(&ident[0], &session[s].ipv6address.s6_addr[8], 8); + } + else + { + ident[0] = htonl(session[s].ip); + ident[1] = 0; + } + while (length > 2) { if (!o[1] || o[1] > length) return; @@ -1570,17 +1581,6 @@ void processipv6cp(sessionidt s, tunnelidt t, uint8_t *p, uint16_t l) gotip++; // seen address if (o[1] != 10) return; - if (session[s].ipv6address.s6_addr[0]) - { - // LSB 64bits of assigned IPv6 address to user (see radius attribut Framed-IPv6-Address) - memcpy(&ident[0], &session[s].ipv6address.s6_addr[8], 8); - } - else - { - ident[0] = htonl(session[s].ip); - ident[1] = 0; - } - if (memcmp(o + 2, ident, sizeof(ident))) { q = ppp_conf_nak(s, b, sizeof(b), PPPIPV6CP, &response, q, p, o, (uint8_t *)ident, sizeof(ident)); @@ -1599,24 +1599,30 @@ void processipv6cp(sessionidt s, tunnelidt t, uint8_t *p, uint16_t l) o += o[1]; } + if (!response && !gotip && sess_local[s].tried_identifier++ < 2) + { + uint8_t identifier_option[6] = { 1, 6 }; + + // No interface identifier option, try to suggest one + q = ppp_conf_nak(s, b, sizeof(b), PPPIPV6CP, &response, q, p, identifier_option, (uint8_t *)ident, sizeof(ident)); + if (!q) return; + } + if (response) { l = q - response; // IPV6CP packet length *((uint16_t *) (response + 2)) = htons(l); // update header } - else if (gotip) + else { + if (!gotip) + LOG(2, s, t, "No interface identifier in IPV6CP request, hoping for the best\n"); + // Send packet back as ConfigAck response = makeppp(b, sizeof(b), p, l, s, t, PPPIPV6CP, 0, 0, 0); if (!response) return; *response = ConfigAck; } - else - { - LOG(3, s, 
t, "No interface identifier in IPV6CP request\n"); - STAT(tunnel_rx_errors); - return; - } switch (session[s].ppp.ipv6cp) { From 85044bc6a4ae5bad84653b82dd911976ab3db22b Mon Sep 17 00:00:00 2001 From: Samuel Thibault Date: Tue, 6 Feb 2024 20:52:54 +0100 Subject: [PATCH 28/28] control: Queue packets received Out-of-Order Retransmissions are done very slowly, so we'd really better store the Out-of-Order messages so that we can catch up quickly once we get the missing piece. --- l2tpns.c | 101 ++++++++++++++++++++++++++++++++++++++++++++++++------- l2tpns.h | 2 ++ 2 files changed, 91 insertions(+), 12 deletions(-) diff --git a/l2tpns.c b/l2tpns.c index 2d5f348..9d0aa4f 100644 --- a/l2tpns.c +++ b/l2tpns.c @@ -3456,6 +3456,7 @@ static controlt *controlnew(uint16_t mtype) } assert(c); c->next = 0; + c->ns = 0; // only used for OoO receives c->buf[0] = 0xC8; // flags c->buf[1] = 0x02; // ver c->length = 12; @@ -4033,6 +4034,7 @@ void processudp(uint8_t *buf, int len, struct sockaddr_in *addr, uint16_t indexu uint8_t *recvchalresponse = NULL; uint16_t l = len, t = 0, s = 0, ns = 0, nr = 0; uint8_t *p = buf + 2; + controlt *c; CSTAT(processudp); @@ -4175,19 +4177,63 @@ void processudp(uint8_t *buf, int len, struct sockaddr_in *addr, uint16_t indexu fmtaddr(htonl(tunnel[t].ip), 0), tunnel[t].port, t); } - // If the 'ns' just received is not the 'nr' we're - // expecting, just send an ack and drop it. - // - // if 'ns' is less, then we got a retransmitted packet. - // if 'ns' is greater than missed a packet. Either way - // we should ignore it. + // If the 'ns' just received is less than the 'nr' + // we're expecting, we got a retransmitted packet. + // Just send an ack and drop it. + if (ns - tunnel[t].nr >= 0x8000u) + { + if (l) // Is this not a ZLB? + controlnull(t); + return; + } + + // If the 'ns' just received is greater than the 'nr' + // we're expecting, we missed a packet. 
If it's not too + // big and new, store this one to look after it after we + // get the retransmission of the missing piece. if (ns != tunnel[t].nr) { - // is this the sequence we were expecting? STAT(tunnel_rx_errors); LOG(1, 0, t, " Out of sequence tunnel %u, (%u is not the expected %u)\n", t, ns, tunnel[t].nr); + if (tunnel[t].state == TUNNELOPEN + && ns - tunnel[t].nr <= 10 && len <= MAXCONTROL) + { + // Not too big and not too new + controlt **curp; + + LOG(2, 0, t, " Queueing it\n"); + + // Find where to put it in the queue + for (curp = &tunn_local[t].controlr; (c = *curp); curp = &c->next) + { + if (ns == c->ns) + { + LOG(2, 0, t, " We already had this piece\n"); + break; + } + if (ns < c->ns) + { + // The rest is greater than this, put this before + c = NULL; + break; + } + } + + if (curp && !c) + { + // We don't already have this piece, store it + c = controlnew(0); + c->next = *curp; + *curp = c; + c->length = len; + c->ns = ns; + memcpy(c->buf, buf, len); + } + } + + // Tell peer what we have if (l) // Is this not a ZLB? controlnull(t); return; @@ -4199,7 +4245,7 @@ void processudp(uint8_t *buf, int len, struct sockaddr_in *addr, uint16_t indexu // some to clear maybe? 
while (tunnel[t].controlc > 0 && (((tunnel[t].ns - tunnel[t].controlc) - nr) & 0x8000)) { - controlt *c = tunnel[t].controls; + c = tunnel[t].controls; tunnel[t].controls = c->next; tunnel[t].controlc--; c->next = controlfree; @@ -4215,7 +4261,7 @@ void processudp(uint8_t *buf, int len, struct sockaddr_in *addr, uint16_t indexu { // some control packets can now be sent that were previous stuck out of window int tosend = tunnel[t].window - skip; - controlt *c = tunnel[t].controls; + c = tunnel[t].controls; while (c && skip) { c = c->next; @@ -4259,7 +4305,7 @@ void processudp(uint8_t *buf, int len, struct sockaddr_in *addr, uint16_t indexu STAT(tunnel_rx_errors); free(sendchalresponse); free(recvchalresponse); - return; + goto out; } p += n; // next l -= n; @@ -4815,7 +4861,7 @@ void processudp(uint8_t *buf, int len, struct sockaddr_in *addr, uint16_t indexu } free(sendchalresponse); free(recvchalresponse); - return; + goto out; case 11: // ICRP LOG(3, s, t, "Received ICRP\n"); if (session[s].forwardtosession) @@ -4878,6 +4924,37 @@ void processudp(uint8_t *buf, int len, struct sockaddr_in *addr, uint16_t indexu free(sendchalresponse); free(recvchalresponse); cluster_send_tunnel(t); + +out: + // We processed a control packet, check if we can process the OoO queue + + c = tunn_local[t].controlr; + while (c && c->ns - tunnel[t].nr >= 0x8000u) + { + // We received this again in the meanwhile! Drop. + LOG(2, 0, t, " We received again %u, drop\n", c->ns); + + tunn_local[t].controlr = c->next; + c->next = controlfree; + controlfree = c; + + c = tunn_local[t].controlr; + } + + if (c && c->ns == tunnel[t].nr) + { + // We caught up with what we saved for later! Dequeue this. + LOG(2, 0, t, " We caught up with %u\n", c->ns); + tunn_local[t].controlr = c->next; + + // And process it. + // Note: this might recurse for the rest of the queue, but the + // queue is bound and while processing it we are not queueing more. 
+ processudp(c->buf, c->length, addr, indexudpfd); + + c->next = controlfree; + controlfree = c; + } } else { @@ -7634,7 +7711,7 @@ int load_tunnel(tunnelidt t, tunnelt *new) // Clear tunnel control messages. These are dynamically allocated. // If we get unlucky, this may cause the tunnel to drop! // - tunnel[t].controls = tunnel[t].controle = NULL; + tunnel[t].controls = tunnel[t].controle = tunn_local[t].controlr = NULL; tunnel[t].controlc = 0; if (tunnel[t].state == TUNNELFREE) diff --git a/l2tpns.h b/l2tpns.h index 3e7f4a6..253633e 100644 --- a/l2tpns.h +++ b/l2tpns.h @@ -275,6 +275,7 @@ typedef struct controls // control message { struct controls *next; // next in queue uint16_t length; // length + uint16_t ns; // sequence number uint8_t buf[MAXCONTROL]; } controlt; @@ -506,6 +507,7 @@ tunnelt; typedef struct { + controlt *controlr; // queue of OoO-received messages int l2tp_fd; // kernel acceleration UDP socket } tunnellocalt;