diff --git a/cli.c b/cli.c index bfbae29..e3cea88 100644 --- a/cli.c +++ b/cli.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -316,6 +317,36 @@ void cli_do(int sockfd) socklen_t l = sizeof(addr); if (fork_and_close()) return; + + /* Check that fork_and_close has closed everything but std* and the socket */ + int fdfd = open("/dev/fd", O_RDONLY|O_DIRECTORY); + if (fdfd >= 0) + { + DIR *fds = fdopendir(fdfd); + if (fds) + { + struct dirent *ent; + while ((ent = readdir(fds))) + { + if (!strcmp(ent->d_name, ".") + || !strcmp(ent->d_name, "..")) + continue; + + int fd = atoi(ent->d_name); + if (fd <= STDERR_FILENO) + continue; + if (fd == fdfd || fd == sockfd) + continue; + if (log_stream && fd == fileno(log_stream)) + continue; + + LOG(0, 0, 0, "Warning: fd %d is still open within cli. This may interfere with operations.\n", fd); + } + closedir(fds); + } + close(fdfd); + } + if (getpeername(sockfd, (struct sockaddr *) &addr, &l) == 0) { require_auth = addr.sin_addr.s_addr != inet_addr("127.0.0.1"); @@ -420,10 +451,23 @@ static int cmd_show_session(struct cli_def *cli, const char *command, char **arg for (i = 0; i < argc; i++) { unsigned int s, b_in, b_out, r; + int ifunit = -1; + + if (!strncmp(argv[i], PPP_IF_PREFIX, strlen(PPP_IF_PREFIX))) + { + char *start = argv[i]+strlen(PPP_IF_PREFIX); + char *end; + long res = strtol(start, &end, 10); + if (end != start && !*end) + ifunit = res; + } for (s = 0; s < MAXSESSION; s++) - if (!strcmp(argv[i], session[s].user)) + { + if ((ifunit >= 0 && sess_local[s].ppp_if_unit == ifunit) + || !strcmp(argv[i], session[s].user)) break; + } if (s >= MAXSESSION) { s = atoi(argv[i]); @@ -442,6 +486,8 @@ static int cmd_show_session(struct cli_def *cli, const char *command, char **arg cli_print(cli, "\tRemote ID:\t%d", session[s].far); if (session[s].bundle) cli_print(cli, "\tBundle ID:\t%d (%d)", session[s].bundle, bundle[session[s].bundle].num_of_links); + if (sess_local[s].ppp_if_unit >= 0) + 
cli_print(cli, "\tInterface:\tppp%d", sess_local[s].ppp_if_unit); cli_print(cli, "\tPPP Phase:\t%s", ppp_phase(session[s].ppp.phase)); switch (session[s].ppp.phase) { diff --git a/cluster.c b/cluster.c index d68918a..d4a7a8a 100644 --- a/cluster.c +++ b/cluster.c @@ -435,8 +435,7 @@ static void send_heartbeat(int seq, uint8_t *data, int size) if (size > sizeof(past_hearts[0].data)) { LOG(0, 0, 0, "Tried to heartbeat something larger than the maximum packet!\n"); - kill(0, SIGTERM); - exit(1); + crash(); } i = seq % HB_HISTORY_SIZE; past_hearts[i].seq = seq; @@ -903,8 +902,7 @@ static int hb_add_type(uint8_t **p, int type, int id) break; default: LOG(0, 0, 0, "Found an invalid type in heart queue! (%d)\n", type); - kill(0, SIGTERM); - exit(1); + crash(); } return 0; } @@ -951,8 +949,7 @@ void cluster_heartbeat() if (p > (buff + sizeof(buff))) { // Did we somehow manage to overun the buffer? LOG(0, 0, 0, "FATAL: Overran the heartbeat buffer! This is fatal. Exiting. (size %d)\n", (int) (p - buff)); - kill(0, SIGTERM); - exit(1); + crash(); } // @@ -1011,8 +1008,7 @@ void cluster_heartbeat() // Did we do something wrong? if (p > (buff + sizeof(buff))) { // Did we somehow manage to overun the buffer? LOG(0, 0, 0, "Overran the heartbeat buffer now! This is fatal. Exiting. (size %d)\n", (int) (p - buff)); - kill(0, SIGTERM); - exit(1); + crash(); } LOG(4, 0, 0, "Sending v%d heartbeat #%d, change #%" PRIu64 " with %d changes " @@ -1364,14 +1360,7 @@ static int cluster_recv_tunnel(int more, uint8_t *p) } } - memcpy(&tunnel[more], p, sizeof(tunnel[more]) ); - - // - // Clear tunnel control messages. These are dynamically allocated. - // If we get unlucky, this may cause the tunnel to drop! - // - tunnel[more].controls = tunnel[more].controle = NULL; - tunnel[more].controlc = 0; + load_tunnel(more, (tunnelt *) p); // Copy tunnel into tunnel table.. 
LOG(5, 0, more, "Received tunnel update\n"); @@ -1967,8 +1956,7 @@ static int cluster_process_heartbeat(uint8_t *data, int size, int more, uint8_t LOG(0, 0, 0, "They've seen more state changes (%" PRIu64 " vs my %" PRIu64 ") so I'm gone!\n", h->table_version, config->cluster_table_version); - kill(0, SIGTERM); - exit(1); + crash(); } if (h->table_version < config->cluster_table_version) @@ -1976,8 +1964,7 @@ static int cluster_process_heartbeat(uint8_t *data, int size, int more, uint8_t if (basetime > h->basetime) { LOG(0, 0, 0, "They're an older master than me so I'm gone!\n"); - kill(0, SIGTERM); - exit(1); + crash(); } if (basetime < h->basetime) @@ -1985,8 +1972,7 @@ static int cluster_process_heartbeat(uint8_t *data, int size, int more, uint8_t if (my_address < addr) { // Tie breaker. LOG(0, 0, 0, "They're a higher IP address than me, so I'm gone!\n"); - kill(0, SIGTERM); - exit(1); + crash(); } // @@ -2373,8 +2359,7 @@ int processcluster(uint8_t *data, int size, in_addr_t addr) } LOG(0, 0, 0, "Received a valid C_KILL: I'm going to die now.\n"); - kill(0, SIGTERM); - exit(0); // Lets be paranoid; + crash(); return -1; // Just signalling the compiler. 
case C_HEARTBEAT: diff --git a/dhcp6.c b/dhcp6.c index cb145eb..ffb738d 100644 --- a/dhcp6.c +++ b/dhcp6.c @@ -4,13 +4,21 @@ * GPL licenced */ +#define _GNU_SOURCE #include #include #include +#include +#include +#include #include "dhcp6.h" #include "l2tpns.h" #include "ipv6_u.h" +#include "cluster.h" +#include "util.h" + +int dhcpv6fd; struct dhcp6_in_option { @@ -29,7 +37,7 @@ static struct dhcp6_in_option list_option; static int dhcpv6_format_dns_search_name(const char *strdns, uint8_t *buffer); -static void dhcp6_send_reply(sessionidt s, tunnelidt t, struct in6_addr *ip6_src) +static void dhcp6_send_reply(sessionidt s, tunnelidt t, const struct in6_addr *ip6_src) { struct ip6_hdr *p_ip6_hdr; struct udphdr *p_udp; @@ -309,26 +317,22 @@ static char * get_msg_type(uint8_t type) } } -void dhcpv6_process(sessionidt s, tunnelidt t, uint8_t *p, uint16_t l) +void dhcpv6_process(sessionidt s, tunnelidt t, const struct in6_addr *addr, uint8_t *p, uint16_t l) { - struct ip6_hdr *p_ip6_hdr_in; - struct dhcp6_mess_hdr *p_mess_hdr; + struct dhcp6_mess_hdr *p_mess_hdr = (struct dhcp6_mess_hdr *) p; struct dhcp6_opt_h *p_opt; uint8_t *p_end; uint16_t len; CSTAT(dhcpv6_process); - p_ip6_hdr_in = (struct ip6_hdr *) p; - p_mess_hdr = (struct dhcp6_mess_hdr *) (p + 48); - LOG(3, s, t, "Got DHCPv6 message Type: %s(%d)\n", get_msg_type(p_mess_hdr->type), p_mess_hdr->type); if (!session[s].route6[0].ipv6route.s6_addr[0] || !session[s].route6[0].ipv6prefixlen) return; p_opt = (struct dhcp6_opt_h *) &p_mess_hdr[1]; - p_end = ((uint8_t *)p_ip6_hdr_in) + ntohs(p_ip6_hdr_in->ip6_plen) + sizeof(*p_ip6_hdr_in); + p_end = p + l; memset(&list_option, 0, sizeof(list_option)); list_option.p_mess_hdr = p_mess_hdr; while (((uint8_t *)p_opt) < p_end) @@ -413,7 +417,7 @@ void dhcpv6_process(sessionidt s, tunnelidt t, uint8_t *p, uint16_t l) return; } - dhcp6_send_reply(s, t, &p_ip6_hdr_in->ip6_src); + dhcp6_send_reply(s, t, addr); } break; @@ -458,8 +462,8 @@ void dhcpv6_process(sessionidt s, 
tunnelidt t, uint8_t *p, uint16_t l) return; } - dhcp6_send_reply(s, t, &p_ip6_hdr_in->ip6_src); - send_ipv6_ra(s, t, &p_ip6_hdr_in->ip6_src); // send a RA + dhcp6_send_reply(s, t, addr); + send_ipv6_ra(s, t, addr); // send a RA } break; @@ -489,7 +493,7 @@ void dhcpv6_process(sessionidt s, tunnelidt t, uint8_t *p, uint16_t l) return; } - dhcp6_send_reply(s, t, &p_ip6_hdr_in->ip6_src); + dhcp6_send_reply(s, t, addr); } break; @@ -501,7 +505,7 @@ void dhcpv6_process(sessionidt s, tunnelidt t, uint8_t *p, uint16_t l) return; } - dhcp6_send_reply(s, t, &p_ip6_hdr_in->ip6_src); + dhcp6_send_reply(s, t, addr); } break; @@ -527,6 +531,37 @@ void dhcpv6_process(sessionidt s, tunnelidt t, uint8_t *p, uint16_t l) return; } +void dhcpv6_process_from_ipv6(sessionidt s, tunnelidt t, uint8_t *p, uint16_t l) +{ + struct ip6_hdr *p_ip6_hdr_in = (struct ip6_hdr *) p; + struct in6_addr *addr = &p_ip6_hdr_in->ip6_src; + uint16_t ipv6_len = ntohs(p_ip6_hdr_in->ip6_plen); + + l -= sizeof(*p_ip6_hdr_in); + p += sizeof(*p_ip6_hdr_in); + + if (ipv6_len > l) + { + LOG(5, 0, 0, "bogus IPv6 packet size??\n"); + return; + } + + if (p_ip6_hdr_in->ip6_nxt != IPPROTO_UDP) + { + LOG(5, 0, 0, "not UDP DHCP packet??\n"); + return; + } + if (ipv6_len < sizeof(struct udphdr)) + { + LOG(5, 0, 0, "bogus IPv6 packet size for UDP??\n"); + return; + } + ipv6_len -= sizeof(struct udphdr); + p += sizeof(struct udphdr); + + dhcpv6_process(s, t, addr, p, ipv6_len); +} + static int dhcpv6_format_dns_search_name(const char *strdns, uint8_t *buffer) { int n = strlen(strdns); @@ -570,6 +605,8 @@ static int dhcpv6_format_dns_search_name(const char *strdns, uint8_t *buffer) void dhcpv6_init(void) { uint32_t id; + int on = 1; + struct sockaddr_in6 addr; dhcp6_local_serverid.opt_hdr.code = htons(D6_OPT_SERVERID); dhcp6_local_serverid.opt_hdr.len = htons(4 + sizeof(id)); @@ -582,4 +619,129 @@ void dhcpv6_init(void) id = htobe32(0xFDFDFAFA); memcpy(dhcp6_local_serverid.duid.u.ll.addr, &id, sizeof(id)); + + dhcpv6fd = 
socket(AF_INET6, SOCK_DGRAM, IPPROTO_UDP); + if (dhcpv6fd < 0) + LOG(1, 0, 0, "DHCPv6: could not create UDP socket: %s\n", strerror(errno)); + +#ifdef SO_REUSEPORT + if (setsockopt(dhcpv6fd, SOL_SOCKET, SO_REUSEPORT, &on, sizeof(on)) < 0) + LOG(1, 0, 0, "DHCPv6: could not set reusing port: %s\n", strerror(errno)); +#endif + + if (setsockopt(dhcpv6fd, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)) < 0) + LOG(1, 0, 0, "DHCPv6: could not set reusing address: %s\n", strerror(errno)); + +#ifdef IPV6_RECVPKTINFO + if (setsockopt(dhcpv6fd, SOL_IPV6, IPV6_RECVPKTINFO, &on, sizeof(on)) < 0) + LOG(1, 0, 0, "DHCPv6: could not request pktinfo: %s\n", strerror(errno)); +#else + if (setsockopt(dhcpv6fd, SOL_IPV6, IPV6_PKTINFO, &on, sizeof(on)) < 0) + LOG(1, 0, 0, "DHCPv6: could not request pktinfo: %s\n", strerror(errno)); +#endif +#ifdef IPV6_V6ONLY + if (setsockopt(dhcpv6fd, SOL_IPV6, IPV6_V6ONLY, &on, sizeof(on)) < 0) + LOG(1, 0, 0, "DHCPv6: could not set v6only: %s\n", strerror(errno)); +#endif + + memset(&addr, 0, sizeof(addr)); + addr.sin6_family = AF_INET6; + addr.sin6_port = htons(DHCP6_SERVER_PORT); + if (bind(dhcpv6fd, (struct sockaddr *) &addr, sizeof(addr)) < 0) + LOG(1, 0, 0, "DHCPv6: could not bind to DHCPv6 server port\n"); +} + +// +// A new ppp interface was created, watch for DHCPv6 on it +void dhcpv6_listen(int ifidx) +{ + struct ipv6_mreq mreq; + + memset(&mreq, 0, sizeof(mreq)); + mreq.ipv6mr_interface = ifidx; + inet_pton(AF_INET6, DHCP6_SERVER_ADDRESS, &mreq.ipv6mr_multiaddr); + if (setsockopt(dhcpv6fd, SOL_IPV6, IPV6_JOIN_GROUP, &mreq, sizeof(mreq)) < 0) + LOG(2, 0, 0, "DHCPv6: could not join DHCPv6 group: %s\n", strerror(errno)); +} + +// +// A DHCPv6 request was received on a ppp interface, receive it +void dhcpv6_process_from_kernel(uint8_t *p, size_t size_bufp) +{ + struct sockaddr_storage fromaddr; + struct sockaddr_in6 *sin6; + socklen_t fromlen = sizeof(fromaddr); + struct in6_addr toaddr; + int ifidx; + int r, s, t; + + r = recvfromto6(dhcpv6fd, p, 
size_bufp, 0, (struct sockaddr *) &fromaddr, &fromlen, &toaddr, &ifidx); + if (r < 0) + { + static time_t lastwarn; + time_t now = time(NULL); + if (now > lastwarn) + { + LOG(5, 0, 0, "DHCPV6: reception error: %s\n", strerror(errno)); + lastwarn = now; + } + return; + } + LOG(5, 0, 0, "Got packet on DHCP socket on if %d\n", ifidx); + + if (fromaddr.ss_family != AF_INET6) + { + LOG(5, 0, 0, "DHCPV6: got strange family %d\n", fromaddr.ss_family); + return; + } + sin6 = (struct sockaddr_in6 *) &fromaddr; + + if (ntohs(sin6->sin6_port) != DHCP6_CLIENT_PORT) + { + LOG(5, 0, 0, "DHCPV6: got strange client port %d\n", ntohs(sin6->sin6_port)); + return; + } + + for (s = 1; s < MAXSESSION; s++) + { + if (sess_local[s].ppp_if_idx != ifidx) + continue; + + t = session[s].tunnel; + + if (config->cluster_iam_master) + dhcpv6_process(s, t, &sin6->sin6_addr, p, r); + else + { + // DHCPV6 must be managed by the Master. + + // Fake UDPv6 header + struct udphdr *udp = (struct udphdr *)p - 1; + udp->source = sin6->sin6_port; + udp->dest = htons(DHCP6_SERVER_PORT); + udp->len = htons(sizeof(*udp) + r); /* UDP length field is big-endian on the wire */ + // udp->check is not checked by Master anyway + r += sizeof(*udp); + + struct ip6_hdr *ip6 = (struct ip6_hdr *)udp - 1; + ip6->ip6_flow = htonl(0x60000000); /* version 6 in the top 4 bits; traffic class and flow label 0 */ + ip6->ip6_plen = htons(r); + ip6->ip6_nxt = IPPROTO_UDP; + ip6->ip6_hlim = 255; + memcpy(&ip6->ip6_src, &sin6->sin6_addr, sizeof(sin6->sin6_addr)); + memcpy(&ip6->ip6_dst, &toaddr, sizeof(toaddr)); + r += sizeof(*ip6); + + uint16_t *w = (uint16_t *)ip6 - 4; + w[0] = htons(0x0002); /* L2TP data*/ + w[1] = htons(t); + w[2] = htons(s); + w[3] = htons(PPPIPV6); /* PPP protocol */ + r += 8; + + master_forward_packet((uint8_t *) w, r, htonl(tunnel[t].ip), htons(tunnel[t].port), tunnel[t].indexudp); + } + + break; + } } diff --git a/dhcp6.h b/dhcp6.h index 4b0eb19..38fa445 100644 --- a/dhcp6.h +++ b/dhcp6.h @@ -7,6 +7,10 @@ #ifndef __DHCP6_H__ #define __DHCP6_H__ +#define DHCP6_CLIENT_PORT 546 +#define DHCP6_SERVER_PORT 547 +#define 
DHCP6_SERVER_ADDRESS "ff02::1:2" + #define DHCP6_SOLICIT 1 #define DHCP6_ADVERTISE 2 #define DHCP6_REQUEST 3 @@ -212,7 +216,10 @@ struct dhcp6_opt_ia_prefix { } __attribute__((packed)); // dhcp6.c -void dhcpv6_process(uint16_t s, uint16_t t, uint8_t *p, uint16_t l); +extern int dhcpv6fd; +void dhcpv6_process_from_ipv6(uint16_t s, uint16_t t, uint8_t *p, uint16_t l); void dhcpv6_init(void); +void dhcpv6_listen(int ifidx); +void dhcpv6_process_from_kernel(uint8_t *p, size_t size_bufp); #endif /* __DHCP6_H__ */ diff --git a/docs/manpages/startup-config.5 b/docs/manpages/startup-config.5 index e039c15..62002d1 100644 --- a/docs/manpages/startup-config.5 +++ b/docs/manpages/startup-config.5 @@ -412,6 +412,20 @@ connected users. .PP Number of token buckets to allocate for throttling. Each throttled session requires two buckets (in and out). +.PP +\f[B]kernel_accel\f[R] (boolean) +.PP +Determines whether or not to enable kernel acceleration. +Note that only one l2tpns instance can use it per network namespace, +otherwise they will step on each other. +Also, if you have a lot of clients (e.g.\ at least a hundred), listening +for DHCPv6 and RS requires a lot of igmp6 subscriptions, tuning sysctl +may be needed, such as +.PP +sysctl net.core.optmem_max=1048576 +.PP +otherwise the logs will mention failures to subscribe due to lack of +memory. .SS DHCPv6 And IPv6 SETTINGS .PP \f[B]dhcp6_preferred_lifetime\f[R] (int) diff --git a/docs/src/man/startup-config.5.md b/docs/src/man/startup-config.5.md index 885470a..1ca67a2 100644 --- a/docs/src/man/startup-config.5.md +++ b/docs/src/man/startup-config.5.md @@ -251,6 +251,14 @@ The following `variables` may be set: Number of token buckets to allocate for throttling. Each throttled session requires two buckets (in and out). +**kernel\_accel** (boolean) + + Determines whether or not to enable kernel acceleration. Note that only one l2tpns instance can use it per network namespace, otherwise they will step on each other. 
Also, if you have a lot of clients (e.g. at least a hundred), listening for DHCPv6 and RS requires a lot of igmp6 subscriptions, tuning sysctl may be needed, such as + +sysctl net.core.optmem\_max=1048576 + +otherwise the logs will mention failures to subscribe due to lack of memory. + ## DHCPv6 And IPv6 SETTINGS **dhcp6\_preferred\_lifetime** (int) diff --git a/etc/startup-config.default b/etc/startup-config.default index 2249454..496522e 100644 --- a/etc/startup-config.default +++ b/etc/startup-config.default @@ -146,3 +146,18 @@ set ppp_keepalive yes # Walled garden #load plugin "garden" + +# Kernel acceleration, enable on no more than one instance! +#set kernel_accel yes +# +# You will probably want to also enable MSS clamping, which l2tpns won't be able to do any more: +# iptables -A FORWARD -p tcp --tcp-flags SYN,RST SYN -j TCPMSS --clamp-mss-to-pmtu +# ip6tables -A FORWARD -p tcp --tcp-flags SYN,RST SYN -j TCPMSS --clamp-mss-to-pmtu +# or +# nft add rule inet filter forward tcp flags syn tcp option maxseg size set rt mtu +# +# and allow dhcpv6 traffic: +# iptables -A INPUT -i ppp+ -p udp --sport 546 --dport 547 -j ACCEPT +# +# and increase the memory available for igmp6 for DHCPv6 and RS: +# sysctl net.core.optmem_max=10485760 diff --git a/garden.c b/garden.c index 0e6880c..bbfca34 100644 --- a/garden.c +++ b/garden.c @@ -238,6 +238,8 @@ int garden_session(sessiont *s, int flag, char *newuser) s->walled_garden = 0; + // TODO: try to enable kernel acceleration with switch_kernel_accel(s); + if (flag != F_CLEANUP) { /* OK, we're up! 
*/ diff --git a/icmp.c b/icmp.c index 54690eb..3e45e58 100644 --- a/icmp.c +++ b/icmp.c @@ -6,13 +6,62 @@ #include #include #include +#include +#include #include "dhcp6.h" #include "l2tpns.h" #include "ipv6_u.h" +#include "util.h" + +int icmpv6fd; static uint16_t _checksum(uint8_t *addr, int count); +void icmpv6_init(void) +{ + int on = 1, check = 2; + struct icmp6_filter filter; + + icmpv6fd = socket(AF_INET6, SOCK_RAW, IPPROTO_ICMPV6); + if (icmpv6fd < 0) + LOG(1, 0, 0, "ICMPv6: could not create socket: %s\n", strerror(errno)); + +#ifdef IPV6_RECVPKTINFO + if (setsockopt(icmpv6fd, SOL_IPV6, IPV6_RECVPKTINFO, &on, sizeof(on)) < 0) + LOG(1, 0, 0, "ICMPv6: could not request pktinfo: %s\n", strerror(errno)); +#else + if (setsockopt(icmpv6fd, SOL_IPV6, IPV6_PKTINFO, &on, sizeof(on)) < 0) + LOG(1, 0, 0, "ICMPv6: could not request pktinfo: %s\n", strerror(errno)); +#endif + +#ifdef __linux__ + if (setsockopt(icmpv6fd, SOL_RAW, IPV6_CHECKSUM, &check, sizeof(check)) < 0) +#else + if (setsockopt(icmpv6fd, SOL_IPV6, IPV6_CHECKSUM, &check, sizeof(check)) < 0) +#endif + LOG(1, 0, 0, "ICMPv6: could not request for checking checksums: %s\n", strerror(errno)); + + ICMP6_FILTER_SETBLOCKALL(&filter); + ICMP6_FILTER_SETPASS(ND_ROUTER_SOLICIT, &filter); + + if (setsockopt(icmpv6fd, SOL_ICMPV6, ICMP6_FILTER, &filter, sizeof(filter)) < 0) + LOG(1, 0, 0, "ICMPv6: could not set filter for RS: %s\n", strerror(errno)); +} + +// +// A new ppp interface was created, watch for ICMPv6 RS on it +void icmpv6_listen(int ifidx) +{ + struct ipv6_mreq mreq; + + memset(&mreq, 0, sizeof(mreq)); + mreq.ipv6mr_interface = ifidx; + inet_pton(AF_INET6, "ff02::2", &mreq.ipv6mr_multiaddr); + if (setsockopt(icmpv6fd, SOL_IPV6, IPV6_JOIN_GROUP, &mreq, sizeof(mreq)) < 0) + LOG(2, 0, 0, "ICMPv6: could not join all routers group: %s\n", strerror(errno)); +} + void host_unreachable(in_addr_t destination, uint16_t id, in_addr_t source, uint8_t *packet, int packet_len) { char buf[128] = {0}; @@ -96,7 +145,7 @@ 
struct nd_opt_rdnss_info_l2tpns struct in6_addr nd_opt_rdnssi[0]; }; -void send_ipv6_ra(sessionidt s, tunnelidt t, struct in6_addr *ip) +void send_ipv6_ra(sessionidt s, tunnelidt t, const struct in6_addr *ip) { struct nd_opt_prefix_info *pinfo; struct ip6_hdr *p_ip6_hdr; @@ -203,3 +252,54 @@ void send_ipv6_ra(sessionidt s, tunnelidt t, struct in6_addr *ip) tunnelsend(b, l + (((uint8_t *) p_ip6_hdr)-b), t); // send it... return; } + +// +// An ICMPv6 request was received on a ppp interface, receive it +void icmpv6_process_from_kernel(uint8_t *p, size_t size_bufp) +{ + struct sockaddr_storage fromaddr; + struct sockaddr_in6 *sin6; + socklen_t fromlen = sizeof(fromaddr); + struct in6_addr toaddr; + int ifidx; + int r, s, t; + + r = recvfromto6(icmpv6fd, p, size_bufp, 0, (struct sockaddr *) &fromaddr, &fromlen, &toaddr, &ifidx); + if (r < 0) + { + static time_t lastwarn; + time_t now = time(NULL); + if (now > lastwarn) + { + LOG(5, 0, 0, "ICMPV6: reception error: %s\n", strerror(errno)); + lastwarn = now; + } + return; + } + LOG(5, 0, 0, "Got packet on ICMP socket on if %d\n", ifidx); + + if (fromaddr.ss_family != AF_INET6) + { + LOG(5, 0, 0, "ICMPV6: got strange family %d\n", fromaddr.ss_family); + return; + } + sin6 = (struct sockaddr_in6 *) &fromaddr; + + if (*p != ND_ROUTER_SOLICIT) + { + LOG(5, 0, 0, "ICMPV6: not router sollicitation??: %d\n", *p); + return; + } + + for (s = 1; s < MAXSESSION; s++) + { + if (sess_local[s].ppp_if_idx != ifidx) + continue; + + t = session[s].tunnel; + + send_ipv6_ra(s, t, &sin6->sin6_addr); + + break; + } +} diff --git a/l2tplac.c b/l2tplac.c index 8be2261..0cec975 100644 --- a/l2tplac.c +++ b/l2tplac.c @@ -532,20 +532,8 @@ int lac_session_forward(uint8_t *buf, int len, sessionidt sess, uint16_t proto, if ((proto == PPPIP) || (proto == PPPMP) ||(proto == PPPIPV6 && config->ipv6_prefix.s6_addr[0])) { session[sess].last_packet = session[sess].last_data = time_now; - // Update STAT IN - increment_counter(&session[sess].cin, 
&session[sess].cin_wrap, len); - session[sess].cin_delta += len; - session[sess].pin++; - sess_local[sess].cin += len; - sess_local[sess].pin++; - - session[s].last_data = time_now; - // Update STAT OUT - increment_counter(&session[s].cout, &session[s].cout_wrap, len); // byte count - session[s].cout_delta += len; - session[s].pout++; - sess_local[s].cout += len; - sess_local[s].pout++; + update_session_in_stat(sess, 1, len); + update_session_out_stat(s, 1, len); } else session[sess].last_packet = time_now; diff --git a/l2tpns.c b/l2tpns.c index aa9f06f..9d0aa4f 100644 --- a/l2tpns.c +++ b/l2tpns.c @@ -38,6 +38,16 @@ #include #include #include +#include +#include +#include +#include +#include + +#ifndef PPPIOCBRIDGECHAN +#define PPPIOCBRIDGECHAN _IOW('t', 53, int) +#define PPPIOCUNBRIDGECHAN _IO('t', 54) +#endif #include "md5.h" #include "dhcp6.h" @@ -58,12 +68,21 @@ #include "pppoe.h" #include "dhcp6.h" +#ifdef HAVE_EPOLL +# include +#else +# define FAKE_EPOLL_IMPLEMENTATION /* include the functions */ +# include "fake_epoll.h" +#endif + char * Vendor_name = "Linux L2TPNS"; uint32_t call_serial_number = 0; // Globals configt *config = NULL; // all configuration -int nlfd = -1; // netlink socket +int rtnlfd = -1; // route netlink socket +int genlfd = -1; // generic netlink socket +int genl_l2tp_id = -1; // L2TP generic netlink ID int tunfd = -1; // tun interface file handle. (network device) int udpfd[MAX_UDPFD + 1] = INIT_TABUDPFD; // array UDP file handle + 1 for lac udp int udplacfd = -1; // UDP LAC file handle @@ -78,10 +97,11 @@ int epollfd = -1; // event polling time_t basetime = 0; // base clock char hostname[MAXHOSTNAME] = ""; // us. 
static int tunidx; // ifr_ifindex of tun device -int nlseqnum = 0; // netlink sequence number -int min_initok_nlseqnum = 0; // minimun seq number for messages after init is ok +int rtnlseqnum = 0; // route netlink sequence number +int genlseqnum = 0; // generic netlink sequence number +int min_initok_rtnlseqnum = 0; // minimun seq number for messages after init is ok static int syslog_log = 0; // are we logging to syslog -static FILE *log_stream = 0; // file handle for direct logging (i.e. direct into file, not via syslog). +FILE *log_stream = 0; // file handle for direct logging (i.e. direct into file, not via syslog). uint32_t last_id = 0; // Unique ID for radius accounting // Guest change char guest_users[10][32]; // Array of guest users @@ -117,6 +137,8 @@ static char time_now_string[64] = {0}; // Current time as a string. static int time_changed = 0; // time_now changed char main_quit = 0; // True if we're in the process of exiting. static char main_reload = 0; // Re-load pending +#define MAX_KERNEL_SWITCHES 20 // Maximum number of kernel switches per 1/10th second +static int kernel_switches = 0; // How many kernel switches we performed since last cleanup linked_list *loaded_plugins; linked_list *plugins[MAX_PLUGIN_TYPES]; @@ -202,6 +224,7 @@ config_descriptt config_values[] = { CONFIG("primary_ipv6_dns", default_ipv6_dns1, IPv6), CONFIG("secondary_ipv6_dns", default_ipv6_dns2, IPv6), CONFIG("default_ipv6_domain_list", default_ipv6_domain_list, STRING), + CONFIG("kernel_accel", kernel_accel, BOOL), { NULL, 0, 0, 0 } }; @@ -227,6 +250,7 @@ static sessiont shut_acct[8192]; static sessionidt shut_acct_n = 0; tunnelt *tunnel = NULL; // Array of tunnel structures. +tunnellocalt *tunn_local = NULL; // Array of local per-tunnel structures. bundlet *bundle = NULL; // Array of bundle structures. fragmentationt *frag = NULL; // Array of fragmentation structures. sessiont *session = NULL; // Array of session structures. 
@@ -240,8 +264,17 @@ struct Tstats *_statistics = NULL; struct Tringbuffer *ringbuffer = NULL; #endif -static ssize_t netlink_send(struct nlmsghdr *nh); -static void netlink_addattr(struct nlmsghdr *nh, int type, const void *data, int alen); +static int initlacudp(int *pudpfd, in_addr_t ip_dest, uint16_t port_dest); +static int initudp(int * pudpfd, in_addr_t ip_bind, in_addr_t ip_dest, uint16_t port_dest); +static int setupif(int ifidx, uint32_t mru, int config_addr); +static ssize_t rtnetlink_send(struct nlmsghdr *nh); +static ssize_t genetlink_send(struct nlmsghdr *nh); +static ssize_t genetlink_recv(void *buf, ssize_t len); +static int netlink_handle_ack(struct nlmsghdr *nh, int gen, int min_initok_nlseqnum, char *tun_nl_phase_msg[]); +static void rtnetlink_addattr(struct nlmsghdr *nh, int type, const void *data, int alen); +static void genetlink_addattr(struct nlmsghdr *nh, int type, const void *data, int alen); +static int genetlink_getattr(struct nlmsghdr *nh, int type, void *data, int alen); +static void routesset(sessionidt s, sessiont *sp, int add); static void cache_ipmap(in_addr_t ip, sessionidt s); static void uncache_ipmap(in_addr_t ip); static void cache_ipv6map(struct in6_addr ip, int prefixlen, sessionidt s); @@ -261,6 +294,7 @@ static void processcontrol(uint8_t *buf, int len, struct sockaddr_in *addr, int static tunnelidt new_tunnel(void); static void unhide_value(uint8_t *value, size_t len, uint16_t type, uint8_t *vector, size_t vec_len); static void bundleclear(bundleidt b); +static void processppp(sessionidt s, uint8_t *buf, int len, uint8_t *p, int l, struct sockaddr_in *addr, uint16_t indexudpfd); // return internal time (10ths since process startup), set f if given // as a side-effect sets time_now, and time_changed @@ -453,6 +487,1048 @@ void random_data(uint8_t *buf, int len) buf[n++] = (rand() >> 4) & 0xff; } +// +// Clear all existing kernel items of a given type +static int delete_kernel_items(const char *name, int cmd, int id1, int 
id2, void (*delete_one)(uint32_t id1, uint32_t id2)) +{ + struct { + struct nlmsghdr nh; + struct genlmsghdr glh; + char data[8192]; + } req; + int seqnum; + + if (genl_l2tp_id < 0) + { + errno = ENOSYS; + return -1; + } + + LOG(3, 0, 0, "Deleting all kernel %ss\n", name); + + memset(&req, 0, sizeof(req)); + + req.nh.nlmsg_type = genl_l2tp_id; + req.nh.nlmsg_flags = NLM_F_REQUEST|NLM_F_ACK|NLM_F_DUMP; + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.glh)); + + req.glh.cmd = cmd; + req.glh.version = L2TP_GENL_VERSION; + + assert(req.nh.nlmsg_len < sizeof(req)); + + if (genetlink_send(&req.nh) < 0) + { + LOG(2, 0, 0, "Can't delete %ss: %s\n", name, strerror(errno)); + return -1; + } + seqnum = genlseqnum; + + /* 1 for receiving "done" */ + int nitems = 1; + int done = 0; + + while (done < nitems) + { + ssize_t size = genetlink_recv(&req, sizeof(req)); + if (size < 0) + { + LOG(2, 0, 0, "Can't receive answer for %s deletion: %s\n", name, strerror(errno)); + return -1; + } + + // Iterate over all answers + struct nlmsghdr *nh; + for (nh = &req.nh; size; nh = NLMSG_NEXT(nh, size)) + { + if (!NLMSG_OK(nh, size)) + { + LOG(2, 0, 0, "Short netlink answer: %d vs %zd\n", nh->nlmsg_len, size); + break; + } + + if (nh->nlmsg_type == NLMSG_NOOP) + { + // Ignore + continue; + } + + if (nh->nlmsg_type == NLMSG_DONE) + { + done++; + if (done < nitems) + LOG(3, 0, 0, "Done queueing, still %d/%d %ss deletion pending\n", done, nitems, name); + continue; + } + + if (nh->nlmsg_seq != seqnum) + { + // Consume acknoledgments of deletions. 
+ netlink_handle_ack(nh, 1, 0, NULL); + done++; + } + else + { + // Getting more items + if (nh->nlmsg_type != genl_l2tp_id) + { + LOG(2, 0, 0, "Unexpected generic netlink answer %d\n", nh->nlmsg_type); /* report the message being examined, not the first one in the buffer */ + continue; + } + + if (nh->nlmsg_len < NLMSG_HDRLEN + GENL_HDRLEN) + { + LOG(2, 0, 0, "Short answer for l2tp netlink name\n"); + continue; + } + + uint32_t ret; + if (genetlink_getattr(nh, id1, &ret, sizeof(ret)) != 0) + LOG(2, 0, 0, "Did not get %s ID\n", name); + else + { + if (!id2) + { + delete_one(ret, 0); + nitems++; + } + else + { + uint32_t ret2; + if (genetlink_getattr(nh, id2, &ret2, sizeof(ret2)) != 0) + LOG(2, 0, 0, "Did not get %s ID2\n", name); + else + { + // Queue deletion for this + delete_one(ret, ret2); + nitems++; + } + } + } + } + } + } + LOG(3, 0, 0, "Done deleting %ss\n", name); + + return 0; +} + +// +// Create tunnel in kernel +static int create_kernel_tunnel(uint32_t tid, uint32_t peer_tid) +{ + struct { + struct nlmsghdr nh; + struct genlmsghdr glh; + char data[64]; + } req; + + if (!config->kernel_accel) + { + /* Disabled */ + errno = EPERM; + return -1; + } + + if (genl_l2tp_id < 0) + { + errno = ENOSYS; + return -1; + } + + if (tunn_local[tid].l2tp_fd >= 0) + /* Already set up */ + return 0; + + LOG(2, 0, tid, "Creating kernel tunnel from %u to %u\n", tid, peer_tid); + + memset(&req, 0, sizeof(req)); + + req.nh.nlmsg_type = genl_l2tp_id; + req.nh.nlmsg_flags = NLM_F_REQUEST|NLM_F_ACK; + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.glh)); + + req.glh.cmd = L2TP_CMD_TUNNEL_CREATE; + req.glh.version = L2TP_GENL_VERSION; + + int fd; + if (tunnel[tid].indexudp == config->indexlacudpfd) + { + /* tunnel as LAC */ + if (initlacudp(&fd, htonl(tunnel[tid].ip), htons(tunnel[tid].port)) < 0) + return -1; + } + else + { + /* tunnel as LNS */ + if (initudp(&fd, config->bind_n_address[tunnel[tid].indexudp], + htonl(tunnel[tid].ip), htons(tunnel[tid].port)) < 0) + return -1; + } + + genetlink_addattr(&req.nh, L2TP_ATTR_FD, &fd, sizeof(fd)); + 
genetlink_addattr(&req.nh, L2TP_ATTR_CONN_ID, &tid, sizeof(tid)); + genetlink_addattr(&req.nh, L2TP_ATTR_PEER_CONN_ID, &peer_tid, sizeof(peer_tid)); + uint8_t version = 2; + genetlink_addattr(&req.nh, L2TP_ATTR_PROTO_VERSION, &version, sizeof(version)); + uint16_t encap = L2TP_ENCAPTYPE_UDP; + genetlink_addattr(&req.nh, L2TP_ATTR_ENCAP_TYPE, &encap, sizeof(encap)); + + assert(req.nh.nlmsg_len < sizeof(req)); + + if (genetlink_send(&req.nh) < 0) + { + LOG(2, 0, tid, "Can't create tunnel %d to %d: %s\n", tid, peer_tid, strerror(errno)); + close(fd); + return -1; + } + + ssize_t size = genetlink_recv(&req, sizeof(req)); + if (size < 0) + { + LOG(1, 0, 0, "Can't receive answer for tunnel creation: %s\n", strerror(errno)); + close(fd); + return -1; + } + + if (netlink_handle_ack((struct nlmsghdr *)&req, 1, 0, NULL) < 0) + { + close(fd); + return -1; + } + + struct epoll_event e; + static struct event_data d1[MAXTUNNEL]; + e.events = EPOLLIN; + d1[tid].type = FD_TYPE_L2TP; + d1[tid].index = tid; + e.data.ptr = &d1[tid]; + + epoll_ctl(epollfd, EPOLL_CTL_ADD, fd, &e); + + tunn_local[tid].l2tp_fd = fd; + return 0; +} + +// +// Update remote address of kernel tunnel +static int update_kernel_tunnel(sessionidt s, tunnelidt t) +{ + if (tunn_local[t].l2tp_fd < 0) { + LOG(2, s, t, "IP change was requested for tunnel before it is connected\n"); + return -1; + } + + struct sockaddr_in tunneladdr; + memset(&tunneladdr, 0, sizeof(tunneladdr)); + tunneladdr.sin_family = AF_INET; + tunneladdr.sin_addr.s_addr = htonl(tunnel[t].ip); + tunneladdr.sin_port = htons(tunnel[t].port); + + int ret = connect(tunn_local[t].l2tp_fd, (struct sockaddr *)&tunneladdr, sizeof(tunneladdr)); + if (ret < 0) + { + LOG(2, s, t, "Can't switch tunnel UDP socket: %s\n", strerror(errno)); + return -1; + } + return 0; +} + +// +// Queue deleting tunnel in kernel +static int queue_delete_kernel_tunnel(uint32_t tid) +{ + struct { + struct nlmsghdr nh; + struct genlmsghdr glh; + char data[64]; + } req; + + if 
(genl_l2tp_id < 0) + { + errno = ENOSYS; + return -1; + } + + LOG(3, 0, tid, "Deleting kernel tunnel for %u\n", tid); + + memset(&req, 0, sizeof(req)); + + req.nh.nlmsg_type = genl_l2tp_id; + req.nh.nlmsg_flags = NLM_F_REQUEST|NLM_F_ACK; + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.glh)); + + req.glh.cmd = L2TP_CMD_TUNNEL_DELETE; + req.glh.version = L2TP_GENL_VERSION; + + genetlink_addattr(&req.nh, L2TP_ATTR_CONN_ID, &tid, sizeof(tid)); + + assert(req.nh.nlmsg_len < sizeof(req)); + + if (genetlink_send(&req.nh) < 0) + { + LOG(2, 0, tid, "Can't delete tunnel %d: %s\n", tid, strerror(errno)); + return -1; + } + + return 0; +} + +// +// Delete tunnel in kernel +static int delete_kernel_tunnel(uint32_t tid) +{ + int ret = queue_delete_kernel_tunnel(tid); + if (ret < 0) + return -1; + + struct { + struct nlmsghdr nh; + struct genlmsghdr glh; + char data[64]; + } req; + + ssize_t size = genetlink_recv(&req, sizeof(req)); + if (size < 0) + { + LOG(1, 0, 0, "Can't receive answer for tunnel deletion: %s\n", strerror(errno)); + return -1; + } + + if (netlink_handle_ack((struct nlmsghdr *)&req, 1, 0, NULL) < 0) + return -1; + + if (tunn_local[tid].l2tp_fd >= 0) + { + epoll_ctl(epollfd, EPOLL_CTL_DEL, tunn_local[tid].l2tp_fd, NULL); + close(tunn_local[tid].l2tp_fd); + tunn_local[tid].l2tp_fd = -1; + } + + return 0; +} + +// +// Clear all existing tunnels +// +// Unfortunately, tunnels survive us, so we have to drop any tunnel left from a +// previous instance that might have crashed. 
+static void delete_one_kernel_tunnel(uint32_t id1, uint32_t id2)
+{
+	// Callback handed to delete_kernel_items(): only id1 is used, as the
+	// tunnel id.  The deletion is just queued, its answer is not read here.
+	queue_delete_kernel_tunnel(id1);
+}
+
+static void delete_kernel_tunnels(void)
+{
+	delete_kernel_items("tunnel", L2TP_CMD_TUNNEL_GET, L2TP_ATTR_CONN_ID, L2TP_ATTR_NONE, delete_one_kernel_tunnel);
+}
+
+//
+// Create session in kernel
+//
+// Sends L2TP_CMD_SESSION_CREATE for session sid of tunnel tid (peer ids
+// peer_sid / peer_tid, pseudowire type PPP) and waits for the answer.
+// Returns 0 on success.  Returns -1 with errno ENOSYS when no l2tp generic
+// netlink id is known, with errno ENOENT when the tunnel has no kernel socket
+// yet, or when the netlink exchange fails.
+static int create_kernel_session(uint32_t tid, uint32_t peer_tid, uint32_t sid, uint32_t peer_sid)
+{
+	struct {
+		struct nlmsghdr nh;
+		struct genlmsghdr glh;
+		char data[64];
+	} msg;
+	uint16_t pwtype = L2TP_PWTYPE_PPP;
+
+	if (genl_l2tp_id < 0)
+	{
+		errno = ENOSYS;
+		return -1;
+	}
+
+	/* Didn't create kernel tunnel first */
+	if (tunn_local[tid].l2tp_fd < 0)
+	{
+		errno = ENOENT;
+		return -1;
+	}
+
+	LOG(3, sid, tid, "Creating kernel session from %u:%u to %u:%u\n", tid, sid, peer_tid, peer_sid);
+
+	// Header first, then the attributes
+	memset(&msg, 0, sizeof(msg));
+	msg.glh.cmd = L2TP_CMD_SESSION_CREATE;
+	msg.glh.version = L2TP_GENL_VERSION;
+	msg.nh.nlmsg_type = genl_l2tp_id;
+	msg.nh.nlmsg_flags = NLM_F_REQUEST|NLM_F_ACK;
+	msg.nh.nlmsg_len = NLMSG_LENGTH(sizeof(msg.glh));
+
+	genetlink_addattr(&msg.nh, L2TP_ATTR_CONN_ID, &tid, sizeof(tid));
+	genetlink_addattr(&msg.nh, L2TP_ATTR_PEER_CONN_ID, &peer_tid, sizeof(peer_tid));
+	genetlink_addattr(&msg.nh, L2TP_ATTR_SESSION_ID, &sid, sizeof(sid));
+	genetlink_addattr(&msg.nh, L2TP_ATTR_PEER_SESSION_ID, &peer_sid, sizeof(peer_sid));
+	genetlink_addattr(&msg.nh, L2TP_ATTR_PW_TYPE, &pwtype, sizeof(pwtype));
+
+	assert(msg.nh.nlmsg_len < sizeof(msg));
+
+	if (genetlink_send(&msg.nh) < 0)
+	{
+		LOG(2, sid, tid, "Can't create session %d:%d to %d:%d: %s\n", tid, sid, peer_tid, peer_sid, strerror(errno));
+		return -1;
+	}
+
+	// The request buffer is reused for the answer
+	if (genetlink_recv(&msg, sizeof(msg)) < 0)
+	{
+		LOG(1, 0, 0, "Can't receive answer for session creation: %s\n", strerror(errno));
+		return -1;
+	}
+
+	return netlink_handle_ack((struct nlmsghdr *)&msg, 1, 0, NULL) < 0 ? -1 : 0;
+}
+
+//
+// Queue deleting session in kernel
+//
+// Only sends the L2TP_CMD_SESSION_DELETE request; reading the answer is left
+// to the caller.  Returns 0 once sent, -1 otherwise (errno ENOSYS when no
+// l2tp generic netlink id is known).
+static int queue_delete_kernel_session(uint32_t tid, uint32_t sid)
+{
+	struct {
+		struct nlmsghdr nh;
+		struct genlmsghdr glh;
+		char data[64];
+	} msg;
+
+	if (genl_l2tp_id < 0)
+	{
+		errno = ENOSYS;
+		return -1;
+	}
+
+	LOG(3, sid, tid, "Deleting kernel session for %u:%u\n", tid, sid);
+
+	memset(&msg, 0, sizeof(msg));
+	msg.glh.cmd = L2TP_CMD_SESSION_DELETE;
+	msg.glh.version = L2TP_GENL_VERSION;
+	msg.nh.nlmsg_type = genl_l2tp_id;
+	msg.nh.nlmsg_flags = NLM_F_REQUEST|NLM_F_ACK;
+	msg.nh.nlmsg_len = NLMSG_LENGTH(sizeof(msg.glh));
+
+	genetlink_addattr(&msg.nh, L2TP_ATTR_CONN_ID, &tid, sizeof(tid));
+	genetlink_addattr(&msg.nh, L2TP_ATTR_SESSION_ID, &sid, sizeof(sid));
+
+	assert(msg.nh.nlmsg_len < sizeof(msg));
+
+	if (genetlink_send(&msg.nh) >= 0)
+		return 0;
+
+	LOG(2, sid, tid, "Can't delete session %d:%d: %s\n", tid, sid, strerror(errno));
+	return -1;
+}
+
+//
+// Delete session in kernel
+//
+// Queues the deletion and then waits for the kernel's answer.
+// Returns 0 on success, -1 on any failure.
+static int delete_kernel_session(uint32_t tid, uint32_t sid)
+{
+	if (queue_delete_kernel_session(tid, sid) < 0)
+		return -1;
+
+	struct {
+		struct nlmsghdr nh;
+		struct genlmsghdr glh;
+		char data[64];
+	} answer;
+
+	if (genetlink_recv(&answer, sizeof(answer)) < 0)
+	{
+		LOG(1, 0, 0, "Can't receive answer for session deletion: %s\n", strerror(errno));
+		return -1;
+	}
+
+	if (netlink_handle_ack((struct nlmsghdr *)&answer, 1, 0, NULL) < 0)
+		return -1;
+
+	return 0;
+}
+
+//
+// Clear all existing sessions
+//
+// Unfortunately, sessions survive us, so we have to drop any session left from a
+// previous instance that might have crashed.
+static void delete_one_kernel_session(uint32_t id1, uint32_t id2)
+{
+	// Callback handed to delete_kernel_items(): id1 is used as the session
+	// id and id2 as the tunnel id.  The deletion is only queued.
+	queue_delete_kernel_session(id2, id1);
+}
+static void delete_kernel_sessions(void)
+{
+	delete_kernel_items("session", L2TP_CMD_SESSION_GET, L2TP_ATTR_SESSION_ID, L2TP_ATTR_CONN_ID, delete_one_kernel_session);
+}
+
+//
+// Create the kernel PPPoX socket
+//
+// Opens an AF_PPPOX / PX_PROTO_OL2TP socket and connects it to session
+// tid:sid (peer peer_tid:peer_sid) carried over the tunnel UDP socket udp_fd.
+// dst/addrlen is the peer address copied into the pppol2tp address.
+// Returns the new socket, or -1 on failure (errno EINVAL when addrlen does not
+// fit in the pppol2tp address field).
+static int create_ppp_socket(int udp_fd, uint32_t tid, uint32_t peer_tid, uint32_t sid, uint32_t peer_sid, const struct sockaddr *dst, socklen_t addrlen)
+{
+	int pppox_fd;
+	int ret;
+	struct sockaddr_pppol2tp sax;
+
+	if (genl_l2tp_id < 0)
+		return -1;
+
+	// The address is copied into a fixed-size field below
+	if (addrlen > sizeof(sax.pppol2tp.addr))
+	{
+		LOG(2, sid, tid, "Peer address too large for PPPoL2TP socket (%u bytes)\n", (unsigned) addrlen);
+		errno = EINVAL;
+		return -1;
+	}
+
+	LOG(3, sid, tid, "Creating PPPoL2TPsocket from %u:%u to %u:%u\n", tid, sid, peer_tid, peer_sid);
+
+	pppox_fd = socket(AF_PPPOX, SOCK_DGRAM, PX_PROTO_OL2TP);
+	if (pppox_fd < 0)
+	{
+		LOG(2, sid, tid, "Can't create PPPoL2TP socket: %s\n", strerror(errno));
+		return -1;
+	}
+
+	memset(&sax, 0, sizeof(sax));
+
+	sax.sa_family = AF_PPPOX;
+	sax.sa_protocol = PX_PROTO_OL2TP;
+	sax.pppol2tp.fd = udp_fd;
+	memcpy(&sax.pppol2tp.addr, dst, addrlen);
+	sax.pppol2tp.s_tunnel = tid;
+	sax.pppol2tp.s_session = sid;
+	sax.pppol2tp.d_tunnel = peer_tid;
+	sax.pppol2tp.d_session = peer_sid;
+
+	ret = connect(pppox_fd, (struct sockaddr *)&sax, sizeof(sax));
+	if (ret < 0)
+	{
+		LOG(2, sid, tid, "Can't connect PPPoL2TP: %s\n", strerror(errno));
+		close(pppox_fd);
+		return -1;
+	}
+
+	return pppox_fd;
+}
+
+//
+// Create the kernel session and PPPoX socket for this session
+//
+// Returns the PPPoX socket, or -1 on failure.  When the socket cannot be
+// created, the kernel session that was just created is deleted again so that
+// a later attempt can start from scratch.
+static int create_kernel_pppox(sessionidt s)
+{
+	tunnelidt t = session[s].tunnel;
+
+	if (tunn_local[t].l2tp_fd < 0)
+		/* Tunnel not set up yet */
+		return -1;
+
+	tunnelidt tfar = tunnel[t].far;
+	sessionidt sfar = session[s].far;
+
+	LOG(3, s, t, "Creating kernel-accelerated pppox socket from %u:%u to %u:%u\n", t, s, tfar, sfar);
+
+	if (create_kernel_session(t, tfar, s, sfar) < 0)
+		return -1;
+
+	struct sockaddr_in tunneladdr;
+	memset(&tunneladdr, 0, sizeof(tunneladdr));
+	tunneladdr.sin_family = AF_INET;
+	tunneladdr.sin_addr.s_addr = htonl(tunnel[t].ip);
+	tunneladdr.sin_port = htons(tunnel[t].port);
+
+	int pppox_fd = create_ppp_socket(tunn_local[t].l2tp_fd, t, tfar, s, sfar, (struct sockaddr *) &tunneladdr, sizeof(tunneladdr));
+	if (pppox_fd < 0)
+	{
+		/* Don't leave the kernel session behind */
+		delete_kernel_session(t, s);
+		return -1;
+	}
+
+	return pppox_fd;
+}
+
+//
+// Get the kernel PPP channel
+//
+// Returns the channel index reported by PPPIOCGCHAN on pppox_fd, or -1 on
+// failure.
+static int get_kernel_ppp_chan(sessionidt s, int pppox_fd)
+{
+	int ret;
+	int chindx;
+
+	ret = ioctl(pppox_fd, PPPIOCGCHAN, &chindx);
+	if (ret < 0)
+	{
+		LOG(2, s, session[s].tunnel, "Can't get pppox_fd chan: %s\n", strerror(errno));
+		return -1;
+	}
+
+	return chindx;
+}
+
+//
+// Get the kernel PPP channel fd
+//
+// Opens /dev/ppp, makes it non-blocking and attaches it to the channel of
+// pppox_fd.  Returns the new descriptor, or -1 on failure.
+static int create_kernel_ppp_chan(sessionidt s, int pppox_fd)
+{
+	int ret;
+
+	int chindx = get_kernel_ppp_chan(s, pppox_fd);
+	if (chindx < 0)
+		/* Already logged, and there is nothing to attach to */
+		return -1;
+
+	LOG(3, s, session[s].tunnel, "Creating PPP channel\n");
+
+	int ppp_chan_fd = open("/dev/ppp", O_RDWR);
+	if (ppp_chan_fd < 0)
+	{
+		LOG(2, s, session[s].tunnel, "Can't open /dev/ppp for channel: %s\n", strerror(errno));
+		return -1;
+	}
+
+	ret = fcntl(ppp_chan_fd, F_GETFL, NULL);
+	if (ret < 0)
+	{
+		LOG(2, s, session[s].tunnel, "Can't get ppp chan flags: %s\n", strerror(errno));
+		close(ppp_chan_fd);
+		return -1;
+	}
+	ret = fcntl(ppp_chan_fd, F_SETFL, ret | O_NONBLOCK);
+	if (ret < 0)
+	{
+		LOG(2, s, session[s].tunnel, "Can't set ppp chan flags: %s\n", strerror(errno));
+		close(ppp_chan_fd);
+		return -1;
+	}
+
+	ret = ioctl(ppp_chan_fd, PPPIOCATTCHAN, &chindx);
+	if (ret < 0)
+	{
+		LOG(2, s, session[s].tunnel, "Can't attach channel %d: %s\n", chindx, strerror(errno));
+		close(ppp_chan_fd);
+		return -1;
+	}
+
+	return ppp_chan_fd;
+}
+
+//
+// Create the kernel PPP interface
+//
+// Opens /dev/ppp, makes it non-blocking, creates a new unit (*ifunit is passed
+// to PPPIOCNEWUNIT and receives the unit number) and connects ppp_chan_fd to
+// it.  Returns the interface descriptor, or -1 on failure.
+static int create_kernel_ppp_if(sessionidt s, int ppp_chan_fd, int *ifunit)
+{
+	int ret;
+
+	LOG(3, s, session[s].tunnel, "Creating PPP interface\n");
+
+	int ppp_if_fd = open("/dev/ppp", O_RDWR);
+	if (ppp_if_fd < 0)
+	{
+		LOG(2, s, session[s].tunnel, "Can't open /dev/ppp for interface: %s\n", strerror(errno));
+		return -1;
+	}
+
+	ret = fcntl(ppp_if_fd, F_GETFL, NULL);
+	if (ret < 0)
+	{
+		LOG(2, s, session[s].tunnel, "Can't get ppp if flags: %s\n", strerror(errno));
+		close(ppp_if_fd);
+		return -1;
+	}
+	ret = fcntl(ppp_if_fd, F_SETFL, ret | O_NONBLOCK);
+	if (ret < 0)
+	{
+		LOG(2, s, session[s].tunnel, "Can't set ppp if flags: %s\n", strerror(errno));
+		close(ppp_if_fd);
+		return -1;
+	}
+
+	ret = ioctl(ppp_if_fd, PPPIOCNEWUNIT, ifunit);
+	if (ret < 0)
+	{
+		LOG(2, s, session[s].tunnel, "Can't create ppp interface: %s\n", strerror(errno));
+		close(ppp_if_fd);
+		return -1;
+	}
+
+	ret = ioctl(ppp_chan_fd, PPPIOCCONNECT, ifunit);
+	if (ret < 0)
+	{
+		LOG(2, s, session[s].tunnel, "Can't attach channel to unit %d: %s\n", *ifunit, strerror(errno));
+		close(ppp_if_fd);
+		return -1;
+	}
+
+	return ppp_if_fd;
+}
+
+//
+// Tell whether we can try to enable PPP acceleration
+//
+// Returns 1 when acceleration is enabled in the configuration and the session
+// uses none of the features listed below, 0 otherwise.
+static int can_kernel_accel(sessionidt s)
+{
+	if (!config->kernel_accel)
+		/* Disabled */
+		return 0;
+
+	if (session[s].bundle)
+		/* MPPP not supported yet */
+		return 0;
+
+	if (session[s].throttle_in || session[s].throttle_out)
+		/* Throttling not supported */
+		return 0;
+
+	if (session[s].filter_in || session[s].filter_out)
+		/* Filtering not supported */
+		return 0;
+
+	if (session[s].snoop_ip)
+		/* Snooping not supported */
+		return 0;
+
+	if (session[s].walled_garden)
+		/* Walled garden not supported */
+		return 0;
+
+	/* Looks ok! */
+	return 1;
+}
+
+//
+// Create the kernel PPP acceleration
+//
+// Creates the kernel session, PPPoX socket, PPP channel and PPP interface for
+// session s, configures the interface, registers the three descriptors with
+// epoll and records them in sess_local[s].
+// Returns 0 on success (or when an interface index is already recorded),
+// -1 on failure.  On failure everything created here is released again,
+// including the kernel session.
+static int create_kernel_accel(sessionidt s)
+{
+	tunnelidt t = session[s].tunnel;
+
+	if (sess_local[s].ppp_if_idx)
+		/* Already set up */
+		return 0;
+
+	if (!can_kernel_accel(s))
+		return -1;
+
+	int pppox_fd = create_kernel_pppox(s);
+	if (pppox_fd < 0)
+		return -1;
+
+	int ppp_chan_fd = create_kernel_ppp_chan(s, pppox_fd);
+	if (ppp_chan_fd < 0)
+		goto err_pppox_fd;
+
+	int ifunit = -1;
+	int ppp_if_fd = create_kernel_ppp_if(s, ppp_chan_fd, &ifunit);
+	if (ppp_if_fd < 0)
+		goto err_chan_fd;
+
+	struct ifreq ifr;
+	memset(&ifr, 0, sizeof(ifr));
+	snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), PPP_IF_PREFIX"%u", ifunit);
+	if (ioctl(tunn_local[t].l2tp_fd, SIOCGIFINDEX, &ifr) < 0)
+	{
+		LOG(2, s, t, "Can't get if index of %s: %s\n", ifr.ifr_name, strerror(errno));
+		goto err_if_fd;
+	}
+
+	if (setupif(ifr.ifr_ifindex, session[s].mru, 0))
+	{
+		LOG(2, s, t, "Can't configure %s: %s\n", ifr.ifr_name, strerror(errno));
+		goto err_if_fd;
+	}
+
+	struct epoll_event e;
+	e.events = EPOLLIN;
+
+	// One static event_data slot per session and per descriptor kind, since
+	// epoll keeps the pointer
+	static struct event_data d1[MAXSESSION];
+	d1[s].type = FD_TYPE_PPPOX;
+	d1[s].index = s;
+	e.data.ptr = &d1[s];
+
+	epoll_ctl(epollfd, EPOLL_CTL_ADD, pppox_fd, &e);
+
+	static struct event_data d2[MAXSESSION];
+	d2[s].type = FD_TYPE_PPP_CHAN;
+	d2[s].index = s;
+	e.data.ptr = &d2[s];
+
+	epoll_ctl(epollfd, EPOLL_CTL_ADD, ppp_chan_fd, &e);
+
+	static struct event_data d3[MAXSESSION];
+	d3[s].type = FD_TYPE_PPP_IF;
+	d3[s].index = s;
+	e.data.ptr = &d3[s];
+
+	epoll_ctl(epollfd, EPOLL_CTL_ADD, ppp_if_fd, &e);
+
+	sess_local[s].pppox_fd = pppox_fd;
+	sess_local[s].ppp_chan_fd = ppp_chan_fd;
+	sess_local[s].ppp_if_fd = ppp_if_fd;
+	sess_local[s].ppp_if_unit = ifunit;
+	sess_local[s].ppp_if_idx = ifr.ifr_ifindex;
+
+	dhcpv6_listen(ifr.ifr_ifindex);
+	icmpv6_listen(ifr.ifr_ifindex);
+
+	memset(&sess_local[s].last_stats, 0, sizeof(sess_local[s].last_stats));
+
+	return 0;
+
+err_if_fd:
+	close(ppp_if_fd);
+err_chan_fd:
+	close(ppp_chan_fd);
+err_pppox_fd:
+	close(pppox_fd);
+	/* Same order as delete_kernel_accel: socket first, then kernel session */
+	delete_kernel_session(t, s);
+	return -1;
+}
+
+//
+// Create the kernel PPP accelerated bridge
+//
+// Creates kernel sessions and PPPoX sockets for s and fwds and asks the kernel
+// to bridge their PPP channels.  Both PPPoX sockets are registered with epoll
+// and recorded in sess_local[].
+// Returns 0 on success (or when s already has a PPPoX socket), -1 on failure.
+// Once the kernel answers ENOTTY to the bridge request, later calls fail
+// immediately.  On failure the sockets and kernel sessions created here are
+// released again.
+int create_kernel_bridge(sessionidt s, sessionidt fwds)
+{
+	static int kernel_cant = 0;
+
+	tunnelidt t = session[s].tunnel;
+
+	if (fwds == s)
+		/* Meaningless! */
+		return -1;
+
+	if (kernel_cant)
+		/* We have seen that kernel can't do it anyway */
+		return -1;
+
+	if (sess_local[s].pppox_fd >= 0)
+		/* Already set up */
+		return 0;
+
+	if (!can_kernel_accel(s) || !can_kernel_accel(fwds))
+		return -1;
+
+	int pppox_fd = create_kernel_pppox(s);
+	if (pppox_fd < 0)
+		return -1;
+
+	int fwd_pppox_fd = create_kernel_pppox(fwds);
+	if (fwd_pppox_fd < 0)
+		goto err_pppox_fd;
+
+	LOG(3, s, t, "Starting kernel-accelerated bridge between %u and %u\n", s, fwds);
+
+	int ppp_chan_fd = create_kernel_ppp_chan(s, pppox_fd);
+	if (ppp_chan_fd < 0)
+		goto err_fwd_pppox_fd;
+
+	int fwd_idx = get_kernel_ppp_chan(fwds, fwd_pppox_fd);
+	if (fwd_idx < 0)
+	{
+		/* Already logged; there is no channel to bridge to */
+		close(ppp_chan_fd);
+		goto err_fwd_pppox_fd;
+	}
+
+	int ret = ioctl(ppp_chan_fd, PPPIOCBRIDGECHAN, &fwd_idx);
+	/* close() may overwrite errno, keep the ioctl's one */
+	int bridge_errno = errno;
+	close(ppp_chan_fd);
+	if (ret < 0) {
+		if (bridge_errno == ENOTTY)
+			/* Not supported by kernel */
+			kernel_cant = 1;
+
+		LOG(2, s, session[s].tunnel, "Can't set LAC bridge: %s\n", strerror(bridge_errno));
+		goto err_fwd_pppox_fd;
+	}
+
+	struct epoll_event e;
+	e.events = EPOLLIN;
+
+	static struct event_data d1[MAXSESSION];
+	d1[s].type = FD_TYPE_PPPOX;
+	d1[s].index = s;
+	e.data.ptr = &d1[s];
+
+	epoll_ctl(epollfd, EPOLL_CTL_ADD, pppox_fd, &e);
+
+	d1[fwds].type = FD_TYPE_PPPOX;
+	d1[fwds].index = fwds;
+	e.data.ptr = &d1[fwds];
+
+	epoll_ctl(epollfd, EPOLL_CTL_ADD, fwd_pppox_fd, &e);
+
+	sess_local[s].pppox_fd = pppox_fd;
+	sess_local[fwds].pppox_fd = fwd_pppox_fd;
+
+	memset(&sess_local[s].last_stats, 0, sizeof(sess_local[s].last_stats));
+	memset(&sess_local[fwds].last_stats, 0, sizeof(sess_local[fwds].last_stats));
+
+	return 0;
+
+err_fwd_pppox_fd:
+	close(fwd_pppox_fd);
+	delete_kernel_session(session[fwds].tunnel, fwds);
+err_pppox_fd:
+	close(pppox_fd);
+	delete_kernel_session(t, s);
+	return -1;
+}
+
+//
+// Delete the kernel PPP acceleration
+static
int delete_kernel_accel(sessionidt s)
+{
+	// Tears down whatever create_kernel_accel / create_kernel_bridge set up
+	// for this session.  Always returns 0.
+	if (sess_local[s].pppox_fd < 0)
+	{
+		/* Already stopped */
+		return 0;
+	}
+
+	LOG(3, s, session[s].tunnel, "Stopping kernel-accelerated support for %u:%u\n", session[s].tunnel, s);
+
+	sess_local[s].ppp_if_unit = -1;
+	sess_local[s].ppp_if_idx = 0;
+
+	// Detach the channel from its unit before closing anything
+	if (sess_local[s].ppp_chan_fd >= 0)
+	{
+		ioctl(sess_local[s].ppp_chan_fd, PPPIOCDISCONN);
+	}
+
+	// Interface first ...
+	if (sess_local[s].ppp_if_fd >= 0)
+	{
+		epoll_ctl(epollfd, EPOLL_CTL_DEL, sess_local[s].ppp_if_fd, NULL);
+		close(sess_local[s].ppp_if_fd);
+		sess_local[s].ppp_if_fd = -1;
+	}
+
+	// ... then the channel ...
+	if (sess_local[s].ppp_chan_fd >= 0)
+	{
+		epoll_ctl(epollfd, EPOLL_CTL_DEL, sess_local[s].ppp_chan_fd, NULL);
+		close(sess_local[s].ppp_chan_fd);
+		sess_local[s].ppp_chan_fd = -1;
+	}
+
+	// ... then the PPPoX socket, known to be open at this point
+	epoll_ctl(epollfd, EPOLL_CTL_DEL, sess_local[s].pppox_fd, NULL);
+	close(sess_local[s].pppox_fd);
+	sess_local[s].pppox_fd = -1;
+
+	// ... and finally the kernel session itself
+	delete_kernel_session(session[s].tunnel, s);
+
+	return 0;
+}
+
+//
+// Enable (set=1) or disable (set=0) kernel PPP acceleration
+// This basically calls create/delete_kernel_accel, but also updates routes
+// If now is 0, we may delay this if we have already made a lot of switches since last cleanup
+static void set_kernel_accel(sessionidt s, int set, int now)
+{
+	tunnelidt t = session[s].tunnel;
+	sessionidt fwds = session[s].forwardtosession;
+
+	// All the reasons for not enabling right now
+	if (set)
+	{
+		if (!can_kernel_accel(s))
+		{
+			/* Still cannot enable it */
+			return;
+		}
+
+		if (tunnel[t].state == TUNNELUNDEF)
+		{
+			/* We don't know the tunnel yet */
+			return;
+		}
+
+		if (fwds)
+		{
+			if (session[fwds].forwardtosession != s)
+			{
+				/* We don't know the other session yet */
+				return;
+			}
+
+			if (tunnel[session[fwds].tunnel].state == TUNNELUNDEF)
+			{
+				/* We don't know the tunnel yet */
+				return;
+			}
+		}
+
+		if (!now && kernel_switches >= MAX_KERNEL_SWITCHES)
+		{
+			// We already performed many switches, throttle a bit by just
+			// marking as pending
+			sess_local[s].needs_switch = 1;
+			return;
+		}
+	}
+
+	kernel_switches++;
+	sess_local[s].needs_switch = 0;
+
+	// Routes are removed before the switch and re-added after it
+	routesset(s, &session[s], 0);
+	if (session[s].ppp.ipv6cp == Opened)
+		routes6set(s, &session[s], 0);
+
+	if (!set)
+	{
+		delete_kernel_accel(s);
+		if (fwds)
+			delete_kernel_accel(fwds);
+	}
+	else if (fwds)
+	{
+		// Forwarded session: both tunnels, then the bridge
+		create_kernel_tunnel(t, tunnel[t].far);
+
+		tunnelidt fwdt = session[fwds].tunnel;
+		create_kernel_tunnel(fwdt, tunnel[fwdt].far);
+		create_kernel_bridge(s, fwds);
+	}
+	else
+	{
+		create_kernel_tunnel(t, tunnel[t].far);
+		create_kernel_accel(s);
+	}
+
+	routesset(s, &session[s], 1);
+	if (session[s].ppp.ipv6cp == Opened)
+		routes6set(s, &session[s], 1);
+}
+
+//
+// Try to enable/disable PPP acceleration as allowed
+// This is typically called when switching a parameter that changes whether
+// acceleration is allowed, e.g. snoop
+void switch_kernel_accel(sessionidt s)
+{
+	int enabled = sess_local[s].pppox_fd >= 0;
+	int allowed = can_kernel_accel(s);
+
+	if (!enabled && allowed)
+	{
+		/* Acceleration disabled but now allowed: try to enable */
+		set_kernel_accel(s, 1, 0);
+	}
+	else if (enabled && !allowed)
+	{
+		/* Acceleration enabled but not allowed any more: has to disable it */
+		set_kernel_accel(s, 0, 1);
+	}
+	/* Otherwise the current state already matches what is allowed */
+}
+
+//
+// Get traffic statistics from kernel and apply to our counters
+static void apply_kernel_stats(sessionidt s)
+{
+	tunnelidt t = session[s].tunnel;
+
+	if (session[s].tunnel == T_FREE)
+		/* It is free */
+		return;
+
+	if (sess_local[s].pppox_fd < 0)
+		/* It does not have kernel acceleration */
+		return;
+
+	struct pppol2tp_ioc_stats stats, *last_stats = &sess_local[s].last_stats;
+	int ret = ioctl(sess_local[s].pppox_fd, PPPIOCGL2TPSTATS, &stats);
+	if (ret < 0)
+	{
+		LOG(3, s, t, "Can't get stats with PPPIOCGL2TPSTATS: %s\n", strerror(errno));
+		return;
+	}
+
+	/* Some trafic from peer went through kernel, notice it */
+	if (stats.rx_packets - last_stats->rx_packets)
+		session[s].last_packet = time_now;
+
+	update_session_out_stat(s,
+			stats.tx_packets - last_stats->tx_packets,
+			stats.tx_bytes -
last_stats->tx_bytes); + // stats.tx_errors + update_session_in_stat(s, + stats.rx_packets - last_stats->rx_packets, + stats.rx_bytes - last_stats->rx_bytes); + // stats.rx_seq_discards + // stats.rx_oos_packets + // stats.rx_errors + + *last_stats = stats; +} + +// Get interface idx for session +static int session_if_idx(sessionidt s) +{ + if (s != 0) + { + int idx = sess_local[s].ppp_if_idx; + if (idx > 0) + // Kernel-accelerated interface + return idx; + } + + // Software interface + return tunidx; +} + // Add a route // // This adds it to the routing table, advertises it @@ -497,20 +1573,21 @@ static void routeset(sessionidt s, in_addr_t ip, int prefixlen, in_addr_t gw, in req.rt.rtm_scope = RT_SCOPE_LINK; req.rt.rtm_type = RTN_UNICAST; - netlink_addattr(&req.nh, RTA_OIF, &tunidx, sizeof(int)); + int idx = session_if_idx(s); + rtnetlink_addattr(&req.nh, RTA_OIF, &idx, sizeof(idx)); n_ip = htonl(ip); - netlink_addattr(&req.nh, RTA_DST, &n_ip, sizeof(n_ip)); + rtnetlink_addattr(&req.nh, RTA_DST, &n_ip, sizeof(n_ip)); if (gw) { n_ip = htonl(gw); - netlink_addattr(&req.nh, RTA_GATEWAY, &n_ip, sizeof(n_ip)); + rtnetlink_addattr(&req.nh, RTA_GATEWAY, &n_ip, sizeof(n_ip)); } LOG(1, s, session[s].tunnel, "Route %s %s/%d%s%s\n", add ? "add" : "del", fmtaddr(htonl(ip), 0), prefixlen, gw ? " via" : "", gw ? fmtaddr(htonl(gw), 2) : ""); - if (netlink_send(&req.nh) < 0) + if (rtnetlink_send(&req.nh) < 0) LOG(0, 0, 0, "routeset() error in sending netlink message: %s\n", strerror(errno)); #ifdef BGP @@ -538,6 +1615,51 @@ static void routeset(sessionidt s, in_addr_t ip, int prefixlen, in_addr_t gw, in } } +// +// Add or remove the routes for a session +static void routesset(sessionidt s, sessiont *sp, int add) +{ + int r; + int routed = 0; + + // add/remove routes... 
+ for (r = 0; r < MAXROUTE && sp->route[r].ip; r++) + { + if ((sp->ip >> (32-sp->route[r].prefixlen)) == + (sp->route[r].ip >> (32-sp->route[r].prefixlen))) + routed++; + + routeset(s, sp->route[r].ip, sp->route[r].prefixlen, 0, add); + } + + // ...ip + if (sp->ip) + { + // Static IPs need to be routed if not already + // convered by a Framed-Route. Anything else is part + // of the IP address pool and is already routed, it + // just needs to be added to the IP cache. + // IPv6 route setup is done in ppp.c, when IPV6CP is acked. + if (sp->ip_pool_index == -1) // static ip + { + if (!routed) routeset(s, sp->ip, 0, 0, add); + } + else // It's part of the IP pool, add/remove it manually. + { + if (add) + cache_ipmap(sp->ip, s); + else + uncache_ipmap(sp->ip); + } + } +} + +// Add an IPv6 route +// +// This adds it to the routing table, advertises it +// via BGP if enabled, and stuffs it into the +// 'sessionbyip' cache. +// void route6set(sessionidt s, struct in6_addr ip, int prefixlen, int add) { struct { @@ -576,17 +1698,18 @@ void route6set(sessionidt s, struct in6_addr ip, int prefixlen, int add) req.rt.rtm_scope = RT_SCOPE_LINK; req.rt.rtm_type = RTN_UNICAST; - netlink_addattr(&req.nh, RTA_OIF, &tunidx, sizeof(int)); - netlink_addattr(&req.nh, RTA_DST, &ip, sizeof(ip)); + int idx = session_if_idx(s); + rtnetlink_addattr(&req.nh, RTA_OIF, &idx, sizeof(idx)); + rtnetlink_addattr(&req.nh, RTA_DST, &ip, sizeof(ip)); metric = 1; - netlink_addattr(&req.nh, RTA_METRICS, &metric, sizeof(metric)); + rtnetlink_addattr(&req.nh, RTA_METRICS, &metric, sizeof(metric)); LOG(1, s, session[s].tunnel, "Route %s %s/%d\n", add ? 
"add" : "del", inet_ntop(AF_INET6, &ip, ipv6addr, INET6_ADDRSTRLEN), prefixlen); - if (netlink_send(&req.nh) < 0) + if (rtnetlink_send(&req.nh) < 0) LOG(0, 0, 0, "route6set() error in sending netlink message: %s\n", strerror(errno)); #ifdef BGP @@ -607,16 +1730,127 @@ void route6set(sessionidt s, struct in6_addr ip, int prefixlen, int add) return; } +// +// Add or remove the IPv6 routes for a session +void routes6set(sessionidt s, sessiont *sp, int add) +{ + int r; + + for (r = 0; r < MAXROUTE6 && sp->route6[r].ipv6prefixlen; r++) + { + route6set(s, sp->route6[r].ipv6route, sp->route6[r].ipv6prefixlen, add); + } + + if (sp->ipv6address.s6_addr[0]) + { + // Check if included in prefix + if (!add || sessionbyipv6(sp->ipv6address) != s) + route6set(s, sp->ipv6address, 128, add); + } + else + { + in_addr_t addr_ipv4 = htonl(session[s].ip); + struct in6_addr addr; + memset(&addr, 0, sizeof(addr)); + memcpy(&addr, &config->ipv6_prefix, 8); + memcpy(&addr.s6_addr[8], &addr_ipv4, 4); + route6set(s, addr, 96, add); + } +} + +// +// Get L2TP netlink id +static int16_t netlink_get_l2tp_id(void) +{ + struct { + struct nlmsghdr nh; + struct genlmsghdr glh; + char data[32]; + } req; + struct nlattr *ah; + int16_t ret; + + if (system("modprobe l2tp_ppp")) + LOG(3, 0, 0, "Can't modprobe l2tp_ppp: %s\n", strerror(errno)); + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_type = GENL_ID_CTRL; + req.nh.nlmsg_flags = NLM_F_REQUEST|NLM_F_ACK; + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.glh)); + + req.glh.cmd = CTRL_CMD_GETFAMILY; + req.glh.version = 1; + + genetlink_addattr(&req.nh, CTRL_ATTR_FAMILY_NAME, L2TP_GENL_NAME, sizeof(L2TP_GENL_NAME)); + + assert(req.nh.nlmsg_len < sizeof(req)); + + if (genetlink_send(&req.nh) < 0) + { + LOG(2, 0, 0, "Can't send request for l2tp netlink name: %s\n", strerror(errno)); + return -1; + } + + + ssize_t size = genetlink_recv(&req.nh, sizeof(req)); + if (size < 0) + { + LOG(2, 0, 0, "Can't receive answer for l2tp netlink name: %s\n", 
strerror(errno)); + return -1; + } + if (size < sizeof(req.nh)) + { + LOG(2, 0, 0, "Short answer for l2tp netlink name\n"); + return -1; + } + + if (req.nh.nlmsg_type != GENL_ID_CTRL) + { + LOG(2, 0, 0, "Unexpected answer type %d for l2tp netlink name.\n" + "Does your Linux kernel have the l2tp_netlink module available?\n", req.nh.nlmsg_type); + return -1; + } + if (size < NLMSG_HDRLEN + GENL_HDRLEN) + { + LOG(2, 0, 0, "Short answer for l2tp netlink name\n"); + return -1; + } + + size -= NLMSG_HDRLEN + GENL_HDRLEN; + ret = -1; + char *data = &req.data[0]; + for (ah = (void*) data; (char*) ah < data + size; ah = (void*) ((char *) ah + NLA_ALIGN(ah->nla_len))) + { + if ((ah->nla_type & NLA_TYPE_MASK) == CTRL_ATTR_FAMILY_ID) + { + if (ah->nla_len < NLA_HDRLEN + 2) + LOG(2, 0, 0, "Short netlink family ID for l2tp\n"); + ret = *(uint16_t*) ((char*) ah + NLA_HDRLEN); + break; + } + } + if (ret == -1) + LOG(2, 0, 0, "Did not get netlink family ID for l2tp\n"); + + size = genetlink_recv(&req, sizeof(req)); + if (size < 0) + LOG(2, 0, 0, "Can't receive ack for family ID: %s\n", strerror(errno)); + else + netlink_handle_ack((struct nlmsghdr *)&req, 1, 0, NULL); + + return ret; +} + // // Set up netlink socket static void initnetlink(void) { struct sockaddr_nl nladdr; - nlfd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); - if (nlfd < 0) + rtnlfd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); + if (rtnlfd < 0) { - LOG(0, 0, 0, "Can't create netlink socket: %s\n", strerror(errno)); + LOG(0, 0, 0, "Can't create route netlink socket: %s\n", strerror(errno)); exit(1); } @@ -624,21 +1858,49 @@ static void initnetlink(void) nladdr.nl_family = AF_NETLINK; nladdr.nl_pid = getpid(); - if (bind(nlfd, (struct sockaddr *)&nladdr, sizeof(nladdr)) < 0) + if (bind(rtnlfd, (struct sockaddr *)&nladdr, sizeof(nladdr)) < 0) { - LOG(0, 0, 0, "Can't bind netlink socket: %s\n", strerror(errno)); + LOG(0, 0, 0, "Can't bind route netlink socket: %s\n", strerror(errno)); exit(1); } + + genlfd = 
socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC); + if (genlfd < 0) + { + LOG(0, 0, 0, "Can't create generic netlink socket: %s\n", strerror(errno)); + exit(1); + } + + memset(&nladdr, 0, sizeof(nladdr)); + nladdr.nl_family = AF_NETLINK; + nladdr.nl_pid = getpid(); + + if (bind(genlfd, (struct sockaddr *)&nladdr, sizeof(nladdr)) < 0) + { + LOG(0, 0, 0, "Can't bind generic netlink socket: %s\n", strerror(errno)); + exit(1); + } + + genl_l2tp_id = netlink_get_l2tp_id(); + LOG(3, 0, 0, "gen l2tp id is %d\n", genl_l2tp_id); + + if (config->kernel_accel) + { + delete_kernel_sessions(); + delete_kernel_tunnels(); + } } -static ssize_t netlink_send(struct nlmsghdr *nh) +// +// Send message to a netlink socket +static ssize_t netlink_send(int fd, int *seqnum, struct nlmsghdr *nh) { struct sockaddr_nl nladdr; struct iovec iov; struct msghdr msg; nh->nlmsg_pid = getpid(); - nh->nlmsg_seq = ++nlseqnum; + nh->nlmsg_seq = ++*seqnum; // set kernel address memset(&nladdr, 0, sizeof(nladdr)); @@ -647,10 +1909,26 @@ static ssize_t netlink_send(struct nlmsghdr *nh) iov = (struct iovec){ (void *)nh, nh->nlmsg_len }; msg = (struct msghdr){ (void *)&nladdr, sizeof(nladdr), &iov, 1, NULL, 0, 0 }; - return sendmsg(nlfd, &msg, 0); + return sendmsg(fd, &msg, 0); } -static ssize_t netlink_recv(void *buf, ssize_t len) +// +// Send message to the route netlink socket +static ssize_t rtnetlink_send(struct nlmsghdr *nh) +{ + return netlink_send(rtnlfd, &rtnlseqnum, nh); +} + +// +// Send message to the generic netlink socket +static ssize_t genetlink_send(struct nlmsghdr *nh) +{ + return netlink_send(genlfd, &genlseqnum, nh); +} + +// +// Receive a message from a netlink socket +static ssize_t netlink_recv(int fd, void *buf, ssize_t len) { struct sockaddr_nl nladdr; struct iovec iov; @@ -663,11 +1941,65 @@ static ssize_t netlink_recv(void *buf, ssize_t len) iov = (struct iovec){ buf, len }; msg = (struct msghdr){ (void *)&nladdr, sizeof(nladdr), &iov, 1, NULL, 0, 0 }; - return recvmsg(nlfd, &msg, 
0); + return recvmsg(fd, &msg, 0); } +// +// Receive a message from the route netlink socket +static ssize_t rtnetlink_recv(void *buf, ssize_t len) +{ + return netlink_recv(rtnlfd, buf, len); +} + +// +// Receive a message from the generic netlink socket +static ssize_t genetlink_recv(void *buf, ssize_t len) +{ + return netlink_recv(genlfd, buf, len); +} + +// +// Look ack netlink message for errors +static int netlink_handle_ack(struct nlmsghdr *nh, int gen, int min_initok_nlseqnum, char *tun_nl_phase_msg[]) +{ + if (nh->nlmsg_type == NLMSG_ERROR) + { + struct nlmsgerr *errmsg = NLMSG_DATA(nh); + if (errmsg->error) + { + if (errmsg->msg.nlmsg_seq < min_initok_nlseqnum) + { + LOG(0, 0, 0, "Got a fatal netlink error (while %s): %s\n", tun_nl_phase_msg[nh->nlmsg_seq], strerror(-errmsg->error)); + exit(1); + } + else + { + if (gen) + { + struct genlmsghdr *glh = NLMSG_DATA(&errmsg->msg); + LOG(0, 0, 0, "For generic netlink request %d on %d, got a netlink error: %s\n", glh->cmd, errmsg->msg.nlmsg_type, strerror(-errmsg->error)); + } + else + LOG(0, 0, 0, "For netlink request %d, got a netlink error: %s\n", errmsg->msg.nlmsg_type, strerror(-errmsg->error)); + errno = -errmsg->error; + return -1; + } + } + // else it's an ack + return 0; + } + else + { + LOG(1, 0, 0, "Got an unknown netlink message: type %d seq %d flags %d\n", nh->nlmsg_type, nh->nlmsg_seq, nh->nlmsg_flags); + errno = EIO; + return -1; + } +} + +// +// Add an attribute to a message for a route netlink socket /* adapted from iproute2 */ -static void netlink_addattr(struct nlmsghdr *nh, int type, const void *data, int alen) +static void rtnetlink_addattr(struct nlmsghdr *nh, int type, const void *data, int alen) { int len = RTA_LENGTH(alen); struct rtattr *rta; @@ -679,8 +2011,44 @@ static void netlink_addattr(struct nlmsghdr *nh, int type, const void *data, int nh->nlmsg_len = NLMSG_ALIGN(nh->nlmsg_len) + RTA_ALIGN(len); } +// +// Add an attribute to a message for a generic netlink socket +static void 
genetlink_addattr(struct nlmsghdr *nh, int type, const void *data, int alen) +{ + int len = NLA_HDRLEN + alen; + struct nlattr *nla; + + nla = (struct nlattr *)(((void *)nh) + NLMSG_ALIGN(nh->nlmsg_len)); + nla->nla_type = type; + nla->nla_len = len; + memcpy((char*)nla + NLA_HDRLEN, data, alen); + nh->nlmsg_len = NLMSG_ALIGN(nh->nlmsg_len) + NLA_ALIGN(len); +} + +// +// Find attribute from a message +static int genetlink_getattr(struct nlmsghdr *nh, int type, void *attr, int alen) +{ + char *glh = NLMSG_DATA(nh); + char *data = glh + GENL_HDRLEN; + char *data_end = data + nh->nlmsg_len - NLMSG_HDRLEN - GENL_HDRLEN; + struct nlattr *ah; + + for (ah = (void*) data; (char*) ah < data_end; ah = (void*) ((char *) ah + NLA_ALIGN(ah->nla_len))) + { + if ((ah->nla_type & NLA_TYPE_MASK) == type) + { + if (ah->nla_len != NLA_HDRLEN + alen) + LOG(0, 0, 0, "Erroneous attribute %d size\n", type); + memcpy(attr, ((char*) ah + NLA_HDRLEN), alen); + return 0; + } + } + return -1; +} + // messages corresponding to different phases seq number -static char *tun_nl_phase_msg[] = { +static char *tun_rtnl_phase_msg[] = { "initialized", "getting tun interface index", "setting tun interface parameters", @@ -726,41 +2094,51 @@ static void inittun(void) LOG(0, 0, 0, "Can't get tun interface index\n"); exit(1); } - + if (setupif(tunidx, MRU, 1) < 0) { - struct { - // interface setting - struct nlmsghdr nh; - union { - struct ifinfomsg ifinfo; - struct ifaddrmsg ifaddr; - } ifmsg; - char rtdata[32]; // 32 should be enough - } req; - uint32_t txqlen, mtu; - in_addr_t ip; + LOG(0, 0, 0, "Error while setting up tun device: %s\n", strerror(errno)); + exit(1); + } +} - memset(&req, 0, sizeof(req)); +// +// Set up an interface for serving as gateway +static int setupif(int ifidx, uint32_t mru, int config_addr) +{ + struct { + // interface setting + struct nlmsghdr nh; + union { + struct ifinfomsg ifinfo; + struct ifaddrmsg ifaddr; + } ifmsg; + char rtdata[32]; // 32 should be enough + } req; + 
uint32_t txqlen; + in_addr_t ip; - req.nh.nlmsg_type = RTM_NEWLINK; - req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_MULTI; - req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.ifmsg.ifinfo)); + memset(&req, 0, sizeof(req)); - req.ifmsg.ifinfo.ifi_family = AF_UNSPEC; - req.ifmsg.ifinfo.ifi_index = tunidx; - req.ifmsg.ifinfo.ifi_flags |= IFF_UP; // set interface up - req.ifmsg.ifinfo.ifi_change = IFF_UP; // only change this flag + req.nh.nlmsg_type = RTM_NEWLINK; + req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_MULTI; + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.ifmsg.ifinfo)); - /* Bump up the qlen to deal with bursts from the network */ - txqlen = 1000; - netlink_addattr(&req.nh, IFLA_TXQLEN, &txqlen, sizeof(txqlen)); - /* set MTU to modem MRU */ - mtu = MRU; - netlink_addattr(&req.nh, IFLA_MTU, &mtu, sizeof(mtu)); + req.ifmsg.ifinfo.ifi_family = AF_UNSPEC; + req.ifmsg.ifinfo.ifi_index = ifidx; + req.ifmsg.ifinfo.ifi_flags |= IFF_UP; // set interface up + req.ifmsg.ifinfo.ifi_change = IFF_UP; // only change this flag - if (netlink_send(&req.nh) < 0) - goto senderror; + /* Bump up the qlen to deal with bursts from the network */ + txqlen = 1000; + rtnetlink_addattr(&req.nh, IFLA_TXQLEN, &txqlen, sizeof(txqlen)); + /* set MTU to modem MRU */ + rtnetlink_addattr(&req.nh, IFLA_MTU, &mru, sizeof(mru)); + if (rtnetlink_send(&req.nh) < 0) + return -1; + + if (config_addr) + { memset(&req, 0, sizeof(req)); req.nh.nlmsg_type = RTM_NEWADDR; @@ -770,7 +2148,7 @@ static void inittun(void) req.ifmsg.ifaddr.ifa_family = AF_INET; req.ifmsg.ifaddr.ifa_prefixlen = 32; req.ifmsg.ifaddr.ifa_scope = RT_SCOPE_UNIVERSE; - req.ifmsg.ifaddr.ifa_index = tunidx; + req.ifmsg.ifaddr.ifa_index = ifidx; if (config->nbmultiaddress > 1) { @@ -778,9 +2156,9 @@ static void inittun(void) for (i = 0; i < config->nbmultiaddress ; i++) { ip = config->iftun_n_address[i]; - netlink_addattr(&req.nh, IFA_LOCAL, &ip, sizeof(ip)); - if (netlink_send(&req.nh) < 0) - goto senderror; + rtnetlink_addattr(&req.nh, IFA_LOCAL, 
&ip, sizeof(ip)); + if (rtnetlink_send(&req.nh) < 0) + return -1; } } else @@ -789,84 +2167,138 @@ static void inittun(void) ip = config->iftun_address; else ip = 0x01010101; // 1.1.1.1 - netlink_addattr(&req.nh, IFA_LOCAL, &ip, sizeof(ip)); + rtnetlink_addattr(&req.nh, IFA_LOCAL, &ip, sizeof(ip)); - if (netlink_send(&req.nh) < 0) - goto senderror; + if (rtnetlink_send(&req.nh) < 0) + return -1; } + } - - - // Only setup IPv6 on the tun device if we have a configured prefix - if (config->ipv6_prefix.s6_addr[0]) { - struct in6_addr ip6; - - memset(&req, 0, sizeof(req)); - - req.nh.nlmsg_type = RTM_NEWADDR; - req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_REPLACE | NLM_F_MULTI; - req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.ifmsg.ifaddr)); - - req.ifmsg.ifaddr.ifa_family = AF_INET6; - req.ifmsg.ifaddr.ifa_prefixlen = 64; - req.ifmsg.ifaddr.ifa_scope = RT_SCOPE_LINK; - req.ifmsg.ifaddr.ifa_index = tunidx; - - // Link local address is FE80::1 - memset(&ip6, 0, sizeof(ip6)); - ip6.s6_addr[0] = 0xFE; - ip6.s6_addr[1] = 0x80; - ip6.s6_addr[15] = 1; - netlink_addattr(&req.nh, IFA_LOCAL, &ip6, sizeof(ip6)); - - if (netlink_send(&req.nh) < 0) - goto senderror; - - memset(&req, 0, sizeof(req)); - - req.nh.nlmsg_type = RTM_NEWADDR; - req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_REPLACE | NLM_F_MULTI; - req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.ifmsg.ifaddr)); - - req.ifmsg.ifaddr.ifa_family = AF_INET6; - req.ifmsg.ifaddr.ifa_prefixlen = 64; - req.ifmsg.ifaddr.ifa_scope = RT_SCOPE_UNIVERSE; - req.ifmsg.ifaddr.ifa_index = tunidx; - - // Global address is prefix::1 - ip6 = config->ipv6_prefix; - ip6.s6_addr[15] = 1; - netlink_addattr(&req.nh, IFA_LOCAL, &ip6, sizeof(ip6)); - - if (netlink_send(&req.nh) < 0) - goto senderror; - } + // Only setup IPv6 on the tun device if we have a configured prefix + if (config->ipv6_prefix.s6_addr[0]) { + struct in6_addr ip6; memset(&req, 0, sizeof(req)); - req.nh.nlmsg_type = NLMSG_DONE; - req.nh.nlmsg_len = 
NLMSG_LENGTH(0); + req.nh.nlmsg_type = RTM_NEWADDR; + req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_REPLACE | NLM_F_MULTI; + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.ifmsg.ifaddr)); - if (netlink_send(&req.nh) < 0) - goto senderror; + req.ifmsg.ifaddr.ifa_family = AF_INET6; + req.ifmsg.ifaddr.ifa_prefixlen = 64; + req.ifmsg.ifaddr.ifa_scope = RT_SCOPE_LINK; + req.ifmsg.ifaddr.ifa_index = ifidx; - // if we get an error for seqnum < min_initok_nlseqnum, - // we must exit as initialization went wrong - if (config->ipv6_prefix.s6_addr[0]) - min_initok_nlseqnum = 5 + 1; // idx + if + addr + 2*addr6 - else - min_initok_nlseqnum = 3 + 1; // idx + if + addr + // Link local address is FE80::1 + memset(&ip6, 0, sizeof(ip6)); + ip6.s6_addr[0] = 0xFE; + ip6.s6_addr[1] = 0x80; + ip6.s6_addr[15] = 1; + rtnetlink_addattr(&req.nh, IFA_LOCAL, &ip6, sizeof(ip6)); + + if (rtnetlink_send(&req.nh) < 0) + return -1; + + memset(&req, 0, sizeof(req)); + + req.nh.nlmsg_type = RTM_NEWADDR; + req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_REPLACE | NLM_F_MULTI; + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.ifmsg.ifaddr)); + + req.ifmsg.ifaddr.ifa_family = AF_INET6; + req.ifmsg.ifaddr.ifa_prefixlen = 64; + req.ifmsg.ifaddr.ifa_scope = RT_SCOPE_UNIVERSE; + req.ifmsg.ifaddr.ifa_index = ifidx; + + // Global address is prefix::1 + ip6 = config->ipv6_prefix; + ip6.s6_addr[15] = 1; + rtnetlink_addattr(&req.nh, IFA_LOCAL, &ip6, sizeof(ip6)); + + if (rtnetlink_send(&req.nh) < 0) + return -1; } - return; + memset(&req, 0, sizeof(req)); -senderror: - LOG(0, 0, 0, "Error while setting up tun device: %s\n", strerror(errno)); - exit(1); + req.nh.nlmsg_type = NLMSG_DONE; + req.nh.nlmsg_len = NLMSG_LENGTH(0); + + if (rtnetlink_send(&req.nh) < 0) + return -1; + + // if we get an error for seqnum < min_initok_nlseqnum, + // we must exit as initialization went wrong + if (config->ipv6_prefix.s6_addr[0]) + min_initok_rtnlseqnum = 5 + 1; // idx + if + addr + 2*addr6 + else + 
min_initok_rtnlseqnum = 3 + 1; // idx + if + addr + + return 0; +} + +// +// Quickly drop the gateway from the interface +static int disableif(int ifidx) +{ + struct { + // interface setting + struct nlmsghdr nh; + union { + struct ifinfomsg ifinfo; + struct ifaddrmsg ifaddr; + } ifmsg; + char rtdata[32]; // 32 should be enough + } req; + in_addr_t ip; + + memset(&req, 0, sizeof(req)); + + req.nh.nlmsg_type = RTM_DELADDR; + req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_MULTI; + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.ifmsg.ifaddr)); + + req.ifmsg.ifaddr.ifa_family = AF_INET; + req.ifmsg.ifaddr.ifa_prefixlen = 32; + req.ifmsg.ifaddr.ifa_scope = RT_SCOPE_UNIVERSE; + req.ifmsg.ifaddr.ifa_index = ifidx; + + if (config->nbmultiaddress > 1) + { + int i; + for (i = 0; i < config->nbmultiaddress ; i++) + { + ip = config->iftun_n_address[i]; + rtnetlink_addattr(&req.nh, IFA_LOCAL, &ip, sizeof(ip)); + if (rtnetlink_send(&req.nh) < 0) + return -1; + } + } + else + { + if (config->iftun_address) + ip = config->iftun_address; + else + ip = 0x01010101; // 1.1.1.1 + rtnetlink_addattr(&req.nh, IFA_LOCAL, &ip, sizeof(ip)); + + if (rtnetlink_send(&req.nh) < 0) + return -1; + } + + memset(&req, 0, sizeof(req)); + + req.nh.nlmsg_type = NLMSG_DONE; + req.nh.nlmsg_len = NLMSG_LENGTH(0); + + if (rtnetlink_send(&req.nh) < 0) + return -1; + + return 0; } // set up LAC UDP ports -static void initlacudp(void) +static int initlacudp(int *pudpfd, in_addr_t ip_dest, uint16_t port_dest) { int on = 1; struct sockaddr_in addr; @@ -876,17 +2308,30 @@ static void initlacudp(void) addr.sin_family = AF_INET; addr.sin_port = htons(config->bind_portremotelns); addr.sin_addr.s_addr = config->bind_address_remotelns; - udplacfd = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP); - setsockopt(udplacfd, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)); + *pudpfd = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP); + setsockopt(*pudpfd, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)); { - int flags = fcntl(udplacfd, F_GETFL, 0); - 
fcntl(udplacfd, F_SETFL, flags | O_NONBLOCK); + int flags = fcntl(*pudpfd, F_GETFL, 0); + fcntl(*pudpfd, F_SETFL, flags | O_NONBLOCK); } - if (bind(udplacfd, (struct sockaddr *) &addr, sizeof(addr)) < 0) + if (bind(*pudpfd, (struct sockaddr *) &addr, sizeof(addr)) < 0) { LOG(0, 0, 0, "Error in UDP REMOTE LNS bind: %s\n", strerror(errno)); - exit(1); + close(*pudpfd); + return -1; } + if (ip_dest) + { + addr.sin_port = port_dest; + addr.sin_addr.s_addr = ip_dest; + if (connect(*pudpfd, (struct sockaddr *) &addr, sizeof(addr)) < 0) + { + LOG(2, 0, 0, "Error in UDP REMOTE LNS connect: %s\n", strerror(errno)); + close(*pudpfd); + return -1; + } + } + return 0; } // set up control ports @@ -930,7 +2375,7 @@ static void initdae(void) } // set up UDP ports -static void initudp(int * pudpfd, in_addr_t ip_bind) +static int initudp(int * pudpfd, in_addr_t ip_bind, in_addr_t ip_dest, uint16_t port_dest) { int on = 1; struct sockaddr_in addr; @@ -949,8 +2394,21 @@ static void initudp(int * pudpfd, in_addr_t ip_bind) if (bind((*pudpfd), (struct sockaddr *) &addr, sizeof(addr)) < 0) { LOG(0, 0, 0, "Error in UDP bind: %s\n", strerror(errno)); - exit(1); + close(*pudpfd); + return -1; } + if (ip_dest) + { + addr.sin_port = port_dest; + addr.sin_addr.s_addr = ip_dest; + if (connect((*pudpfd), (struct sockaddr *) &addr, sizeof(addr)) < 0) + { + LOG(2, 0, 0, "Error in UDP connect: %s\n", strerror(errno)); + close(*pudpfd); + return -1; + } + } + return 0; } // @@ -1458,15 +2916,34 @@ void processmpframe(sessionidt s, tunnelidt t, uint8_t *p, uint16_t l, uint8_t e } } -static void update_session_out_stat(sessionidt s, sessiont *sp, int len) +// +// Account for some incoming packets in the session statistics +void update_session_in_stat(sessionidt s, int packets, size_t len) { + sessiont *sp = &session[s]; + + increment_counter(&sp->cin, &sp->cin_wrap, len); // byte count + sp->cin_delta += len; + sp->pin += packets; + sp->last_data = time_now; + + sess_local[s].cin += len; // To send 
to master.. + sess_local[s].pin += packets; +} + +// +// Account for some outgoing packets in the session statistics +void update_session_out_stat(sessionidt s, int packets, size_t len) +{ + sessiont *sp = &session[s]; + increment_counter(&sp->cout, &sp->cout_wrap, len); // byte count sp->cout_delta += len; - sp->pout++; + sp->pout += packets; sp->last_data = time_now; sess_local[s].cout += len; // To send to master.. - sess_local[s].pout++; + sess_local[s].pout += packets; } // process outgoing (to tunnel) IP @@ -1675,7 +3152,7 @@ void processipout(uint8_t *buf, int len) tunnelsend(fragbuf, fraglen + (p-fragbuf), t); // send it... // statistics - update_session_out_stat(s, sp, fraglen); + update_session_out_stat(s, 1, fraglen); remain -= fraglen; while (remain > last_fraglen) @@ -1688,7 +3165,7 @@ void processipout(uint8_t *buf, int len) p = makeppp(fragbuf, sizeof(fragbuf), buf+(len - remain), fraglen, s, t, PPPIP, 0, bid, 0); if (!p) return; tunnelsend(fragbuf, fraglen + (p-fragbuf), t); // send it... - update_session_out_stat(s, sp, fraglen); + update_session_out_stat(s, 1, fraglen); remain -= fraglen; } // send the last fragment @@ -1700,7 +3177,7 @@ void processipout(uint8_t *buf, int len) p = makeppp(fragbuf, sizeof(fragbuf), buf+(len - remain), remain, s, t, PPPIP, 0, bid, MP_END); if (!p) return; tunnelsend(fragbuf, remain + (p-fragbuf), t); // send it... - update_session_out_stat(s, sp, remain); + update_session_out_stat(s, 1, remain); if (remain != last_fraglen) LOG(3, s, t, "PROCESSIPOUT ERROR REMAIN != LAST_FRAGLEN, %d != %d\n", remain, last_fraglen); } @@ -1711,7 +3188,7 @@ void processipout(uint8_t *buf, int len) if (!p) return; tunnelsend(fragbuf, len + (p-fragbuf), t); // send it... 
LOG(4, s, t, "MPPP: packet sent as one frame\n"); - update_session_out_stat(s, sp, len); + update_session_out_stat(s, 1, len); } } else @@ -1719,14 +3196,14 @@ void processipout(uint8_t *buf, int len) // Send it as one frame (NO MPPP Frame) uint8_t *p = opt_makeppp(buf, len, s, t, PPPIP, 0, 0, 0); tunnelsend(p, len + (buf-p), t); // send it... - update_session_out_stat(s, sp, len); + update_session_out_stat(s, 1, len); } } else { uint8_t *p = opt_makeppp(buf, len, s, t, PPPIP, 0, 0, 0); tunnelsend(p, len + (buf-p), t); // send it... - update_session_out_stat(s, sp, len); + update_session_out_stat(s, 1, len); } // Snooping this session, send it to intercept box @@ -1979,6 +3456,7 @@ static controlt *controlnew(uint16_t mtype) } assert(c); c->next = 0; + c->ns = 0; // only used for OoO receives c->buf[0] = 0xC8; // flags c->buf[1] = 0x02; // ver c->length = 12; @@ -2074,6 +3552,8 @@ void throttle_session(sessionidt s, int rate_in, int rate_out) session[s].throttle_out = rate_out; } + + switch_kernel_accel(s); } // add/remove filters from session (-1 = no change) @@ -2112,6 +3592,8 @@ void filter_session(sessionidt s, int filter_in, int filter_out) session[s].filter_out = filter_out; } + + switch_kernel_accel(s); } // start tidy shutdown of session @@ -2164,38 +3646,27 @@ void sessionshutdown(sessionidt s, char const *reason, int cdn_result, int cdn_e if (session[s].ip) { // IP allocated, clear and unroute - int r; - int routed = 0; - for (r = 0; r < MAXROUTE && session[s].route[r].ip; r++) - { - if ((session[s].ip >> (32-session[s].route[r].prefixlen)) == - (session[s].route[r].ip >> (32-session[s].route[r].prefixlen))) - routed++; + if (del_routes) + routesset(s, &session[s], 0); - if (del_routes) routeset(s, session[s].route[r].ip, session[s].route[r].prefixlen, 0, 0); + int r; + for (r = 0; r < MAXROUTE && session[s].route[r].ip; r++) session[s].route[r].ip = 0; - } if (session[s].ip_pool_index == -1) // static ip - { - if (!routed && del_routes) routeset(s, 
session[s].ip, 0, 0, 0); session[s].ip = 0; - } else free_ip_address(s); + if (del_routes) + routes6set(s, &session[s], 0); + // unroute IPv6, if setup for (r = 0; r < MAXROUTE6 && session[s].route6[r].ipv6route.s6_addr[0] && session[s].route6[r].ipv6prefixlen; r++) { - if (del_routes) route6set(s, session[s].route6[r].ipv6route, session[s].route6[r].ipv6prefixlen, 0); memset(&session[s].route6[r], 0, sizeof(session[s].route6[r])); } - if (session[s].ipv6address.s6_addr[0] && del_routes) - { - route6set(s, session[s].ipv6address, 128, 0); - } - if (b) { // This session was part of a bundle @@ -2230,7 +3701,6 @@ void sessionshutdown(sessionidt s, char const *reason, int cdn_result, int cdn_e { sessionidt new_s = bundle[b].members[0]; - routed = 0; // Add the route for this session. for (r = 0; r < MAXROUTE && session[new_s].route[r].ip; r++) { @@ -2263,9 +3733,11 @@ void sessionshutdown(sessionidt s, char const *reason, int cdn_result, int cdn_e } cluster_send_bundle(b); - } + } } + delete_kernel_accel(s); + if (session[s].throttle_in || session[s].throttle_out) // Unthrottle if throttled. throttle_session(s, 0, 0); @@ -2366,8 +3838,14 @@ void sendipv6cp(sessionidt s, tunnelidt t) static void sessionclear(sessionidt s) { + delete_kernel_accel(s); + memset(&session[s], 0, sizeof(session[s])); memset(&sess_local[s], 0, sizeof(sess_local[s])); + sess_local[s].pppox_fd = -1; + sess_local[s].ppp_chan_fd = -1; + sess_local[s].ppp_if_fd = -1; + sess_local[s].ppp_if_unit = -1; memset(&cli_session_actions[s], 0, sizeof(cli_session_actions[s])); session[s].tunnel = T_FREE; // Mark it as free. 
@@ -2413,7 +3891,17 @@ void sessionkill(sessionidt s, char *reason) static void tunnelclear(tunnelidt t) { if (!t) return; + + if (tunn_local[t].l2tp_fd >= 0) + { + epoll_ctl(epollfd, EPOLL_CTL_DEL, tunn_local[t].l2tp_fd, NULL); + close(tunn_local[t].l2tp_fd); + } + memset(&tunnel[t], 0, sizeof(tunnel[t])); + memset(&tunn_local[t], 0, sizeof(tunn_local[t])); + tunn_local[t].l2tp_fd = -1; + tunnel[t].state = TUNNELFREE; } @@ -2448,6 +3936,8 @@ static void tunnelkill(tunnelidt t, char *reason) if (session[s].tunnel == t) sessionkill(s, reason); + delete_kernel_tunnel(t); + // free tunnel tunnelclear(t); LOG(1, 0, t, "Kill tunnel %u: %s\n", t, reason); @@ -2475,6 +3965,8 @@ static void tunnelshutdown(tunnelidt t, char *reason, int result, int error, cha if (session[s].tunnel == t) sessionshutdown(s, reason, CDN_NONE, TERM_ADMIN_RESET); + delete_kernel_tunnel(t); + tunnel[t].state = TUNNELDIE; tunnel[t].die = TIME + 700; // Clean up in 70 seconds cluster_send_tunnel(t); @@ -2508,6 +4000,33 @@ static void tunnelshutdown(tunnelidt t, char *reason, int result, int error, cha } } +static void drop_routes(void) +{ + unsigned i; + + LOG(1, 0, 0, "Disabling receiving l2tp\n"); + // Disable receiving l2tp trafic first since we don't forward to master any more + disableif(tunidx); + LOG(1, 0, 0, "Dropping routes\n"); + // Disable receiving Internet trafic + for (i = 1; i <= config->cluster_highest_sessionid ; ++i) + { + routesset(i, &session[i], 0); + routes6set(i, &session[i], 0); + } +} + +// +// We ended up in an odd state, better stop here as quickly as possible before +// causing trouble to the rest of the cluster +// +void crash(void) +{ + kill(0, SIGTERM); + drop_routes(); + exit(1); +} + // read and process packet on tunnel (UDP) void processudp(uint8_t *buf, int len, struct sockaddr_in *addr, uint16_t indexudpfd) { @@ -2515,6 +4034,7 @@ void processudp(uint8_t *buf, int len, struct sockaddr_in *addr, uint16_t indexu uint8_t *recvchalresponse = NULL; uint16_t l = len, t 
= 0, s = 0, ns = 0, nr = 0; uint8_t *p = buf + 2; + controlt *c; CSTAT(processudp); @@ -2657,19 +4177,63 @@ void processudp(uint8_t *buf, int len, struct sockaddr_in *addr, uint16_t indexu fmtaddr(htonl(tunnel[t].ip), 0), tunnel[t].port, t); } - // If the 'ns' just received is not the 'nr' we're - // expecting, just send an ack and drop it. - // - // if 'ns' is less, then we got a retransmitted packet. - // if 'ns' is greater than missed a packet. Either way - // we should ignore it. + // If the 'ns' just received is is less than the 'nr' + // we're expecting, we got a retransmitted packet. + // Just send an ack and drop it. + if (ns - tunnel[t].nr >= 0x8000u) + { + if (l) // Is this not a ZLB? + controlnull(t); + return; + } + + // If the 'ns' just received is greater than the 'nr' + // we're expecting, we missed a packet. If it's not too + // big and new, store this one to look after it after we + // get the retransmission of the missing piece. if (ns != tunnel[t].nr) { - // is this the sequence we were expecting? STAT(tunnel_rx_errors); LOG(1, 0, t, " Out of sequence tunnel %u, (%u is not the expected %u)\n", t, ns, tunnel[t].nr); + if (tunnel[t].state == TUNNELOPEN + && ns - tunnel[t].nr <= 10 && len <= MAXCONTROL) + { + // Not too big and not too new + controlt **curp; + + LOG(2, 0, t, " Queueing it\n"); + + // Find where to put it in the queue + for (curp = &tunn_local[t].controlr; (c = *curp); curp = &c->next) + { + if (ns == c->ns) + { + LOG(2, 0, t, " We already had this piece\n"); + break; + } + if (ns < c->ns) + { + // The rest is greater than this, put this before + c = NULL; + break; + } + } + + if (curp && !c) + { + // We don't already have this piece, store it + c = controlnew(0); + c->next = *curp; + *curp = c; + c->length = len; + c->ns = ns; + memcpy(c->buf, buf, len); + } + } + + // Tell peer what we have if (l) // Is this not a ZLB? 
controlnull(t); return; @@ -2681,7 +4245,7 @@ void processudp(uint8_t *buf, int len, struct sockaddr_in *addr, uint16_t indexu // some to clear maybe? while (tunnel[t].controlc > 0 && (((tunnel[t].ns - tunnel[t].controlc) - nr) & 0x8000)) { - controlt *c = tunnel[t].controls; + c = tunnel[t].controls; tunnel[t].controls = c->next; tunnel[t].controlc--; c->next = controlfree; @@ -2697,7 +4261,7 @@ void processudp(uint8_t *buf, int len, struct sockaddr_in *addr, uint16_t indexu { // some control packets can now be sent that were previous stuck out of window int tosend = tunnel[t].window - skip; - controlt *c = tunnel[t].controls; + c = tunnel[t].controls; while (c && skip) { c = c->next; @@ -2741,7 +4305,7 @@ void processudp(uint8_t *buf, int len, struct sockaddr_in *addr, uint16_t indexu STAT(tunnel_rx_errors); free(sendchalresponse); free(recvchalresponse); - return; + goto out; } p += n; // next l -= n; @@ -3198,6 +4762,7 @@ void processudp(uint8_t *buf, int len, struct sockaddr_in *addr, uint16_t indexu // check authenticator if (memcmp(hash, recvchalresponse, 16) == 0) { + create_kernel_tunnel(t, tunnel[t].far); LOG(3, s, t, "sending SCCCN to REMOTE LNS\n"); controlt *c = controlnew(3); // sending SCCCN controls(c, 7, config->multi_n_hostname[tunnel[t].indexudp][0]?config->multi_n_hostname[tunnel[t].indexudp]:hostname, 1); // host name @@ -3223,6 +4788,7 @@ void processudp(uint8_t *buf, int len, struct sockaddr_in *addr, uint16_t indexu LOG(3, s, t, "Received SCCN\n"); tunnel[t].state = TUNNELOPEN; tunnel[t].lastrec = time_now; + create_kernel_tunnel(t, tunnel[t].far); controlnull(t); // ack break; case 4: // StopCCN @@ -3295,7 +4861,7 @@ void processudp(uint8_t *buf, int len, struct sockaddr_in *addr, uint16_t indexu } free(sendchalresponse); free(recvchalresponse); - return; + goto out; case 11: // ICRP LOG(3, s, t, "Received ICRP\n"); if (session[s].forwardtosession) @@ -3307,6 +4873,9 @@ void processudp(uint8_t *buf, int len, struct sockaddr_in *addr, 
uint16_t indexu session[s].far = asession; session[s].last_packet = session[s].last_data = time_now; + // Now we have the far session number, we can try to enable accelerated forward + create_kernel_bridge(s, session[s].forwardtosession); + control32(c, 19, 1, 1); // Framing Type control32(c, 24, 10000000, 1); // Tx Connect Speed controladd(c, asession, t); // send the message @@ -3355,6 +4924,37 @@ void processudp(uint8_t *buf, int len, struct sockaddr_in *addr, uint16_t indexu free(sendchalresponse); free(recvchalresponse); cluster_send_tunnel(t); + +out: + // We processed a control packet, check if we can process the OoO queue + + c = tunn_local[t].controlr; + while (c && c->ns - tunnel[t].nr >= 0x8000u) + { + // We received this again in the meanwhile! Drop. + LOG(2, 0, t, " We received again %u, drop\n", c->ns); + + tunn_local[t].controlr = c->next; + c->next = controlfree; + controlfree = c; + + c = tunn_local[t].controlr; + } + + if (c && c->ns == tunnel[t].nr) + { + // We caught up with what we saved for later! Dequeue this. + LOG(2, 0, t, " We caught up with %u\n", c->ns); + tunn_local[t].controlr = c->next; + + // And process it. + // Note: this might recurse for the rest of the queue, but the + // queue is bound and while processing it we are not queueing more. 
+ processudp(c->buf, c->length, addr, indexudpfd); + + c->next = controlfree; + controlfree = c; + } } else { @@ -3362,181 +4962,217 @@ void processudp(uint8_t *buf, int len, struct sockaddr_in *addr, uint16_t indexu } } else - { // data - uint16_t proto; + // data + processppp(s, buf, len, p, l, addr, indexudpfd); +} - LOG_HEX(5, "Receive Tunnel Data", p, l); - if (l > 2 && p[0] == 0xFF && p[1] == 0x03) - { // HDLC address header, discard - p += 2; - l -= 2; - } - if (l < 2) - { - LOG(1, s, t, "Short ppp length %d\n", l); - STAT(tunnel_rx_errors); - return; - } - if (*p & 1) - { - proto = *p++; - l--; - } - else - { - proto = ntohs(*(uint16_t *) p); - p += 2; - l -= 2; - } +// +// process a ppp frame coming from tunnel +static void processppp(sessionidt s, uint8_t *buf, int len, uint8_t *p, int l, struct sockaddr_in *addr, uint16_t indexudpfd) +{ // data + int t = session[s].tunnel; + uint16_t proto; - if (session[s].forwardtosession) - { - LOG(5, s, t, "Forwarding data session to session %u\n", session[s].forwardtosession); - // Forward to LAC/BAS or Remote LNS session - lac_session_forward(buf, len, s, proto, addr->sin_addr.s_addr, addr->sin_port, indexudpfd); - return; - } - else if (config->auth_tunnel_change_addr_src) - { - if (tunnel[t].ip != ntohl(addr->sin_addr.s_addr) && - tunnel[t].port == ntohs(addr->sin_port)) - { - // The remotes BAS are a clustered l2tpns server and the source IP has changed - LOG(5, s, t, "The tunnel IP source (%s) has changed by new IP (%s)\n", - fmtaddr(htonl(tunnel[t].ip), 0), fmtaddr(addr->sin_addr.s_addr, 0)); + LOG_HEX(5, "Receive Tunnel Data", p, l); + if (l > 2 && p[0] == 0xFF && p[1] == 0x03) + { // HDLC address header, discard + p += 2; + l -= 2; + } + if (l < 2) + { + LOG(1, s, t, "Short ppp length %d\n", l); + STAT(tunnel_rx_errors); + return; + } + if (*p & 1) + { + proto = *p++; + l--; + } + else + { + proto = ntohs(*(uint16_t *) p); + p += 2; + l -= 2; + } - tunnel[t].ip = ntohl(addr->sin_addr.s_addr); - } - } + if 
(session[s].forwardtosession) + { + LOG(5, s, t, "Forwarding data session to session %u\n", session[s].forwardtosession); + // Forward to LAC/BAS or Remote LNS session + lac_session_forward(buf, len, s, proto, addr->sin_addr.s_addr, addr->sin_port, indexudpfd); + return; + } - if (s && !session[s].opened) // Is something wrong?? + if (s && !session[s].opened) // Is something wrong?? + { + if (!config->cluster_iam_master) { - if (!config->cluster_iam_master) - { - // Pass it off to the master to deal with.. - master_forward_packet(buf, len, addr->sin_addr.s_addr, addr->sin_port, indexudpfd); - return; - } - - LOG(1, s, t, "UDP packet contains session which is not opened. Dropping packet.\n"); - STAT(tunnel_rx_errors); + // Pass it off to the master to deal with.. + master_forward_packet(buf, len, addr->sin_addr.s_addr, addr->sin_port, indexudpfd); return; } - if (proto == PPPPAP) - { - session[s].last_packet = time_now; - if (!config->cluster_iam_master) { master_forward_packet(buf, len, addr->sin_addr.s_addr, addr->sin_port, indexudpfd); return; } - processpap(s, t, p, l); - } - else if (proto == PPPCHAP) - { - session[s].last_packet = time_now; - if (!config->cluster_iam_master) { master_forward_packet(buf, len, addr->sin_addr.s_addr, addr->sin_port, indexudpfd); return; } - processchap(s, t, p, l); - } - else if (proto == PPPLCP) - { - session[s].last_packet = time_now; - if (!config->cluster_iam_master) { master_forward_packet(buf, len, addr->sin_addr.s_addr, addr->sin_port, indexudpfd); return; } - processlcp(s, t, p, l); - } - else if (proto == PPPIPCP) - { - session[s].last_packet = time_now; - if (!config->cluster_iam_master) { master_forward_packet(buf, len, addr->sin_addr.s_addr, addr->sin_port, indexudpfd); return; } - processipcp(s, t, p, l); - } - else if (proto == PPPIPV6CP && config->ipv6_prefix.s6_addr[0]) - { - session[s].last_packet = time_now; - if (!config->cluster_iam_master) { master_forward_packet(buf, len, addr->sin_addr.s_addr, 
addr->sin_port, indexudpfd); return; } - processipv6cp(s, t, p, l); - } - else if (proto == PPPCCP) - { - session[s].last_packet = time_now; - if (!config->cluster_iam_master) { master_forward_packet(buf, len, addr->sin_addr.s_addr, addr->sin_port, indexudpfd); return; } - processccp(s, t, p, l); - } - else if (proto == PPPIP) - { - if (session[s].die) - { - LOG(4, s, t, "Session %u is closing. Don't process PPP packets\n", s); - return; // closing session, PPP not processed - } + LOG(1, s, t, "UDP packet from %s contains session which is not opened. Dropping packet.\n", fmtaddr(addr->sin_addr.s_addr, 0)); + STAT(tunnel_rx_errors); + return; + } - session[s].last_packet = session[s].last_data = time_now; - if (session[s].walled_garden && !config->cluster_iam_master) - { - master_forward_packet(buf, len, addr->sin_addr.s_addr, addr->sin_port, indexudpfd); - return; - } - - processipin(s, t, p, l); - } - else if (proto == PPPMP) + if (config->auth_tunnel_change_addr_src) + { + if (tunnel[t].ip != ntohl(addr->sin_addr.s_addr) && + tunnel[t].port == ntohs(addr->sin_port)) { - if (session[s].die) - { - LOG(4, s, t, "Session %u is closing. Don't process PPP packets\n", s); - return; // closing session, PPP not processed - } + // The remotes BAS are a clustered l2tpns server and the source IP has changed + LOG(2, s, t, "The tunnel IP source (%s) has changed by new IP (%s)\n", + fmtaddr(htonl(tunnel[t].ip), 0), fmtaddr(addr->sin_addr.s_addr, 1)); - session[s].last_packet = session[s].last_data = time_now; - if (!config->cluster_iam_master) - { - // The fragments reconstruction is managed by the Master. - master_forward_packet(buf, len, addr->sin_addr.s_addr, addr->sin_port, indexudpfd); - return; - } + tunnel[t].ip = ntohl(addr->sin_addr.s_addr); - processmpin(s, t, p, l); - } - else if (proto == PPPIPV6 && config->ipv6_prefix.s6_addr[0]) - { - if (session[s].die) - { - LOG(4, s, t, "Session %u is closing. 
Don't process PPP packets\n", s); - return; // closing session, PPP not processed - } - - session[s].last_packet = session[s].last_data = time_now; - if (session[s].walled_garden && !config->cluster_iam_master) - { - master_forward_packet(buf, len, addr->sin_addr.s_addr, addr->sin_port, indexudpfd); - return; - } - - if (!config->cluster_iam_master) - { - // Check if DhcpV6, IP dst: FF02::1:2, Src Port 0x0222 (546), Dst Port 0x0223 (547) - if (*(p + 6) == 17 && *(p + 24) == 0xFF && *(p + 25) == 2 && - *(uint32_t *)(p + 26) == 0 && *(uint32_t *)(p + 30) == 0 && - *(uint16_t *)(p + 34) == 0 && *(p + 36) == 0 && *(p + 37) == 1 && *(p + 38) == 0 && *(p + 39) == 2 && - *(p + 40) == 2 && *(p + 41) == 0x22 && *(p + 42) == 2 && *(p + 43) == 0x23) - { - // DHCPV6 must be managed by the Master. - master_forward_packet(buf, len, addr->sin_addr.s_addr, addr->sin_port, indexudpfd); - return; - } - } - - processipv6in(s, t, p, l); - } - else if (session[s].ppp.lcp == Opened) - { - session[s].last_packet = time_now; - if (!config->cluster_iam_master) { master_forward_packet(buf, len, addr->sin_addr.s_addr, addr->sin_port, indexudpfd); return; } - protoreject(s, t, p, l, proto); - } - else - { - LOG(2, s, t, "Unknown PPP protocol 0x%04X received in LCP %s state\n", - proto, ppp_state(session[s].ppp.lcp)); + update_kernel_tunnel(s, t); + cluster_send_tunnel(t); } } + + if (proto == PPPPAP) + { + session[s].last_packet = time_now; + if (!config->cluster_iam_master) { master_forward_packet(buf, len, addr->sin_addr.s_addr, addr->sin_port, indexudpfd); return; } + processpap(s, t, p, l); + } + else if (proto == PPPCHAP) + { + session[s].last_packet = time_now; + if (!config->cluster_iam_master) { master_forward_packet(buf, len, addr->sin_addr.s_addr, addr->sin_port, indexudpfd); return; } + processchap(s, t, p, l); + } + else if (proto == PPPLCP) + { + session[s].last_packet = time_now; + if (!config->cluster_iam_master) { master_forward_packet(buf, len, addr->sin_addr.s_addr, 
addr->sin_port, indexudpfd); return; } + processlcp(s, t, p, l); + } + else if (proto == PPPIPCP) + { + session[s].last_packet = time_now; + if (!config->cluster_iam_master) { master_forward_packet(buf, len, addr->sin_addr.s_addr, addr->sin_port, indexudpfd); return; } + processipcp(s, t, p, l); + } + else if (proto == PPPIPV6CP && config->ipv6_prefix.s6_addr[0]) + { + session[s].last_packet = time_now; + if (!config->cluster_iam_master) { master_forward_packet(buf, len, addr->sin_addr.s_addr, addr->sin_port, indexudpfd); return; } + processipv6cp(s, t, p, l); + } + else if (proto == PPPCCP) + { + session[s].last_packet = time_now; + if (!config->cluster_iam_master) { master_forward_packet(buf, len, addr->sin_addr.s_addr, addr->sin_port, indexudpfd); return; } + processccp(s, t, p, l); + } + else if (proto == PPPIP) + { + if (session[s].die) + { + LOG(4, s, t, "Session %u is closing. Don't process PPP packets\n", s); + return; // closing session, PPP not processed + } + + session[s].last_packet = session[s].last_data = time_now; + if (session[s].walled_garden && !config->cluster_iam_master) + { + master_forward_packet(buf, len, addr->sin_addr.s_addr, addr->sin_port, indexudpfd); + return; + } + + processipin(s, t, p, l); + } + else if (proto == PPPMP) + { + if (session[s].die) + { + LOG(4, s, t, "Session %u is closing. Don't process PPP packets\n", s); + return; // closing session, PPP not processed + } + + session[s].last_packet = session[s].last_data = time_now; + if (!config->cluster_iam_master) + { + // The fragments reconstruction is managed by the Master. + master_forward_packet(buf, len, addr->sin_addr.s_addr, addr->sin_port, indexudpfd); + return; + } + + processmpin(s, t, p, l); + } + else if (proto == PPPIPV6 && config->ipv6_prefix.s6_addr[0]) + { + if (session[s].die) + { + LOG(4, s, t, "Session %u is closing. 
Don't process PPP packets\n", s); + return; // closing session, PPP not processed + } + + session[s].last_packet = session[s].last_data = time_now; + if (session[s].walled_garden && !config->cluster_iam_master) + { + master_forward_packet(buf, len, addr->sin_addr.s_addr, addr->sin_port, indexudpfd); + return; + } + + if (!config->cluster_iam_master) + { + // Check if DhcpV6, IP dst: FF02::1:2, Src Port 0x0222 (546), Dst Port 0x0223 (547) + if (*(p + 6) == 17 && *(p + 24) == 0xFF && *(p + 25) == 2 && + *(uint32_t *)(p + 26) == 0 && *(uint32_t *)(p + 30) == 0 && + *(uint16_t *)(p + 34) == 0 && *(p + 36) == 0 && *(p + 37) == 1 && *(p + 38) == 0 && *(p + 39) == 2 && + *(p + 40) == 2 && *(p + 41) == 0x22 && *(p + 42) == 2 && *(p + 43) == 0x23) + { + // DHCPV6 must be managed by the Master. + master_forward_packet(buf, len, addr->sin_addr.s_addr, addr->sin_port, indexudpfd); + return; + } + } + + processipv6in(s, t, p, l); + } + else if (session[s].ppp.lcp == Opened) + { + session[s].last_packet = time_now; + if (!config->cluster_iam_master) { master_forward_packet(buf, len, addr->sin_addr.s_addr, addr->sin_port, indexudpfd); return; } + protoreject(s, t, p, l, proto); + } + else + { + LOG(2, s, t, "Unknown PPP protocol 0x%04X received in LCP %s state\n", + proto, ppp_state(session[s].ppp.lcp)); + } +} + +static void processppp_from_kernel(sessionidt s, uint8_t *p, int l, struct sockaddr_in *addr) +{ + tunnelidt t = session[s].tunnel; + int indexudpfd = tunnel[t].indexudp; + struct sockaddr_in defaddr; + + /* Create L2TP header */ + uint16_t *w = (uint16_t *)p - 3; + w[0] = htons(0x0002); /* L2TP data */ + w[1] = htons(t); + w[2] = htons(s); + + if (!addr) + { + /* This is coming from the kernel socket, so it's coming from the address it is bound to */ + memset(&defaddr, 0, sizeof(defaddr)); + defaddr.sin_family = AF_INET; + defaddr.sin_addr.s_addr = htonl(tunnel[t].ip); + defaddr.sin_port = htons(tunnel[t].port); + addr = &defaddr; + } + + processppp(s, (uint8_t *) w, l 
+ 6, p, l, addr, indexudpfd); } // read and process packet on tun @@ -3911,6 +5547,7 @@ static void regular_cleanups(double period) LOG(2, s, session[s].tunnel, "Unsnooping session by CLI\n"); session[s].snoop_ip = 0; session[s].snoop_port = 0; + switch_kernel_accel(s); s_actions++; send++; } @@ -3922,6 +5559,7 @@ static void regular_cleanups(double period) session[s].snoop_ip = cli_session_actions[s].snoop_ip; session[s].snoop_port = cli_session_actions[s].snoop_port; + switch_kernel_accel(s); s_actions++; send++; } @@ -4100,15 +5738,8 @@ static int still_busy(void) return 0; } -#ifdef HAVE_EPOLL -# include -#else -# define FAKE_EPOLL_IMPLEMENTATION /* include the functions */ -# include "fake_epoll.h" -#endif - -// the base set of fds polled: cli, cluster, tun, udp (MAX_UDPFD), control, dae, netlink, udplac, pppoedisc, pppoesess -#define BASE_FDS (9 + MAX_UDPFD) +// the base set of fds polled: cli, cluster, tun, udp (MAX_UDPFD), control, dae, netlink, udplac, pppoedisc, pppoesess, dhcpv6, icmpv6 +#define BASE_FDS (11 + MAX_UDPFD) // additional polled fds #ifdef BGP @@ -4117,16 +5748,26 @@ static int still_busy(void) # define EXTRA_FDS 0 #endif +#define L2TP_FDS MAXTUNNEL +#define PPPOX_FDS MAXSESSION +#define PPP_CHAN_FDS MAXSESSION +#define PPP_IF_FDS MAXSESSION + +#define MAX_FDS (BASE_FDS + RADIUS_FDS + EXTRA_FDS + L2TP_FDS + PPPOX_FDS + PPP_CHAN_FDS + PPP_IF_FDS) + +// for the header of the forwarded MPPP/DHCP packet (see C_MPPP_FORWARD) +#define SLACK 56 + // main loop - gets packets on tun or udp and processes them static void mainloop(void) { int i, j; uint8_t buf[65536]; - uint8_t *p = buf + 32; // for the header of the forwarded MPPP packet (see C_MPPP_FORWARD) + uint8_t *p = buf + SLACK; // for the header of the forwarded MPPP packet (see C_MPPP_FORWARD) // and the forwarded pppoe session - int size_bufp = sizeof(buf) - 32; + int size_bufp = sizeof(buf) - SLACK; clockt next_cluster_ping = 0; // send initial ping immediately - struct epoll_event 
events[BASE_FDS + RADIUS_FDS + EXTRA_FDS]; + struct epoll_event events[MAX_FDS]; int maxevent = sizeof(events)/sizeof(*events); if ((epollfd = epoll_create(maxevent)) < 0) @@ -4135,8 +5776,8 @@ static void mainloop(void) exit(1); } - LOG(4, 0, 0, "Beginning of main loop. clifd=%d, cluster_sockfd=%d, tunfd=%d, udpfd=%d, controlfd=%d, daefd=%d, nlfd=%d , udplacfd=%d, pppoefd=%d, pppoesessfd=%d\n", - clifd, cluster_sockfd, tunfd, udpfd[0], controlfd, daefd, nlfd, udplacfd, pppoediscfd, pppoesessfd); + LOG(4, 0, 0, "Beginning of main loop. clifd=%d, cluster_sockfd=%d, tunfd=%d, udpfd=%d, controlfd=%d, daefd=%d, rtnlfd=%d , udplacfd=%d, pppoefd=%d, pppoesessfd=%d\n", + clifd, cluster_sockfd, tunfd, udpfd[0], controlfd, daefd, rtnlfd, udplacfd, pppoediscfd, pppoesessfd); /* setup our fds to poll for input */ { @@ -4169,9 +5810,9 @@ static void mainloop(void) e.data.ptr = &d[i++]; epoll_ctl(epollfd, EPOLL_CTL_ADD, daefd, &e); - d[i].type = FD_TYPE_NETLINK; + d[i].type = FD_TYPE_RTNETLINK; e.data.ptr = &d[i++]; - epoll_ctl(epollfd, EPOLL_CTL_ADD, nlfd, &e); + epoll_ctl(epollfd, EPOLL_CTL_ADD, rtnlfd, &e); d[i].type = FD_TYPE_PPPOEDISC; e.data.ptr = &d[i++]; @@ -4181,6 +5822,14 @@ static void mainloop(void) e.data.ptr = &d[i++]; epoll_ctl(epollfd, EPOLL_CTL_ADD, pppoesessfd, &e); + d[i].type = FD_TYPE_DHCPV6; + e.data.ptr = &d[i++]; + epoll_ctl(epollfd, EPOLL_CTL_ADD, dhcpv6fd, &e); + + d[i].type = FD_TYPE_ICMPV6; + e.data.ptr = &d[i++]; + epoll_ctl(epollfd, EPOLL_CTL_ADD, icmpv6fd, &e); + for (j = 0; j < config->nbudpfd; j++) { d[i].type = FD_TYPE_UDP; @@ -4299,14 +5948,14 @@ static void mainloop(void) case FD_TYPE_CONTROL: // nsctl commands alen = sizeof(addr); - s = recvfromto(controlfd, p, size_bufp, MSG_WAITALL, (struct sockaddr *) &addr, &alen, &local); + s = recvfromto(controlfd, p, size_bufp, MSG_WAITALL, (struct sockaddr *) &addr, &alen, &local, NULL); if (s > 0) processcontrol(p, s, &addr, alen, &local); n--; break; case FD_TYPE_DAE: // DAE requests alen = 
sizeof(addr); - s = recvfromto(daefd, p, size_bufp, MSG_WAITALL, (struct sockaddr *) &addr, &alen, &local); + s = recvfromto(daefd, p, size_bufp, MSG_WAITALL, (struct sockaddr *) &addr, &alen, &local, NULL); if (s > 0) processdae(p, s, &addr, alen, &local); n--; break; @@ -4334,31 +5983,124 @@ static void mainloop(void) break; #endif /* BGP */ - case FD_TYPE_NETLINK: + case FD_TYPE_RTNETLINK: { struct nlmsghdr *nh = (struct nlmsghdr *)p; - s = netlink_recv(p, size_bufp); - if (nh->nlmsg_type == NLMSG_ERROR) + s = rtnetlink_recv(p, size_bufp); + netlink_handle_ack(nh, 0, min_initok_rtnlseqnum, tun_rtnl_phase_msg); + n--; + break; + } + + case FD_TYPE_L2TP: + { + tunnelidt tid = d->index; + if (events[i].events & EPOLLHUP) { - struct nlmsgerr *errmsg = NLMSG_DATA(nh); - if (errmsg->error) - { - if (errmsg->msg.nlmsg_seq < min_initok_nlseqnum) - { - LOG(0, 0, 0, "Got a fatal netlink error (while %s): %s\n", tun_nl_phase_msg[nh->nlmsg_seq], strerror(-errmsg->error)); - exit(1); - } - else - LOG(0, 0, 0, "Got a netlink error: %s\n", strerror(-errmsg->error)); - } - // else it's a ack + /* Acceleration tunnel got destroyed... Disable it on our side. */ + LOG(1, 0, tid, "L2tp socket got closed!! Disabling kernel acceleration for this tunnel. 
Are you running two l2tpns instances in the same network namespace?\n"); + + sessionidt sid; + for (sid = 1; sid <= config->cluster_highest_sessionid ; ++sid) + if (session[sid].tunnel == tid) + set_kernel_accel(sid, 0, 1); + + delete_kernel_tunnel(tid); } else - LOG(1, 0, 0, "Got a unknown netlink message: type %d seq %d flags %d\n", nh->nlmsg_type, nh->nlmsg_seq, nh->nlmsg_flags); + { + alen = sizeof(addr); + s = recvfrom(tunn_local[tid].l2tp_fd, p, size_bufp, 0, (void *) &addr, &alen); + if (s < 0) + { + LOG(1, 0, tid, "Error on l2tp socket: %s\n", strerror(errno)); + } + else + processudp(p, s, &addr, tunnel[tid].indexudp); + } n--; break; } + case FD_TYPE_PPPOX: + { + sessionidt sid = d->index; + tunnelidt tid = session[sid].tunnel; + alen = sizeof(addr); + s = recvfrom(sess_local[sid].pppox_fd, p, size_bufp, 0, (void *) &addr, &alen); + if (s < 0) + { + LOG(1, sid, tid, "Error on pppox socket: %s\n", strerror(errno)); + set_kernel_accel(sid, 0, 1); + } + else if (s == 0) + { + LOG(1, sid, tid, "EOF on pppox socket\n"); + set_kernel_accel(sid, 0, 1); + } + else + { + LOG(3, sid, tid, "Got frame on pppox socket?? 
%02x %02x %02x %02x\n", p[0], p[1], p[2], p[3]); + processppp_from_kernel(sid, p, s, &addr); + } + n--; + break; + } + + case FD_TYPE_PPP_CHAN: + { + sessionidt sid = d->index; + tunnelidt tid = session[sid].tunnel; + s = read(sess_local[sid].ppp_chan_fd, p, size_bufp); + if (s < 0) + { + LOG(1, sid, tid, "Error on ppp channel: %s\n", strerror(errno)); + set_kernel_accel(sid, 0, 1); + } + else if (s == 0) + { + LOG(1, sid, tid, "EOF on ppp channel\n"); + set_kernel_accel(sid, 0, 1); + } + else + processppp_from_kernel(sid, p, s, NULL); + n--; + break; + } + + case FD_TYPE_PPP_IF: + { + sessionidt sid = d->index; + tunnelidt tid = session[sid].tunnel; + s = read(sess_local[sid].ppp_if_fd, p, size_bufp); + if (s < 0) + { + LOG(1, sid, tid, "Error on ppp if: %s\n", strerror(errno)); + set_kernel_accel(sid, 0, 1); + } + else if (s == 0) + { + LOG(1, sid, tid, "EOF on ppp if\n"); + set_kernel_accel(sid, 0, 1); + } + else + processppp_from_kernel(sid, p, s, NULL); + n--; + break; + } + + case FD_TYPE_DHCPV6: + { + dhcpv6_process_from_kernel(p, size_bufp); + break; + } + + case FD_TYPE_ICMPV6: + { + icmpv6_process_from_kernel(p, size_bufp); + break; + } + default: LOG(0, 0, 0, "Unexpected fd type returned from epoll_wait: %d\n", d->type); } @@ -4480,6 +6222,12 @@ static void mainloop(void) struct param_timer p = { time_now }; run_plugins(PLUGIN_TIMER, &p); } + + sessionidt s; + for (s = 1; s <= config->cluster_highest_sessionid ; ++s) + { + apply_kernel_stats(s); + } } // Runs on every machine (master and slaves). 
@@ -4501,6 +6249,32 @@ static void mainloop(void) next_cluster_ping = TIME + config->cluster_hb_interval; } + // Handle trying to enable kernel accel + { + static double last_switch = 0; + double this_switch; + double diff; + + TIME = now(&this_switch); + diff = this_switch - last_switch; + + // Run during idle time (after we've handled + // all incoming packets) or every 1/10th sec + if (!more || diff > 0.1) + { + kernel_switches = 0; + + for (i = 1; i <= config->cluster_highest_sessionid; i++) + { + // Delayed kernel switch + if (session[i].ppp.lcp == Opened && sess_local[i].needs_switch) + set_kernel_accel(i, can_kernel_accel(i), 0); + } + + last_switch = this_switch; + } + } + if (!config->cluster_iam_master) continue; @@ -4554,6 +6328,7 @@ static void mainloop(void) } } } + LOG(1, 0, 0, "Leaving...\n"); // Are we the master and shutting down?? if (config->cluster_iam_master) @@ -4569,6 +6344,14 @@ static void mainloop(void) // // Important!!! We MUST not process any packets past this point! 
+ // + + // + // Now drop routes as quickly as possible to lose as few packets as + // possible in the meanwhile + // + drop_routes(); + LOG(1, 0, 0, "Shutdown complete\n"); } @@ -4689,6 +6472,11 @@ static void initdata(int optdebug, char *optconfig) LOG(0, 0, 0, "Error doing malloc for tunnels: %s\n", strerror(errno)); exit(1); } + if (!(tunn_local = shared_malloc(sizeof(tunnellocalt) * MAXTUNNEL))) + { + LOG(0, 0, 0, "Error doing malloc for tunn_local: %s\n", strerror(errno)); + exit(1); + } if (!(bundle = shared_malloc(sizeof(bundlet) * MAXBUNDLE))) { LOG(0, 0, 0, "Error doing malloc for bundles: %s\n", strerror(errno)); @@ -4745,6 +6533,10 @@ static void initdata(int optdebug, char *optconfig) memset(cli_tunnel_actions, 0, sizeof(struct cli_tunnel_actions) * MAXSESSION); memset(tunnel, 0, sizeof(tunnelt) * MAXTUNNEL); + memset(tunn_local, 0, sizeof(tunnellocalt) * MAXTUNNEL); + for (i = 0; i < MAXTUNNEL; i++) { + tunn_local[i].l2tp_fd = -1; + } memset(bundle, 0, sizeof(bundlet) * MAXBUNDLE); memset(session, 0, sizeof(sessiont) * MAXSESSION); memset(radius, 0, sizeof(radiust) * MAXRADIUS); @@ -4755,6 +6547,10 @@ static void initdata(int optdebug, char *optconfig) { session[i].next = i + 1; session[i].tunnel = T_UNDEF; // mark it as not filled in. 
+ sess_local[i].pppox_fd = -1; + sess_local[i].ppp_chan_fd = -1; + sess_local[i].ppp_if_fd = -1; + sess_local[i].ppp_if_unit = -1; } session[MAXSESSION - 1].next = 0; sessionfree = 1; @@ -5235,7 +7031,13 @@ int main(int argc, char *argv[]) rlim.rlim_max = RLIM_INFINITY; // Remove the maximum core size if (setrlimit(RLIMIT_CORE, &rlim) < 0) - LOG(0, 0, 0, "Can't set ulimit: %s\n", strerror(errno)); + LOG(0, 0, 0, "Can't set core ulimit: %s\n", strerror(errno)); + + rlim.rlim_cur = MAX_FDS; + rlim.rlim_max = MAX_FDS; + // Lift the maximum file open limit + if (setrlimit(RLIMIT_NOFILE, &rlim) < 0) + LOG(0, 0, 0, "Can't set nofile ulimit: %s\n", strerror(errno)); // Make core dumps go to /tmp if(chdir("/tmp")) LOG(0, 0, 0, "Error chdir /tmp: %s\n", strerror(errno)); @@ -5288,8 +7090,12 @@ int main(int argc, char *argv[]) } config->nbudpfd = config->nbmultiaddress; for (i = 0; i < config->nbudpfd; i++) - initudp(&udpfd[i], config->bind_n_address[i]); - initlacudp(); + { + if (initudp(&udpfd[i], config->bind_n_address[i], 0, 0) < 0) + exit(1); + } + if (initlacudp(&udplacfd, 0, 0) < 0) + exit(1); config->indexlacudpfd = config->nbudpfd; udpfd[config->indexlacudpfd] = udplacfd; config->nbudpfd++; @@ -5303,6 +7109,7 @@ int main(int argc, char *argv[]) initrad(); initippool(); dhcpv6_init(); + icmpv6_init(); // seed prng { @@ -5746,6 +7553,12 @@ static void update_config() LOG(0, 0, 0, "Can't write to PID file %s: %s\n", config->pid_file, strerror(errno)); } } + + for (i = 1; i <= config->cluster_highest_sessionid ; ++i) + { + if (session[i].ppp.lcp == Opened) + switch_kernel_accel(i); + } } static void read_config_file() @@ -5770,7 +7583,6 @@ int sessionsetup(sessionidt s, tunnelidt t) in_addr_t ip; char *user; sessionidt i; - int r; CSTAT(sessionsetup); @@ -5846,33 +7658,12 @@ int sessionsetup(sessionidt s, tunnelidt t) } } + create_kernel_accel(s); + // no need to set a route for the same IP address of the bundle if (!session[s].bundle || 
(bundle[session[s].bundle].num_of_links == 1)) - { - int routed = 0; - // Add the route for this session. - for (r = 0; r < MAXROUTE && session[s].route[r].ip; r++) - { - if ((session[s].ip >> (32-session[s].route[r].prefixlen)) == - (session[s].route[r].ip >> (32-session[s].route[r].prefixlen))) - routed++; - - routeset(s, session[s].route[r].ip, session[s].route[r].prefixlen, 0, 1); - } - - // Static IPs need to be routed if not already - // convered by a Framed-Route. Anything else is part - // of the IP address pool and is already routed, it - // just needs to be added to the IP cache. - // IPv6 route setup is done in ppp.c, when IPV6CP is acked. - if (session[s].ip_pool_index == -1) // static ip - { - if (!routed) routeset(s, session[s].ip, 0, 0, 1); - } - else - cache_ipmap(session[s].ip, s); - } + routesset(s, &session[s], 1); sess_local[s].lcp_authtype = 0; // RADIUS authentication complete lcp_open(s, t); // transition to Network phase and send initial IPCP @@ -5898,6 +7689,52 @@ int sessionsetup(sessionidt s, tunnelidt t) return 1; // RADIUS OK and IP allocated, done... } +// +// This tunnel just got dropped on us by the master or something. +// Make sure our tables up up to date... +// +int load_tunnel(tunnelidt t, tunnelt *new) +{ + int dropped = 0; + int ip_changed = 0; + + if (tunnel[t].state != TUNNELFREE && new->state == TUNNELFREE) + dropped = 1; + + // if already connected, check if IP changed + if (tunn_local[t].l2tp_fd >= 0 && (tunnel[t].ip != new->ip || tunnel[t].port != new->port)) + ip_changed = 1; + + memcpy(&tunnel[t], new, sizeof(tunnel[t]) ); + + // + // Clear tunnel control messages. These are dynamically allocated. + // If we get unlucky, this may cause the tunnel to drop! 
+ // + tunnel[t].controls = tunnel[t].controle = tunn_local[t].controlr = NULL; + tunnel[t].controlc = 0; + + if (tunnel[t].state == TUNNELFREE) + { + if (dropped) + delete_kernel_tunnel(t); + } + else + { + create_kernel_tunnel(t, tunnel[t].far); + + if (ip_changed) { + LOG(2, 0, t, "Updating tunnel IP from heartbeat\n"); + update_kernel_tunnel(0, t); + } + + if (t > config->cluster_highest_tunnelid) // Maintain this in the slave too. + config->cluster_highest_tunnelid = t; + } + + return 1; +} + // // This session just got dropped on us by the master or something. // Make sure our tables up up to date... @@ -5906,6 +7743,7 @@ int load_session(sessionidt s, sessiont *new) { int i; int newip = 0; + int newsession = 0; // Sanity checks. if (new->ip_pool_index >= MAXIPPOOL || @@ -5921,6 +7759,11 @@ int load_session(sessionidt s, sessiont *new) // loading the new session. // + if (new->tunnel != session[s].tunnel || + new->far != session[s].far) + // This is a new session + newsession = 1; + session[s].tunnel = new->tunnel; // For logging in cache_ipmap // See if routes/ip cache need updating @@ -5935,80 +7778,26 @@ int load_session(sessionidt s, sessiont *new) // needs update if (newip) { - int routed = 0; - // remove old routes... - for (i = 0; i < MAXROUTE && session[s].route[i].ip; i++) - { - if ((session[s].ip >> (32-session[s].route[i].prefixlen)) == - (session[s].route[i].ip >> (32-session[s].route[i].prefixlen))) - routed++; - - routeset(s, session[s].route[i].ip, session[s].route[i].prefixlen, 0, 0); - } - - // ...ip - if (session[s].ip) - { - if (session[s].ip_pool_index == -1) // static IP - { - if (!routed) routeset(s, session[s].ip, 0, 0, 0); - } - else // It's part of the IP pool, remove it manually. - uncache_ipmap(session[s].ip); - } + routesset(s, &session[s], 0); // remove old IPV6 routes... 
- for (i = 0; i < MAXROUTE6 && session[s].route6[i].ipv6route.s6_addr[0] && session[s].route6[i].ipv6prefixlen; i++) - { - route6set(s, session[s].route6[i].ipv6route, session[s].route6[i].ipv6prefixlen, 0); - } + routes6set(s, &session[s], 0); + } - if (session[s].ipv6address.s6_addr[0]) - { - route6set(s, session[s].ipv6address, 128, 0); - } - - routed = 0; + if (newsession) + // The session changed, drop existing kernel acceleration + delete_kernel_accel(s); + if (newip) + { // add new routes... - for (i = 0; i < MAXROUTE && new->route[i].ip; i++) - { - if ((new->ip >> (32-new->route[i].prefixlen)) == - (new->route[i].ip >> (32-new->route[i].prefixlen))) - routed++; - - routeset(s, new->route[i].ip, new->route[i].prefixlen, 0, 1); - } - - // ...ip - if (new->ip) - { - // If there's a new one, add it. - if (new->ip_pool_index == -1) - { - if (!routed) routeset(s, new->ip, 0, 0, 1); - } - else - cache_ipmap(new->ip, s); - } + routesset(s, new, 1); } // check v6 routing if (new->ppp.ipv6cp == Opened && session[s].ppp.ipv6cp != Opened) - { - for (i = 0; i < MAXROUTE6 && new->route6[i].ipv6prefixlen; i++) - { - route6set(s, new->route6[i].ipv6route, new->route6[i].ipv6prefixlen, 1); - } - } - - if (new->ipv6address.s6_addr[0] && new->ppp.ipv6cp == Opened && session[s].ppp.ipv6cp != Opened) - { - // Check if included in prefix - if (sessionbyipv6(new->ipv6address) != s) - route6set(s, new->ipv6address, 128, 1); - } + routes6set(s, new, 1); // check filters if (new->filter_in && (new->filter_in > MAXFILTER || !ip_filters[new->filter_in - 1].name[0])) @@ -6045,6 +7834,9 @@ int load_session(sessionidt s, sessiont *new) if (new->ip_pool_index != -1) fix_address_pool(s); + // and try to enable kernel acceleration + switch_kernel_accel(s); + return 1; } diff --git a/l2tpns.h b/l2tpns.h index 53ace04..253633e 100644 --- a/l2tpns.h +++ b/l2tpns.h @@ -13,6 +13,9 @@ #include #include #include +#include +#include +#include #include #define VERSION "2.4.1" @@ -28,6 +31,8 @@ // 
Tunnel Id reserved for pppoe #define TUNNEL_ID_PPPOE 1 +#define PPP_IF_PREFIX "ppp" + #define RADIUS_SHIFT 6 #define RADIUS_FDS (1 << RADIUS_SHIFT) #define RADIUS_MASK ((1 << RADIUS_SHIFT) - 1) @@ -270,6 +275,7 @@ typedef struct controls // control message { struct controls *next; // next in queue uint16_t length; // length + uint16_t ns; // sequence number uint8_t buf[MAXCONTROL]; } controlt; @@ -436,6 +442,9 @@ typedef struct // last LCP Echo time_t last_echo; + // Whether we tried to suggest the IPV6CP identifier option. + int tried_identifier; + // last unsolicited RA sent to user time_t last_ra; @@ -446,6 +455,23 @@ typedef struct uint32_t jitteravg; // time in milliseconds of the last fragment. uint64_t prev_time; + + // Pending kernel switch + int needs_switch; + + // l2tp PPPoL2TP socket + int pppox_fd; + struct pppol2tp_ioc_stats last_stats; + + // ppp channel + int ppp_chan_fd; + + // ppp interface + int ppp_if_fd; + // ppp interface number (ppp%d) + int ppp_if_unit; + // ppp interface index (for rtnetlink etc.) 
+ int ppp_if_idx; } sessionlocalt; // session flags @@ -479,6 +505,13 @@ typedef struct } tunnelt; +typedef struct +{ + controlt *controlr; // queue of OoO-received messages + int l2tp_fd; // kernel acceleration UDP socket +} +tunnellocalt; + // 164 bytes per radius session typedef struct // outstanding RADIUS requests { @@ -821,6 +854,7 @@ typedef struct uint32_t dhcp6_server_duid; // DUID of dhcpv6 server (see rfc3315) uint32_t dns6_lifetime; // RDNSS lifetime default 1200 (see rfc6106, rfc4861) (MaxRtrAdvInterval <= Lifetime <= 2*MaxRtrAdvInterval) char default_ipv6_domain_list[255]; + int kernel_accel; // Enable kernel-accelerated support } configt; enum config_typet { INT, STRING, UNSIGNED_LONG, SHORT, BOOL, IPv4, IPv6 }; @@ -960,8 +994,9 @@ int rad_tunnel_pwdecode(uint8_t *pl2tpsecret, size_t *pl2tpsecretlen, const char // l2tpns.c clockt backoff(uint8_t try); -void send_ipv6_ra(sessionidt s, tunnelidt t, struct in6_addr *ip); +void send_ipv6_ra(sessionidt s, tunnelidt t, const struct in6_addr *ip); void route6set(sessionidt s, struct in6_addr ip, int prefixlen, int add); +void routes6set(sessionidt s, sessiont *sp, int add); sessionidt sessionbyip(in_addr_t ip); sessionidt sessionbyipv6(struct in6_addr ip); sessionidt sessionbyipv6new(struct in6_addr ip); @@ -979,6 +1014,8 @@ void adjust_tcp6_mss(sessionidt s, tunnelidt t, uint8_t *buf, int len, uint8_t * void sendipcp(sessionidt s, tunnelidt t); void sendipv6cp(sessionidt s, tunnelidt t); void processudp(uint8_t *buf, int len, struct sockaddr_in *addr, uint16_t indexudpfd); +void update_session_in_stat(sessionidt s, int packets, size_t len); +void update_session_out_stat(sessionidt s, int packets, size_t len); void processipout(uint8_t *buf, int len); void snoop_send_packet(uint8_t *packet, uint16_t size, in_addr_t destination, uint16_t port); int find_filter(char const *name, size_t len); @@ -1005,8 +1042,11 @@ int sessionsetup(sessionidt s, tunnelidt t); int run_plugins(int plugin_type, void *data); void 
rebuild_address_pool(void); void throttle_session(sessionidt s, int rate_in, int rate_out); +int load_tunnel(tunnelidt, tunnelt *); int load_session(sessionidt, sessiont *); +int create_kernel_bridge(sessionidt s, sessionidt fwds); void become_master(void); // We're the master; kick off any required master initializations. +void crash(void); // We messed up. Die. // cli.c @@ -1018,6 +1058,10 @@ int cli_arg_help(struct cli_def *cli, int cr_ok, char *entry, ...); // icmp.c +extern int icmpv6fd; +void icmpv6_init(void); +void icmpv6_listen(int ifidx); +void icmpv6_process_from_kernel(uint8_t *p, size_t size_bufp); void host_unreachable(in_addr_t destination, uint16_t id, in_addr_t source, uint8_t *packet, int packet_len); @@ -1025,6 +1069,7 @@ extern tunnelt *tunnel; extern bundlet *bundle; extern sessiont *session; extern sessionlocalt *sess_local; +extern tunnellocalt *tunn_local; extern ippoolt *ip_address_pool; #define sessionfree (session[0].next) @@ -1037,7 +1082,10 @@ extern uint32_t last_id; extern struct Tstats *_statistics; extern in_addr_t my_address; extern int clifd; +extern int rtnlfd; +extern int genlfd; extern int epollfd; +extern FILE *log_stream; struct event_data { enum { @@ -1049,9 +1097,15 @@ struct event_data { FD_TYPE_DAE, FD_TYPE_RADIUS, FD_TYPE_BGP, - FD_TYPE_NETLINK, + FD_TYPE_RTNETLINK, FD_TYPE_PPPOEDISC, - FD_TYPE_PPPOESESS + FD_TYPE_PPPOESESS, + FD_TYPE_L2TP, + FD_TYPE_PPPOX, + FD_TYPE_PPP_CHAN, + FD_TYPE_PPP_IF, + FD_TYPE_DHCPV6, + FD_TYPE_ICMPV6, } type; int index; // for RADIUS, BGP, UDP }; diff --git a/ppp.c b/ppp.c index 08c2ef0..7f5a5d2 100644 --- a/ppp.c +++ b/ppp.c @@ -1479,21 +1479,10 @@ void processipcp(sessionidt s, tunnelidt t, uint8_t *p, uint16_t l) static void ipv6cp_open(sessionidt s, tunnelidt t) { - int i; LOG(3, s, t, "IPV6CP: Opened\n"); change_state(s, ipv6cp, Opened); - for (i = 0; i < MAXROUTE6 && session[s].route6[i].ipv6prefixlen; i++) - { - route6set(s, session[s].route6[i].ipv6route, 
session[s].route6[i].ipv6prefixlen, 1); - } - - if (session[s].ipv6address.s6_addr[0]) - { - // Check if included in prefix - if (sessionbyipv6(session[s].ipv6address) != s) - route6set(s, session[s].ipv6address, 128, 1); - } + routes6set(s, &session[s], 1); // Send an initial RA send_ipv6_ra(s, t, NULL); @@ -1571,6 +1560,17 @@ void processipv6cp(sessionidt s, tunnelidt t, uint8_t *p, uint16_t l) int gotip = 0; uint32_t ident[2]; + if (session[s].ipv6address.s6_addr[0]) + { + // LSB 64bits of assigned IPv6 address to user (see radius attribut Framed-IPv6-Address) + memcpy(&ident[0], &session[s].ipv6address.s6_addr[8], 8); + } + else + { + ident[0] = htonl(session[s].ip); + ident[1] = 0; + } + while (length > 2) { if (!o[1] || o[1] > length) return; @@ -1581,17 +1581,6 @@ void processipv6cp(sessionidt s, tunnelidt t, uint8_t *p, uint16_t l) gotip++; // seen address if (o[1] != 10) return; - if (session[s].ipv6address.s6_addr[0]) - { - // LSB 64bits of assigned IPv6 address to user (see radius attribut Framed-IPv6-Address) - memcpy(&ident[0], &session[s].ipv6address.s6_addr[8], 8); - } - else - { - ident[0] = htonl(session[s].ip); - ident[1] = 0; - } - if (memcmp(o + 2, ident, sizeof(ident))) { q = ppp_conf_nak(s, b, sizeof(b), PPPIPV6CP, &response, q, p, o, (uint8_t *)ident, sizeof(ident)); @@ -1610,24 +1599,30 @@ void processipv6cp(sessionidt s, tunnelidt t, uint8_t *p, uint16_t l) o += o[1]; } + if (!response && !gotip && sess_local[s].tried_identifier++ < 2) + { + uint8_t identifier_option[6] = { 1, 6 }; + + // No interface identifier option, try to suggest one + q = ppp_conf_nak(s, b, sizeof(b), PPPIPV6CP, &response, q, p, identifier_option, (uint8_t *)ident, sizeof(ident)); + if (!q) return; + } + if (response) { l = q - response; // IPV6CP packet length *((uint16_t *) (response + 2)) = htons(l); // update header } - else if (gotip) + else { + if (!gotip) + LOG(2, s, t, "No interface identifier in IPV6CP request, hoping for the best\n"); + // Send packet back 
as ConfigAck response = makeppp(b, sizeof(b), p, l, s, t, PPPIPV6CP, 0, 0, 0); if (!response) return; *response = ConfigAck; } - else - { - LOG(3, s, t, "No interface identifier in IPV6CP request\n"); - STAT(tunnel_rx_errors); - return; - } switch (session[s].ppp.ipv6cp) { @@ -1722,12 +1717,7 @@ static void update_sessions_in_stat(sessionidt s, uint16_t l) bundleidt b = session[s].bundle; if (!b) { - increment_counter(&session[s].cin, &session[s].cin_wrap, l); - session[s].cin_delta += l; - session[s].pin++; - - sess_local[s].cin += l; - sess_local[s].pin++; + update_session_in_stat(s, 1, l); } else { @@ -1737,12 +1727,8 @@ static void update_sessions_in_stat(sessionidt s, uint16_t l) { l = frag[b].fragment[i].length; s = frag[b].fragment[i].sid; - increment_counter(&session[s].cin, &session[s].cin_wrap, l); - session[s].cin_delta += l; - session[s].pin++; + update_session_in_stat(s, 1, l); - sess_local[s].cin += l; - sess_local[s].pin++; if (i == end) return; i = (i + 1) & MAXFRAGNUM_MASK; @@ -2306,7 +2292,7 @@ void processipv6in(sessionidt s, tunnelidt t, uint8_t *p, uint16_t l) *(uint16_t *)(p + 34) == 0 && *(p + 36) == 0 && *(p + 37) == 1 && *(p + 38) == 0 && *(p + 39) == 2 && *(p + 40) == 2 && *(p + 41) == 0x22 && *(p + 42) == 2 && *(p + 43) == 0x23) { - dhcpv6_process(s, t, p, l); + dhcpv6_process_from_ipv6(s, t, p, l); return; } @@ -2383,12 +2369,7 @@ void send_ipin(sessionidt s, uint8_t *buf, int len) } // Increment packet counters - increment_counter(&session[s].cin, &session[s].cin_wrap, len); - session[s].cin_delta += len; - session[s].pin++; - - sess_local[s].cin += len; - sess_local[s].pin++; + update_session_in_stat(s, 1, len); eth_tx += len; diff --git a/pppoe.c b/pppoe.c index 935bd0f..c60f4fb 100644 --- a/pppoe.c +++ b/pppoe.c @@ -955,21 +955,9 @@ static void pppoe_forwardto_session_rmlns(uint8_t *pack, int size, sessionidt se if ((proto == PPPIP) || (proto == PPPMP) ||(proto == PPPIPV6 && config->ipv6_prefix.s6_addr[0])) { - 
session[sess].last_packet = session[sess].last_data = time_now; - // Update STAT IN - increment_counter(&session[sess].cin, &session[sess].cin_wrap, ll2tp); - session[sess].cin_delta += ll2tp; - session[sess].pin++; - sess_local[sess].cin += ll2tp; - sess_local[sess].pin++; - - session[s].last_data = time_now; - // Update STAT OUT - increment_counter(&session[s].cout, &session[s].cout_wrap, ll2tp); // byte count - session[s].cout_delta += ll2tp; - session[s].pout++; - sess_local[s].cout += ll2tp; - sess_local[s].pout++; + session[sess].last_packet = time_now; + update_session_in_stat(sess, 1, ll2tp); /* IN counters are charged to the receiving session (sess), as the removed code did */ + update_session_out_stat(s, 1, ll2tp); /* OUT counters stay on s, as the removed code did */ } else session[sess].last_packet = time_now; @@ -1026,21 +1014,9 @@ void pppoe_forwardto_session_pppoe(uint8_t *pack, int size, sessionidt sess, uin if ((proto == PPPIP) || (proto == PPPMP) ||(proto == PPPIPV6 && config->ipv6_prefix.s6_addr[0])) { - session[sess].last_packet = session[sess].last_data = time_now; - // Update STAT IN - increment_counter(&session[sess].cin, &session[sess].cin_wrap, lpppoe); - session[sess].cin_delta += lpppoe; - session[sess].pin++; - sess_local[sess].cin += lpppoe; - sess_local[sess].pin++; - - session[s].last_data = time_now; - // Update STAT OUT - increment_counter(&session[s].cout, &session[s].cout_wrap, lpppoe); // byte count - session[s].cout_delta += lpppoe; - session[s].pout++; - sess_local[s].cout += lpppoe; - sess_local[s].pout++; + session[sess].last_packet = time_now; + update_session_in_stat(sess, 1, lpppoe); /* IN counters are charged to the receiving session (sess), as the removed code did */ + update_session_out_stat(s, 1, lpppoe); /* OUT counters stay on s, as the removed code did */ } else session[sess].last_packet = time_now; diff --git a/util.c b/util.c index d3b352a..6829203 100644 --- a/util.c +++ b/util.c @@ -1,9 +1,11 @@ /* Misc util functions */ +#define _GNU_SOURCE #include #include #include #include +#include #include #include #include @@ -108,12 +110,8 @@ pid_t fork_and_close() if (udpfd[i] != -1) close(udpfd[i]); } - if (pppoediscfd != -1) close(pppoediscfd); if (controlfd != -1) close(controlfd); if (daefd != -1) 
close(daefd); - if (snoopfd != -1) close(snoopfd); - if (rand_fd != -1) close(rand_fd); - if (epollfd != -1) close(epollfd); for (i = 0; radfds && i < RADIUS_FDS; i++) close(radfds[i]); @@ -124,44 +122,116 @@ pid_t fork_and_close() close(bgp_peers[i].sock); #endif /* BGP */ + if (rtnlfd != -1) close(rtnlfd); + if (genlfd != -1) close(genlfd); + if (pppoediscfd != -1) close(pppoediscfd); + if (pppoesessfd != -1) close(pppoesessfd); + + for (i = 0; i <= config->cluster_highest_tunnelid; i++) + { + if (tunn_local[i].l2tp_fd >= 0) close(tunn_local[i].l2tp_fd); + } + + for (i = 0; i <= config->cluster_highest_sessionid; i++) + { + if (sess_local[i].pppox_fd >= 0) close(sess_local[i].pppox_fd); + if (sess_local[i].ppp_chan_fd >= 0) close(sess_local[i].ppp_chan_fd); + if (sess_local[i].ppp_if_fd >= 0) close(sess_local[i].ppp_if_fd); + } + + if (dhcpv6fd != -1) close(dhcpv6fd); + if (icmpv6fd != -1) close(icmpv6fd); + if (snoopfd != -1) close(snoopfd); + if (rand_fd != -1) close(rand_fd); + if (epollfd != -1) close(epollfd); + return pid; } +static ssize_t recvfromtox(int s, void *buf, size_t len, int flags, + struct sockaddr *from, socklen_t *fromlen, char *cbuf, size_t cbuflen, struct msghdr *msg) +{ + ssize_t r; + struct iovec vec; + + memset(msg, 0, sizeof(*msg)); + msg->msg_name = from; + msg->msg_namelen = fromlen ? *fromlen : 0; /* fromlen may be NULL; this matches the NULL check done after recvmsg() */ + + vec.iov_base = buf; + vec.iov_len = len; + msg->msg_iov = &vec; /* NOTE: vec is a local, so msg->msg_iov is stale once this function returns; callers may only read the control data */ + msg->msg_iovlen = 1; + msg->msg_flags = 0; + + msg->msg_control = cbuf; + msg->msg_controllen = cbuflen; + + if ((r = recvmsg(s, msg, flags)) < 0) + return r; + + if (fromlen) + *fromlen = msg->msg_namelen; + + return r; +} + ssize_t recvfromto(int s, void *buf, size_t len, int flags, - struct sockaddr *from, socklen_t *fromlen, struct in_addr *toaddr) + struct sockaddr *from, socklen_t *fromlen, struct in_addr *toaddr, int *ifidx) { ssize_t r; struct msghdr msg; struct cmsghdr *cmsg; - struct iovec vec; - char cbuf[128]; + char cbuf[BUFSIZ]; - memset(&msg, 0, sizeof(msg)); - 
msg.msg_name = from; - msg.msg_namelen = *fromlen; - - vec.iov_base = buf; - vec.iov_len = len; - msg.msg_iov = &vec; - msg.msg_iovlen = 1; - msg.msg_flags = 0; - - msg.msg_control = cbuf; - msg.msg_controllen = sizeof(cbuf); - - if ((r = recvmsg(s, &msg, flags)) < 0) + if ((r = recvfromtox(s, buf, len, flags, from, fromlen, cbuf, sizeof(cbuf), &msg)) < 0) return r; - if (fromlen) - *fromlen = msg.msg_namelen; - memset(toaddr, 0, sizeof(*toaddr)); + if (toaddr) + memset(toaddr, 0, sizeof(*toaddr)); + if (ifidx) + *ifidx = -1; for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) { if (cmsg->cmsg_level == SOL_IP && cmsg->cmsg_type == IP_PKTINFO) { struct in_pktinfo *i = (struct in_pktinfo *) CMSG_DATA(cmsg); - memcpy(toaddr, &i->ipi_addr, sizeof(*toaddr)); + if (toaddr) + memcpy(toaddr, &i->ipi_addr, sizeof(*toaddr)); + if (ifidx) + *ifidx = i->ipi_ifindex; + break; + } + } + + return r; +} + +ssize_t recvfromto6(int s, void *buf, size_t len, int flags, + struct sockaddr *from, socklen_t *fromlen, struct in6_addr *toaddr, int *ifidx) +{ + ssize_t r; + struct msghdr msg; + struct cmsghdr *cmsg; + char cbuf[BUFSIZ]; + + if ((r = recvfromtox(s, buf, len, flags, from, fromlen, cbuf, sizeof(cbuf), &msg)) < 0) + return r; + + if (toaddr) + memset(toaddr, 0, sizeof(*toaddr)); + if (ifidx) + *ifidx = -1; + for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) + { + if (cmsg->cmsg_level == SOL_IPV6 && cmsg->cmsg_type == IPV6_PKTINFO) + { + struct in6_pktinfo *i = (struct in6_pktinfo *) CMSG_DATA(cmsg); + if (toaddr) + memcpy(toaddr, &i->ipi6_addr, sizeof(*toaddr)); + if (ifidx) + *ifidx = i->ipi6_ifindex; break; } } diff --git a/util.h b/util.h index 332c6f8..88ebeaf 100644 --- a/util.h +++ b/util.h @@ -10,6 +10,9 @@ ssize_t sendtofrom(int s, void const *buf, size_t len, int flags, struct sockaddr const *to, socklen_t tolen, struct in_addr const *from); ssize_t recvfromto(int s, void *buf, size_t len, int flags, - struct sockaddr *from, 
socklen_t *fromlen, struct in_addr *toaddr); + struct sockaddr *from, socklen_t *fromlen, struct in_addr *toaddr, int *ifidx); + +ssize_t recvfromto6(int s, void *buf, size_t len, int flags, + struct sockaddr *from, socklen_t *fromlen, struct in6_addr *toaddr, int *ifidx); #endif /* __UTIL_H__ */