diff --git a/cluster.c b/cluster.c index d68918a..ddfc1fe 100644 --- a/cluster.c +++ b/cluster.c @@ -1364,14 +1364,7 @@ static int cluster_recv_tunnel(int more, uint8_t *p) } } - memcpy(&tunnel[more], p, sizeof(tunnel[more]) ); - - // - // Clear tunnel control messages. These are dynamically allocated. - // If we get unlucky, this may cause the tunnel to drop! - // - tunnel[more].controls = tunnel[more].controle = NULL; - tunnel[more].controlc = 0; + load_tunnel(more, (tunnelt *) p); // Copy tunnel into tunnel table.. LOG(5, 0, more, "Received tunnel update\n"); diff --git a/docs/manpages/startup-config.5 b/docs/manpages/startup-config.5 index e039c15..e1babbd 100644 --- a/docs/manpages/startup-config.5 +++ b/docs/manpages/startup-config.5 @@ -412,6 +412,12 @@ connected users. .PP Number of token buckets to allocate for throttling. Each throttled session requires two buckets (in and out). +.PP +\f[B]kernel_accel\f[R] (boolean) +.PP +Determines whether or not to enable kernel acceleration. +Note that only one l2tpns instance can use it per network namespace, +otherwise they will step on each other. .SS DHCPv6 And IPv6 SETTINGS .PP \f[B]dhcp6_preferred_lifetime\f[R] (int) diff --git a/docs/src/man/startup-config.5.md b/docs/src/man/startup-config.5.md index 885470a..60176e3 100644 --- a/docs/src/man/startup-config.5.md +++ b/docs/src/man/startup-config.5.md @@ -251,6 +251,10 @@ The following `variables` may be set: Number of token buckets to allocate for throttling. Each throttled session requires two buckets (in and out). +**kernel\_accel** (boolean) + + Determines whether or not to enable kernel acceleration. Note that only one l2tpns instance can use it per network namespace, otherwise they will step on each other. 
+ ## DHCPv6 And IPv6 SETTINGS **dhcp6\_preferred\_lifetime** (int) diff --git a/etc/startup-config.default b/etc/startup-config.default index 2249454..3b3f223 100644 --- a/etc/startup-config.default +++ b/etc/startup-config.default @@ -146,3 +146,6 @@ set ppp_keepalive yes # Walled garden #load plugin "garden" + +# Kernel acceleration, enable on no more than one instance! +#set kernel_accel yes diff --git a/garden.c b/garden.c index 0e6880c..bbfca34 100644 --- a/garden.c +++ b/garden.c @@ -238,6 +238,8 @@ int garden_session(sessiont *s, int flag, char *newuser) s->walled_garden = 0; + // TODO: try to enable kernel acceleration with switch_kernel_accel(s); + if (flag != F_CLEANUP) { /* OK, we're up! */ diff --git a/l2tpns.c b/l2tpns.c index 1fa32a0..54efdf0 100644 --- a/l2tpns.c +++ b/l2tpns.c @@ -49,6 +49,8 @@ #define PPPIOCUNBRIDGECHAN _IO('t', 54) #endif +#define PPP_IF_PREFIX "ppp" + #include "md5.h" #include "dhcp6.h" #include "l2tpns.h" @@ -68,6 +70,13 @@ #include "pppoe.h" #include "dhcp6.h" +#ifdef HAVE_EPOLL +# include +#else +# define FAKE_EPOLL_IMPLEMENTATION /* include the functions */ +# include "fake_epoll.h" +#endif + char * Vendor_name = "Linux L2TPNS"; uint32_t call_serial_number = 0; @@ -215,6 +224,7 @@ config_descriptt config_values[] = { CONFIG("primary_ipv6_dns", default_ipv6_dns1, IPv6), CONFIG("secondary_ipv6_dns", default_ipv6_dns2, IPv6), CONFIG("default_ipv6_domain_list", default_ipv6_domain_list, STRING), + CONFIG("kernel_accel", kernel_accel, BOOL), { NULL, 0, 0, 0 } }; @@ -240,6 +250,7 @@ static sessiont shut_acct[8192]; static sessionidt shut_acct_n = 0; tunnelt *tunnel = NULL; // Array of tunnel structures. +tunnellocalt *tunn_local = NULL; // Array of local per-tunnel structures. bundlet *bundle = NULL; // Array of bundle structures. fragmentationt *frag = NULL; // Array of fragmentation structures. sessiont *session = NULL; // Array of session structures. 
@@ -612,12 +623,23 @@ static int create_kernel_tunnel(uint32_t tid, uint32_t peer_tid) char data[64]; } req; + if (!config->kernel_accel) + { + /* Disabled */ + errno = EPERM; + return -1; + } + if (genl_l2tp_id < 0) { errno = ENOSYS; return -1; } + if (tunn_local[tid].l2tp_fd >= 0) + /* Already set up */ + return 0; + LOG(3, 0, tid, "Creating kernel tunnel from %u to %u\n", tid, peer_tid); memset(&req, 0, sizeof(req)); @@ -629,7 +651,11 @@ static int create_kernel_tunnel(uint32_t tid, uint32_t peer_tid) req.glh.cmd = L2TP_CMD_TUNNEL_CREATE; req.glh.version = L2TP_GENL_VERSION; - uint32_t fd = udpfd[tunnel[tid].indexudp]; + int fd; + if (initudp(&fd, config->bind_n_address[tunnel[tid].indexudp], + htonl(tunnel[tid].ip), htons(tunnel[tid].port)) < 0) + return -1; + genetlink_addattr(&req.nh, L2TP_ATTR_FD, &fd, sizeof(fd)); genetlink_addattr(&req.nh, L2TP_ATTR_CONN_ID, &tid, sizeof(tid)); genetlink_addattr(&req.nh, L2TP_ATTR_PEER_CONN_ID, &peer_tid, sizeof(peer_tid)); @@ -643,6 +669,7 @@ static int create_kernel_tunnel(uint32_t tid, uint32_t peer_tid) if (genetlink_send(&req.nh) < 0) { LOG(2, 0, tid, "Can't create tunnel %d to %d: %s\n", tid, peer_tid, strerror(errno)); + close(fd); return -1; } @@ -650,12 +677,48 @@ static int create_kernel_tunnel(uint32_t tid, uint32_t peer_tid) if (size < 0) { LOG(1, 0, 0, "Can't receive answer for tunnel creation: %s\n", strerror(errno)); + close(fd); return -1; } if (netlink_handle_ack((struct nlmsghdr *)&req, 1, 0, NULL) < 0) + { + close(fd); + return -1; + } + + struct epoll_event e; + static struct event_data d1[MAXTUNNEL]; + e.events = EPOLLIN; + d1[tid].type = FD_TYPE_L2TP; + d1[tid].index = tid; + e.data.ptr = &d1[tid]; + + epoll_ctl(epollfd, EPOLL_CTL_ADD, fd, &e); + + tunn_local[tid].l2tp_fd = fd; + return 0; +} + +// +// Update remote address of kernel tunnel +static int update_kernel_tunnel(sessionidt s, tunnelidt t) +{ + if (tunn_local[t].l2tp_fd < 0) return -1; + struct sockaddr_in tunneladdr; + memset(&tunneladdr, 
0, sizeof(tunneladdr)); + tunneladdr.sin_family = AF_INET; + tunneladdr.sin_addr.s_addr = htonl(tunnel[t].ip); + tunneladdr.sin_port = htons(tunnel[t].port); + + int ret = connect(tunn_local[t].l2tp_fd, (struct sockaddr *)&tunneladdr, sizeof(tunneladdr)); + if (ret < 0) + { + LOG(2, s, t, "Can't switch tunnel UDP socket: %s\n", strerror(errno)); + return -1; + } return 0; } @@ -719,9 +782,17 @@ static int delete_kernel_tunnel(uint32_t tid) LOG(1, 0, 0, "Can't receive answer for tunnel deletion: %s\n", strerror(errno)); return -1; } + if (netlink_handle_ack((struct nlmsghdr *)&req, 1, 0, NULL) < 0) return -1; + if (tunn_local[tid].l2tp_fd >= 0) + { + epoll_ctl(epollfd, EPOLL_CTL_DEL, tunn_local[tid].l2tp_fd, NULL); + close(tunn_local[tid].l2tp_fd); + tunn_local[tid].l2tp_fd = -1; + } + return 0; } @@ -755,6 +826,13 @@ static int create_kernel_session(uint32_t tid, uint32_t peer_tid, uint32_t sid, return -1; } + if (tunn_local[tid].l2tp_fd < 0) + { + /* Didn't create kernel tunnel first */ + errno = ENOENT; + return -1; + } + LOG(3, sid, tid, "Creating kernel session from %u:%u to %u:%u\n", tid, sid, peer_tid, peer_sid); memset(&req, 0, sizeof(req)); @@ -916,6 +994,37 @@ static int create_ppp_socket(int udp_fd, uint32_t tid, uint32_t peer_tid, uint32 return pppox_fd; } +// +// Create the kernel session and PPPoX socket for this session +static int create_kernel_pppox(sessionidt s) +{ + tunnelidt t = session[s].tunnel; + + if (tunn_local[t].l2tp_fd < 0) + /* Tunnel not set up yet */ + return -1; + + tunnelidt tfar = tunnel[t].far; + sessionidt sfar = session[s].far; + + LOG(3, s, t, "Creating kernel-accelerated pppox socket from %u:%u to %u:%u\n", t, s, tfar, sfar); + + if (create_kernel_session(t, tfar, s, sfar) < 0) + return -1; + + struct sockaddr_in tunneladdr; + memset(&tunneladdr, 0, sizeof(tunneladdr)); + tunneladdr.sin_family = AF_INET; + tunneladdr.sin_addr.s_addr = htonl(tunnel[t].ip); + tunneladdr.sin_port = htons(tunnel[t].port); + + int pppox_fd = 
create_ppp_socket(tunn_local[t].l2tp_fd, t, tfar, s, sfar, (struct sockaddr *) &tunneladdr, sizeof(tunneladdr)); + if (pppox_fd < 0) + return -1; + + return pppox_fd; +} + // // Get the kernel PPP channel static int get_kernel_ppp_chan(sessionidt s, int pppox_fd) @@ -1013,6 +1122,260 @@ static int create_kernel_ppp_if(sessionidt s, int ppp_chan_fd, int *ifunit) return ppp_if_fd; } +// +// Tell whether we can try to enable PPP acceleration +static int can_kernel_accel(sessionidt s) +{ + if (!config->kernel_accel) + /* Disabled */ + return 0; + + if (session[s].bundle) + /* MPPP not supported yet */ + return 0; + + if (session[s].forwardtosession) + /* Forwarding not supported yet */ + return 0; + + if (session[s].throttle_in || session[s].throttle_out) + /* Throttling not supported */ + return 0; + + if (session[s].filter_in || session[s].filter_out) + /* Filtering not supported */ + return 0; + + if (session[s].snoop_ip) + /* Snooping not supported */ + return 0; + + if (session[s].walled_garden) + /* Walled garden not supported */ + return 0; + + /* Looks ok! 
*/ + return 1; +} + +// +// Create the kernel PPP acceleration +// Returns 0 once the session is accelerated (or already was), -1 on failure +static int create_kernel_accel(sessionidt s) +{ + tunnelidt t = session[s].tunnel; + + if (sess_local[s].ppp_if_idx) + /* Already set up */ + return 0; + + if (!can_kernel_accel(s)) + return -1; + + int pppox_fd = create_kernel_pppox(s); + if (pppox_fd < 0) + return -1; + + int ppp_chan_fd = create_kernel_ppp_chan(s, pppox_fd); + if (ppp_chan_fd < 0) + goto err_pppox_fd; + + int ifunit = -1; + int ppp_if_fd = create_kernel_ppp_if(s, ppp_chan_fd, &ifunit); + if (ppp_if_fd < 0) + goto err_chan_fd; + + struct ifreq ifr; + memset(&ifr, 0, sizeof(ifr)); + snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), PPP_IF_PREFIX"%u", ifunit); + if (ioctl(tunn_local[t].l2tp_fd, SIOCGIFINDEX, &ifr) < 0) + { + LOG(2, s, t, "Can't get if index of %s: %s\n", ifr.ifr_name, strerror(errno)); + goto err_if_fd; + } + + if (setupif(ifr.ifr_ifindex, session[s].mru, 0)) + { + LOG(2, s, t, "Can't configure %s: %s\n", ifr.ifr_name, strerror(errno)); + goto err_if_fd; + } + + struct epoll_event e; + e.events = EPOLLIN; + + static struct event_data d1[MAXSESSION]; + d1[s].type = FD_TYPE_PPPOX; + d1[s].index = s; + e.data.ptr = &d1[s]; + + epoll_ctl(epollfd, EPOLL_CTL_ADD, pppox_fd, &e); + + static struct event_data d2[MAXSESSION]; + d2[s].type = FD_TYPE_PPP_CHAN; + d2[s].index = s; + e.data.ptr = &d2[s]; + + epoll_ctl(epollfd, EPOLL_CTL_ADD, ppp_chan_fd, &e); + + static struct event_data d3[MAXSESSION]; + d3[s].type = FD_TYPE_PPP_IF; + d3[s].index = s; + e.data.ptr = &d3[s]; + + epoll_ctl(epollfd, EPOLL_CTL_ADD, ppp_if_fd, &e); + + sess_local[s].pppox_fd = pppox_fd; + sess_local[s].ppp_chan_fd = ppp_chan_fd; + sess_local[s].ppp_if_fd = ppp_if_fd; + sess_local[s].ppp_if_idx = ifr.ifr_ifindex; + + memset(&sess_local[s].last_stats, 0, sizeof(sess_local[s].last_stats)); + + return 0; + +err_if_fd: + close(ppp_if_fd); +err_chan_fd: + close(ppp_chan_fd); +err_pppox_fd: + close(pppox_fd); + /* Also delete the kernel session made by create_kernel_pppox(), like delete_kernel_accel() does, so it is not left behind */ + delete_kernel_session(t, s); + return -1; +} + +// +// Delete the kernel PPP 
acceleration +static int delete_kernel_accel(sessionidt s) +{ + if (!sess_local[s].ppp_if_idx) + /* Already stopped */ + return 0; + + LOG(3, s, session[s].tunnel, "Stopping kernel-accelerated support for %u:%u\n", session[s].tunnel, s); + + sess_local[s].ppp_if_idx = 0; + + ioctl(sess_local[s].ppp_chan_fd, PPPIOCDISCONN); + + epoll_ctl(epollfd, EPOLL_CTL_DEL, sess_local[s].ppp_if_fd, NULL); + close(sess_local[s].ppp_if_fd); + sess_local[s].ppp_if_fd = -1; + + epoll_ctl(epollfd, EPOLL_CTL_DEL, sess_local[s].ppp_chan_fd, NULL); + close(sess_local[s].ppp_chan_fd); + sess_local[s].ppp_chan_fd = -1; + + epoll_ctl(epollfd, EPOLL_CTL_DEL, sess_local[s].pppox_fd, NULL); + close(sess_local[s].pppox_fd); + sess_local[s].pppox_fd = -1; + + delete_kernel_session(session[s].tunnel, s); + + return 0; +} + +// +// Enable (set=1) or disable (set=0) kernel PPP acceleration +// This basically calls create/delete_kernel_accel, but also updates routes +static void set_kernel_accel(sessionidt s, int set) +{ + if (set && !can_kernel_accel(s)) + /* Still cannot enable it */ + return; + + tunnelidt t = session[s].tunnel; + if (set && tunnel[t].state == TUNNELUNDEF) + /* We don't know the tunnel yet */ + return; + + routesset(s, &session[s], 0); + if (session[s].ppp.ipv6cp == Opened) + routes6set(s, &session[s], 0); + + if (set) + { + create_kernel_tunnel(t, tunnel[t].far); + create_kernel_accel(s); + } + else + delete_kernel_accel(s); + + routesset(s, &session[s], 1); + if (session[s].ppp.ipv6cp == Opened) + routes6set(s, &session[s], 1); +} + +// +// Try to enable/disable PPP acceleration as allowed +// This is typically called when switching a parameter that changes whether +// acceleration is allowed, e.g. 
snoop +void switch_kernel_accel(sessionidt s) +{ + if (!sess_local[s].ppp_if_idx) + { + /* Acceleration disabled */ + + if (!can_kernel_accel(s)) + /* Still cannot enable it */ + return; + + /* Try to enable */ + set_kernel_accel(s, 1); + } + else + { + /* Acceleration enabled */ + + if (can_kernel_accel(s)) + /* Still allowed to enable it */ + return; + + /* Has to disable it */ + set_kernel_accel(s, 0); + } +} + +// +// Get traffic statistics from kernel and apply to our counters +static void apply_kernel_stats(sessionidt s) +{ + tunnelidt t = session[s].tunnel; + + if (session[s].tunnel == T_FREE) + /* It is free */ + return; + + if (!sess_local[s].ppp_if_idx) + /* It does not have kernel acceleration */ + return; + + struct pppol2tp_ioc_stats stats, *last_stats = &sess_local[s].last_stats; + int ret = ioctl(sess_local[s].pppox_fd, PPPIOCGL2TPSTATS, &stats); + if (ret < 0) + { + LOG(3, s, t, "Can't get stats with PPPIOCGL2TPSTATS: %s\n", strerror(errno)); + return; + } + + /* Some traffic from peer went through kernel, notice it */ + if (stats.rx_packets - last_stats->rx_packets) + session[s].last_packet = time_now; + + update_session_out_stat(s, + stats.tx_packets - last_stats->tx_packets, + stats.tx_bytes - last_stats->tx_bytes); + // stats.tx_errors + update_session_in_stat(s, + stats.rx_packets - last_stats->rx_packets, + stats.rx_bytes - last_stats->rx_bytes); + // stats.rx_seq_discards + // stats.rx_oos_packets + // stats.rx_errors + + *last_stats = stats; +} + // // Bridge kernel channels to accelerate LAC static int bridge_kernel_chans(sessionidt s, int pppox_fd, int pppox_fd2) @@ -1031,6 +1391,21 @@ static int bridge_kernel_chans(sessionidt s, int pppox_fd, int pppox_fd2) return 0; } +// Get interface idx for session +static int session_if_idx(sessionidt s) +{ + if (s != 0) + { + int idx = sess_local[s].ppp_if_idx; + if (idx > 0) + // Kernel-accelerated interface + return idx; + } + + // Software interface + return tunidx; +} + // Add a route // // This 
adds it to the routing table, advertises it @@ -1075,7 +1450,8 @@ static void routeset(sessionidt s, in_addr_t ip, int prefixlen, in_addr_t gw, in req.rt.rtm_scope = RT_SCOPE_LINK; req.rt.rtm_type = RTN_UNICAST; - rtnetlink_addattr(&req.nh, RTA_OIF, &tunidx, sizeof(int)); + int idx = session_if_idx(s); + rtnetlink_addattr(&req.nh, RTA_OIF, &idx, sizeof(idx)); n_ip = htonl(ip); rtnetlink_addattr(&req.nh, RTA_DST, &n_ip, sizeof(n_ip)); if (gw) @@ -1199,7 +1575,8 @@ void route6set(sessionidt s, struct in6_addr ip, int prefixlen, int add) req.rt.rtm_scope = RT_SCOPE_LINK; req.rt.rtm_type = RTN_UNICAST; - rtnetlink_addattr(&req.nh, RTA_OIF, &tunidx, sizeof(int)); + int idx = session_if_idx(s); + rtnetlink_addattr(&req.nh, RTA_OIF, &idx, sizeof(idx)); rtnetlink_addattr(&req.nh, RTA_DST, &ip, sizeof(ip)); metric = 1; rtnetlink_addattr(&req.nh, RTA_METRICS, &metric, sizeof(metric)); @@ -1383,6 +1760,12 @@ static void initnetlink(void) genl_l2tp_id = netlink_get_l2tp_id(); LOG(3, 0, 0, "gen l2tp id is %d\n", genl_l2tp_id); + + if (config->kernel_accel) + { + delete_kernel_sessions(); + delete_kernel_tunnels(); + } } // @@ -2985,6 +3368,8 @@ void throttle_session(sessionidt s, int rate_in, int rate_out) session[s].throttle_out = rate_out; } + + switch_kernel_accel(s); } // add/remove filters from session (-1 = no change) @@ -3023,6 +3408,8 @@ void filter_session(sessionidt s, int filter_in, int filter_out) session[s].filter_out = filter_out; } + + switch_kernel_accel(s); } // start tidy shutdown of session @@ -3162,9 +3549,11 @@ void sessionshutdown(sessionidt s, char const *reason, int cdn_result, int cdn_e } cluster_send_bundle(b); - } + } } + delete_kernel_accel(s); + if (session[s].throttle_in || session[s].throttle_out) // Unthrottle if throttled. 
throttle_session(s, 0, 0); @@ -3265,8 +3654,13 @@ void sendipv6cp(sessionidt s, tunnelidt t) static void sessionclear(sessionidt s) { + delete_kernel_accel(s); + memset(&session[s], 0, sizeof(session[s])); memset(&sess_local[s], 0, sizeof(sess_local[s])); + sess_local[s].pppox_fd = -1; + sess_local[s].ppp_chan_fd = -1; + sess_local[s].ppp_if_fd = -1; memset(&cli_session_actions[s], 0, sizeof(cli_session_actions[s])); session[s].tunnel = T_FREE; // Mark it as free. @@ -3312,7 +3706,17 @@ void sessionkill(sessionidt s, char *reason) static void tunnelclear(tunnelidt t) { if (!t) return; + + if (tunn_local[t].l2tp_fd >= 0) + { + epoll_ctl(epollfd, EPOLL_CTL_DEL, tunn_local[t].l2tp_fd, NULL); + close(tunn_local[t].l2tp_fd); + } + memset(&tunnel[t], 0, sizeof(tunnel[t])); + memset(&tunn_local[t], 0, sizeof(tunn_local[t])); + tunn_local[t].l2tp_fd = -1; + tunnel[t].state = TUNNELFREE; } @@ -3347,6 +3751,8 @@ static void tunnelkill(tunnelidt t, char *reason) if (session[s].tunnel == t) sessionkill(s, reason); + delete_kernel_tunnel(t); + // free tunnel tunnelclear(t); LOG(1, 0, t, "Kill tunnel %u: %s\n", t, reason); @@ -3374,6 +3780,8 @@ static void tunnelshutdown(tunnelidt t, char *reason, int result, int error, cha if (session[s].tunnel == t) sessionshutdown(s, reason, CDN_NONE, TERM_ADMIN_RESET); + delete_kernel_tunnel(t); + tunnel[t].state = TUNNELDIE; tunnel[t].die = TIME + 700; // Clean up in 70 seconds cluster_send_tunnel(t); @@ -4097,6 +4505,7 @@ void processudp(uint8_t *buf, int len, struct sockaddr_in *addr, uint16_t indexu // check authenticator if (memcmp(hash, recvchalresponse, 16) == 0) { + create_kernel_tunnel(t, tunnel[t].far); LOG(3, s, t, "sending SCCCN to REMOTE LNS\n"); controlt *c = controlnew(3); // sending SCCCN controls(c, 7, config->multi_n_hostname[tunnel[t].indexudp][0]?config->multi_n_hostname[tunnel[t].indexudp]:hostname, 1); // host name @@ -4122,6 +4531,7 @@ void processudp(uint8_t *buf, int len, struct sockaddr_in *addr, uint16_t indexu 
LOG(3, s, t, "Received SCCN\n"); tunnel[t].state = TUNNELOPEN; tunnel[t].lastrec = time_now; + create_kernel_tunnel(t, tunnel[t].far); controlnull(t); // ack break; case 4: // StopCCN @@ -4303,18 +4713,6 @@ static void processppp(sessionidt s, uint8_t *buf, int len, uint8_t *p, int l, s lac_session_forward(buf, len, s, proto, addr->sin_addr.s_addr, addr->sin_port, indexudpfd); return; } - else if (config->auth_tunnel_change_addr_src) - { - if (tunnel[t].ip != ntohl(addr->sin_addr.s_addr) && - tunnel[t].port == ntohs(addr->sin_port)) - { - // The remotes BAS are a clustered l2tpns server and the source IP has changed - LOG(5, s, t, "The tunnel IP source (%s) has changed by new IP (%s)\n", - fmtaddr(htonl(tunnel[t].ip), 0), fmtaddr(addr->sin_addr.s_addr, 0)); - - tunnel[t].ip = ntohl(addr->sin_addr.s_addr); - } - } if (s && !session[s].opened) // Is something wrong?? { @@ -4330,6 +4728,22 @@ static void processppp(sessionidt s, uint8_t *buf, int len, uint8_t *p, int l, s return; } + if (config->auth_tunnel_change_addr_src) + { + if (tunnel[t].ip != ntohl(addr->sin_addr.s_addr) && + tunnel[t].port == ntohs(addr->sin_port)) + { + // The remotes BAS are a clustered l2tpns server and the source IP has changed + LOG(2, s, t, "The tunnel IP source (%s) has changed by new IP (%s)\n", + fmtaddr(htonl(tunnel[t].ip), 0), fmtaddr(addr->sin_addr.s_addr, 0)); + + tunnel[t].ip = ntohl(addr->sin_addr.s_addr); + + update_kernel_tunnel(s, t); + cluster_send_tunnel(t); + } + } + if (proto == PPPPAP) { session[s].last_packet = time_now; @@ -4445,6 +4859,31 @@ static void processppp(sessionidt s, uint8_t *buf, int len, uint8_t *p, int l, s } } +static void processppp_from_kernel(sessionidt s, uint8_t *p, int l, struct sockaddr_in *addr) +{ + tunnelidt t = session[s].tunnel; + int indexudpfd = tunnel[t].indexudp; + struct sockaddr_in defaddr; + + /* Create L2TP header */ + uint16_t *w = (uint16_t *)p - 3; + w[0] = htons(0x0002); /* L2TP data */ + w[1] = htons(t); + w[2] = htons(s); + + 
if (!addr) + { + /* This is coming from the kernel socket, so it's coming from the address it is bound to */ + memset(&defaddr, 0, sizeof(defaddr)); + defaddr.sin_family = AF_INET; + defaddr.sin_addr.s_addr = htonl(tunnel[t].ip); + defaddr.sin_port = htons(tunnel[t].port); + addr = &defaddr; + } + + processppp(s, (uint8_t *) w, l + 6, p, l, addr, indexudpfd); +} + // read and process packet on tun // (i.e. this routine writes to buf[-8]). static void processtun(uint8_t * buf, int len) @@ -4817,6 +5256,7 @@ static void regular_cleanups(double period) LOG(2, s, session[s].tunnel, "Unsnooping session by CLI\n"); session[s].snoop_ip = 0; session[s].snoop_port = 0; + switch_kernel_accel(s); s_actions++; send++; } @@ -4828,6 +5268,7 @@ static void regular_cleanups(double period) session[s].snoop_ip = cli_session_actions[s].snoop_ip; session[s].snoop_port = cli_session_actions[s].snoop_port; + switch_kernel_accel(s); s_actions++; send++; } @@ -5006,14 +5447,7 @@ static int still_busy(void) return 0; } -#ifdef HAVE_EPOLL -# include -#else -# define FAKE_EPOLL_IMPLEMENTATION /* include the functions */ -# include "fake_epoll.h" -#endif - -// the base set of fds polled: cli, cluster, tun, udp (MAX_UDPFD), control, dae, netlink, udplac, pppoedisc, pppoesess +// the base set of fds polled: cli, cluster, tun, udp (MAX_UDPFD), control, dae, netlink, udplac, pppoedisc, pppoesess, kernel ppp #define BASE_FDS (9 + MAX_UDPFD) // additional polled fds @@ -5023,6 +5457,13 @@ static int still_busy(void) # define EXTRA_FDS 0 #endif +#define L2TP_FDS MAXTUNNEL +#define PPPOX_FDS MAXSESSION +#define PPP_CHAN_FDS MAXSESSION +#define PPP_IF_FDS MAXSESSION + +#define MAX_FDS (BASE_FDS + RADIUS_FDS + EXTRA_FDS + L2TP_FDS + PPPOX_FDS + PPP_CHAN_FDS + PPP_IF_FDS) + // main loop - gets packets on tun or udp and processes them static void mainloop(void) { @@ -5032,7 +5473,7 @@ static void mainloop(void) // and the forwarded pppoe session int size_bufp = sizeof(buf) - 32; clockt next_cluster_ping 
= 0; // send initial ping immediately - struct epoll_event events[BASE_FDS + RADIUS_FDS + EXTRA_FDS]; + struct epoll_event events[MAX_FDS]; int maxevent = sizeof(events)/sizeof(*events); if ((epollfd = epoll_create(maxevent)) < 0) @@ -5249,6 +5690,109 @@ static void mainloop(void) break; } + case FD_TYPE_L2TP: + { + tunnelidt tid = d->index; + if (events[i].events & EPOLLHUP) + { + /* Acceleration tunnel got destroyed... Disable it on our side. */ + LOG(1, 0, tid, "L2tp socket got closed!! Disabling kernel acceleration for this tunnel. Are you running two l2tpns instances in the same network namespace?\n"); + + sessionidt sid; + for (sid = 1; sid <= config->cluster_highest_sessionid ; ++sid) + if (session[sid].tunnel == tid) + set_kernel_accel(sid, 0); + + if (delete_kernel_tunnel(tid) < 0 && tunn_local[tid].l2tp_fd >= 0) + { + /* delete_kernel_tunnel() only releases the socket after the netlink deletion succeeded. If that failed (e.g. the kernel tunnel is already gone), release it here, otherwise epoll keeps reporting EPOLLHUP for it. */ + epoll_ctl(epollfd, EPOLL_CTL_DEL, tunn_local[tid].l2tp_fd, NULL); + close(tunn_local[tid].l2tp_fd); + tunn_local[tid].l2tp_fd = -1; + } + } + else + { + alen = sizeof(addr); + s = recvfrom(tunn_local[tid].l2tp_fd, p, size_bufp, 0, (void *) &addr, &alen); + if (s < 0) + { + LOG(1, 0, tid, "Error on l2tp socket: %s\n", strerror(errno)); + } + else + processudp(p, s, &addr, tunnel[tid].indexudp); + } + n--; + break; + } + + case FD_TYPE_PPPOX: + { + sessionidt sid = d->index; + tunnelidt tid = session[sid].tunnel; + alen = sizeof(addr); + s = recvfrom(sess_local[sid].pppox_fd, p, size_bufp, 0, (void *) &addr, &alen); + if (s < 0) + { + LOG(1, sid, tid, "Error on pppox socket: %s\n", strerror(errno)); + set_kernel_accel(sid, 0); + } + else if (s == 0) + { + LOG(1, sid, tid, "EOF on pppox socket\n"); + set_kernel_accel(sid, 0); + } + else + { + LOG(3, sid, tid, "Got frame on pppox socket?? 
%02x %02x %02x %02x\n", p[0], p[1], p[2], p[3]); + processppp_from_kernel(sid, p, s, &addr); + } + n--; + break; + } + + case FD_TYPE_PPP_CHAN: + { + sessionidt sid = d->index; + tunnelidt tid = session[sid].tunnel; + s = read(sess_local[sid].ppp_chan_fd, p, size_bufp); + if (s < 0) + { + LOG(1, sid, tid, "Error on ppp channel: %s\n", strerror(errno)); + set_kernel_accel(sid, 0); + } + else if (s == 0) + { + LOG(1, sid, tid, "EOF on ppp channel\n"); + set_kernel_accel(sid, 0); + } + else + processppp_from_kernel(sid, p, s, NULL); + n--; + break; + } + + case FD_TYPE_PPP_IF: + { + sessionidt sid = d->index; + tunnelidt tid = session[sid].tunnel; + s = read(sess_local[sid].ppp_if_fd, p, size_bufp); + if (s < 0) + { + LOG(1, sid, tid, "Error on ppp if: %s\n", strerror(errno)); + set_kernel_accel(sid, 0); + } + else if (s == 0) + { + LOG(1, sid, tid, "EOF on ppp if\n"); + set_kernel_accel(sid, 0); + } + else + processppp_from_kernel(sid, p, s, NULL); + n--; + break; + } + default: LOG(0, 0, 0, "Unexpected fd type returned from epoll_wait: %d\n", d->type); } @@ -5370,6 +5908,12 @@ static void mainloop(void) struct param_timer p = { time_now }; run_plugins(PLUGIN_TIMER, &p); } + + sessionidt s; + for (s = 1; s <= config->cluster_highest_sessionid ; ++s) + { + apply_kernel_stats(s); + } } // Runs on every machine (master and slaves). 
@@ -5579,6 +6123,11 @@ static void initdata(int optdebug, char *optconfig) LOG(0, 0, 0, "Error doing malloc for tunnels: %s\n", strerror(errno)); exit(1); } + if (!(tunn_local = shared_malloc(sizeof(tunnellocalt) * MAXTUNNEL))) + { + LOG(0, 0, 0, "Error doing malloc for tunn_local: %s\n", strerror(errno)); + exit(1); + } if (!(bundle = shared_malloc(sizeof(bundlet) * MAXBUNDLE))) { LOG(0, 0, 0, "Error doing malloc for bundles: %s\n", strerror(errno)); @@ -5635,6 +6184,10 @@ static void initdata(int optdebug, char *optconfig) memset(cli_tunnel_actions, 0, sizeof(struct cli_tunnel_actions) * MAXSESSION); memset(tunnel, 0, sizeof(tunnelt) * MAXTUNNEL); + memset(tunn_local, 0, sizeof(tunnellocalt) * MAXTUNNEL); + for (i = 0; i < MAXTUNNEL; i++) { + tunn_local[i].l2tp_fd = -1; + } memset(bundle, 0, sizeof(bundlet) * MAXBUNDLE); memset(session, 0, sizeof(sessiont) * MAXSESSION); memset(radius, 0, sizeof(radiust) * MAXRADIUS); @@ -5645,6 +6198,9 @@ static void initdata(int optdebug, char *optconfig) { session[i].next = i + 1; session[i].tunnel = T_UNDEF; // mark it as not filled in. 
+ sess_local[i].pppox_fd = -1; + sess_local[i].ppp_chan_fd = -1; + sess_local[i].ppp_if_fd = -1; } session[MAXSESSION - 1].next = 0; sessionfree = 1; @@ -6125,7 +6681,13 @@ int main(int argc, char *argv[]) rlim.rlim_max = RLIM_INFINITY; // Remove the maximum core size if (setrlimit(RLIMIT_CORE, &rlim) < 0) - LOG(0, 0, 0, "Can't set ulimit: %s\n", strerror(errno)); + LOG(0, 0, 0, "Can't set core ulimit: %s\n", strerror(errno)); + + rlim.rlim_cur = MAX_FDS; + rlim.rlim_max = MAX_FDS; + // Lift the maximum file open limit + if (setrlimit(RLIMIT_NOFILE, &rlim) < 0) + LOG(0, 0, 0, "Can't set nofile ulimit: %s\n", strerror(errno)); // Make core dumps go to /tmp if(chdir("/tmp")) LOG(0, 0, 0, "Error chdir /tmp: %s\n", strerror(errno)); @@ -6640,6 +7202,12 @@ static void update_config() LOG(0, 0, 0, "Can't write to PID file %s: %s\n", config->pid_file, strerror(errno)); } } + + for (i = 1; i <= config->cluster_highest_sessionid ; ++i) + { + if (session[i].ppp.lcp == Opened) + switch_kernel_accel(i); + } } static void read_config_file() @@ -6739,6 +7307,8 @@ int sessionsetup(sessionidt s, tunnelidt t) } } + create_kernel_accel(s); + // no need to set a route for the same IP address of the bundle if (!session[s].bundle || (bundle[session[s].bundle].num_of_links == 1)) // Add the route for this session. @@ -6768,6 +7338,52 @@ int sessionsetup(sessionidt s, tunnelidt t) return 1; // RADIUS OK and IP allocated, done... } +// +// This tunnel just got dropped on us by the master or something. +// Make sure our tables are up to date... +// +int load_tunnel(tunnelidt t, tunnelt *new) +{ + int dropped = 0; + int ip_changed = 0; + + if (tunnel[t].state != TUNNELFREE && new->state == TUNNELFREE) + dropped = 1; + + // if already connected, check if IP changed + if (tunn_local[t].l2tp_fd >= 0 && (tunnel[t].ip != new->ip || tunnel[t].port != new->port)) + ip_changed = 1; + + memcpy(&tunnel[t], new, sizeof(tunnel[t]) ); + + // + // Clear tunnel control messages. 
These are dynamically allocated. + // If we get unlucky, this may cause the tunnel to drop! + // + tunnel[t].controls = tunnel[t].controle = NULL; + tunnel[t].controlc = 0; + + if (tunnel[t].state == TUNNELFREE) + { + if (dropped) + delete_kernel_tunnel(t); + } + else + { + create_kernel_tunnel(t, tunnel[t].far); + + if (ip_changed) { + LOG(2, 0, t, "Updating tunnel IP from heartbeat\n"); + update_kernel_tunnel(0, t); + } + + if (t > config->cluster_highest_tunnelid) // Maintain this in the slave too. + config->cluster_highest_tunnelid = t; + } + + return 1; +} + // // This session just got dropped on us by the master or something. // Make sure our tables up up to date... @@ -6776,6 +7392,7 @@ int load_session(sessionidt s, sessiont *new) { int i; int newip = 0; + int newsession = 0; // Sanity checks. if (new->ip_pool_index >= MAXIPPOOL || @@ -6791,6 +7408,11 @@ int load_session(sessionidt s, sessiont *new) // loading the new session. // + if (new->tunnel != session[s].tunnel || + new->far != session[s].far) + // This is a new session + newsession = 1; + session[s].tunnel = new->tunnel; // For logging in cache_ipmap // See if routes/ip cache need updating @@ -6810,7 +7432,14 @@ int load_session(sessionidt s, sessiont *new) // remove old IPV6 routes... routes6set(s, &session[s], 0); + } + if (newsession) + // The session changed, drop existing kernel acceleration + delete_kernel_accel(s); + + if (newip) + { // add new routes... routesset(s, new, 1); } @@ -6854,6 +7483,9 @@ int load_session(sessionidt s, sessiont *new) if (new->ip_pool_index != -1) fix_address_pool(s); + // and try to enable kernel acceleration + switch_kernel_accel(s); + return 1; } diff --git a/l2tpns.h b/l2tpns.h index 80cccf5..21b5d1b 100644 --- a/l2tpns.h +++ b/l2tpns.h @@ -13,6 +13,9 @@ #include #include #include +#include +#include +#include #include #define VERSION "2.4.1" @@ -446,6 +449,18 @@ typedef struct uint32_t jitteravg; // time in milliseconds of the last fragment. 
uint64_t prev_time; + + // l2tp PPPoL2TP socket + int pppox_fd; + struct pppol2tp_ioc_stats last_stats; + + // ppp channel + int ppp_chan_fd; + + // ppp interface + int ppp_if_fd; + // ppp interface index + int ppp_if_idx; } sessionlocalt; // session flags @@ -479,6 +494,12 @@ typedef struct } tunnelt; +typedef struct +{ + int l2tp_fd; // kernel acceleration UDP socket +} +tunnellocalt; + // 164 bytes per radius session typedef struct // outstanding RADIUS requests { @@ -821,6 +842,7 @@ typedef struct uint32_t dhcp6_server_duid; // DUID of dhcpv6 server (see rfc3315) uint32_t dns6_lifetime; // RDNSS lifetime default 1200 (see rfc6106, rfc4861) (MaxRtrAdvInterval <= Lifetime <= 2*MaxRtrAdvInterval) char default_ipv6_domain_list[255]; + int kernel_accel; // Enable kernel-accelerated support } configt; enum config_typet { INT, STRING, UNSIGNED_LONG, SHORT, BOOL, IPv4, IPv6 }; @@ -1008,6 +1030,7 @@ int sessionsetup(sessionidt s, tunnelidt t); int run_plugins(int plugin_type, void *data); void rebuild_address_pool(void); void throttle_session(sessionidt s, int rate_in, int rate_out); +int load_tunnel(tunnelidt, tunnelt *); int load_session(sessionidt, sessiont *); void become_master(void); // We're the master; kick off any required master initializations. 
@@ -1028,6 +1051,7 @@ extern tunnelt *tunnel; extern bundlet *bundle; extern sessiont *session; extern sessionlocalt *sess_local; +extern tunnellocalt *tunn_local; extern ippoolt *ip_address_pool; #define sessionfree (session[0].next) @@ -1057,7 +1081,11 @@ struct event_data { FD_TYPE_BGP, FD_TYPE_RTNETLINK, FD_TYPE_PPPOEDISC, - FD_TYPE_PPPOESESS + FD_TYPE_PPPOESESS, + FD_TYPE_L2TP, + FD_TYPE_PPPOX, + FD_TYPE_PPP_CHAN, + FD_TYPE_PPP_IF, } type; int index; // for RADIUS, BGP, UDP }; diff --git a/util.c b/util.c index 6e50fce..d2f5445 100644 --- a/util.c +++ b/util.c @@ -127,6 +127,18 @@ pid_t fork_and_close() if (pppoediscfd != -1) close(pppoediscfd); if (pppoesessfd != -1) close(pppoesessfd); + for (i = 0; i <= config->cluster_highest_tunnelid; i++) + { + if (tunn_local[i].l2tp_fd >= 0) close(tunn_local[i].l2tp_fd); + } + + for (i = 0; i <= config->cluster_highest_sessionid; i++) + { + if (sess_local[i].pppox_fd >= 0) close(sess_local[i].pppox_fd); + if (sess_local[i].ppp_chan_fd >= 0) close(sess_local[i].ppp_chan_fd); + if (sess_local[i].ppp_if_fd >= 0) close(sess_local[i].ppp_if_fd); + } + if (snoopfd != -1) close(snoopfd); if (rand_fd != -1) close(rand_fd); if (epollfd != -1) close(epollfd);