merge cluster fixes from release_2_0 branch

This commit is contained in:
bodea 2005-05-26 12:17:30 +00:00
parent 75ca5fa449
commit ca4801db55
8 changed files with 143 additions and 32 deletions

18
Changes
View file

@ -1,4 +1,4 @@
* Mon May 16 2005 Brendan O'Dea <bod@optusnet.com.au> 2.1.0 * Thu May 26 2005 Brendan O'Dea <bod@optusnet.com.au> 2.1.0
- Add IPv6 support from Jonathan McDowell. - Add IPv6 support from Jonathan McDowell.
- Add CHAP support from Jordan Hrycaj. - Add CHAP support from Jordan Hrycaj.
- Add interim accounting support from Vladislav Bjelic. - Add interim accounting support from Vladislav Bjelic.
@ -61,6 +61,22 @@
- Add sessionctl plugin to provide drop/kill via nsctl. - Add sessionctl plugin to provide drop/kill via nsctl.
- Add handling of "throttle=N" RADIUS attributes. - Add handling of "throttle=N" RADIUS attributes.
- Fix RADIUS indexing (should have 16K entries with 64 sockets). - Fix RADIUS indexing (should have 16K entries with 64 sockets).
- Cluster changes from Michael, intended to prevent a stray master
from trashing a cluster:
+ Ignore heartbeats from peers claiming to be the master before the
timeout on the old master has expired.
+ A master receiving a stray heartbeat sends a unicast HB back, which
should cause the rogue to die due to the tie-breaker code.
+ Keep probing the master for late heartbeats.
+ Drop BGP as soon as we become master with the minimum required peers.
+ Any PING seen from a master forces an election (rather than just
where basetime is zero).
+ A slave which receives a LASTSEEN message (presumably a restarted
master) sends back new message type, C_MASTER which indicates the
address of the current master.
- New config option: cluster_master_min_adv which determines the minimum
number of up to date slaves required before the master will drop
routes.
* Fri Dec 17 2004 Brendan O'Dea <bod@optusnet.com.au> 2.0.13 * Fri Dec 17 2004 Brendan O'Dea <bod@optusnet.com.au> 2.0.13
- Better cluster master collision resolution: keep a counter of state - Better cluster master collision resolution: keep a counter of state

View file

@ -324,6 +324,11 @@ Cluster heartbeat timeout in tenths of a second. A new master will be
elected when this interval has been passed without seeing a heartbeat elected when this interval has been passed without seeing a heartbeat
from the master. from the master.
</LI> </LI>
<LI><B>cluster_master_min_adv</B> (int)<BR>
Determines the minimum number of up to date slaves required before the
master will drop routes (default: 1).
</LI>
</UL> </UL>
<P>BGP routing configuration is entered by the command: <P>BGP routing configuration is entered by the command:

View file

@ -2,7 +2,7 @@
.de Id .de Id
.ds Dt \\$4 \\$5 .ds Dt \\$4 \\$5
.. ..
.Id $Id: startup-config.5,v 1.7 2005/05/10 11:59:25 bodea Exp $ .Id $Id: startup-config.5,v 1.8 2005/05/26 12:17:31 bodea Exp $
.TH STARTUP-CONFIG 5 "\*(Dt" L2TPNS "File Formats and Conventions" .TH STARTUP-CONFIG 5 "\*(Dt" L2TPNS "File Formats and Conventions"
.SH NAME .SH NAME
startup\-config \- configuration file for l2tpns startup\-config \- configuration file for l2tpns
@ -189,6 +189,10 @@ Cluster heartbeat timeout in tenths of a second. A new master will be
elected when this interval has been passed without seeing a heartbeat elected when this interval has been passed without seeing a heartbeat
from the master. from the master.
.TP .TP
.B cluster_master_min_adv
Determines the minimum number of up to date slaves required before the
master will drop routes (default: 1).
.TP
.B ipv6_prefix .B ipv6_prefix
Enable negotiation of IPv6. This forms the first 64 bits of the Enable negotiation of IPv6. This forms the first 64 bits of the
client allocated address. The remaining 64 come from the allocated client allocated address. The remaining 64 come from the allocated

126
cluster.c
View file

@ -1,6 +1,6 @@
// L2TPNS Clustering Stuff // L2TPNS Clustering Stuff
char const *cvs_id_cluster = "$Id: cluster.c,v 1.38 2005/05/24 07:45:13 bodea Exp $"; char const *cvs_id_cluster = "$Id: cluster.c,v 1.39 2005/05/26 12:17:30 bodea Exp $";
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
@ -193,7 +193,7 @@ static void add_type(char **p, int type, int more, char *data, int size)
} }
// advertise our presence via BGP or gratuitous ARP // advertise our presence via BGP or gratuitous ARP
static void advertise(void) static void advertise_routes(void)
{ {
#ifdef BGP #ifdef BGP
if (bgp_configured) if (bgp_configured)
@ -204,6 +204,15 @@ static void advertise(void)
send_garp(config->bind_address); // Start taking traffic. send_garp(config->bind_address); // Start taking traffic.
} }
// Stop advertising our routes.  Only BGP has anything to withdraw, so
// this compiles to a no-op when BGP support is not built in, and does
// nothing at run time when BGP is not configured.
static void withdraw_routes(void)
{
#ifdef BGP
	if (!bgp_configured)
		return;

	bgp_enable_routing(0);
#endif /* BGP */
}
static void cluster_uptodate(void) static void cluster_uptodate(void)
{ {
if (config->cluster_iam_uptodate) if (config->cluster_iam_uptodate)
@ -215,7 +224,7 @@ static void cluster_uptodate(void)
config->cluster_iam_uptodate = 1; config->cluster_iam_uptodate = 1;
LOG(0, 0, 0, "Now uptodate with master.\n"); LOG(0, 0, 0, "Now uptodate with master.\n");
advertise(); advertise_routes();
} }
// //
@ -457,17 +466,22 @@ void cluster_check_slaves(void)
continue; // Shutdown peer! Skip them. continue; // Shutdown peer! Skip them.
if (peers[i].uptodate) if (peers[i].uptodate)
have_peers = 1; have_peers++;
else
if (!peers[i].uptodate)
config->cluster_iam_uptodate = 0; // Start fast heartbeats config->cluster_iam_uptodate = 0; // Start fast heartbeats
} }
#ifdef BGP // in a cluster, withdraw/add routes when we get a peer/lose peers
// in a cluster, withdraw/add routes when we get a peer/lose all peers if (have_peers != had_peers)
if (bgp_configured && have_peers != had_peers) {
bgp_enable_routing(!have_peers); if (had_peers < config->cluster_master_min_adv &&
#endif /* BGP */ have_peers >= config->cluster_master_min_adv)
withdraw_routes();
else if (had_peers >= config->cluster_master_min_adv &&
have_peers < config->cluster_master_min_adv)
advertise_routes();
}
} }
// //
@ -480,20 +494,22 @@ void cluster_check_master(void)
int last_free = 0; int last_free = 0;
clockt t = TIME; clockt t = TIME;
static int probed = 0; static int probed = 0;
int have_peers;
if (config->cluster_iam_master) if (config->cluster_iam_master)
return; // Only runs on the slaves... return; // Only runs on the slaves...
// If the master is late (missed 2 heartbeats by a second and a // If the master is late (missed 2 heartbeats by a second and a
// hair) it may be that the switch has dropped us from the // hair) it may be that the switch has dropped us from the
// multicast group, try unicasting one probe to the master // multicast group, try unicasting probes to the master
// which will hopefully respond with a unicast heartbeat that // which will hopefully respond with a unicast heartbeat that
// will allow us to limp along until the querier next runs. // will allow us to limp along until the querier next runs.
if (TIME > (config->cluster_last_hb + 2 * config->cluster_hb_interval + 11)) if (config->cluster_master_address
&& TIME > (config->cluster_last_hb + 2 * config->cluster_hb_interval + 11))
{ {
if (!probed && config->cluster_master_address) if (!probed || (TIME > (probed + 2 * config->cluster_hb_interval)))
{ {
probed = 1; probed = TIME;
LOG(1, 0, 0, "Heartbeat from master %.1fs late, probing...\n", LOG(1, 0, 0, "Heartbeat from master %.1fs late, probing...\n",
0.1 * (TIME - (config->cluster_last_hb + config->cluster_hb_interval))); 0.1 * (TIME - (config->cluster_last_hb + config->cluster_hb_interval)));
@ -511,7 +527,7 @@ void cluster_check_master(void)
LOG(0, 0, 0, "Master timed out! Holding election...\n"); LOG(0, 0, 0, "Master timed out! Holding election...\n");
for (i = 0; i < num_peers; i++) for (i = have_peers = 0; i < num_peers; i++)
{ {
if ((peers[i].timestamp + config->cluster_hb_timeout) < t) if ((peers[i].timestamp + config->cluster_hb_timeout) < t)
continue; // Stale peer! Skip them. continue; // Stale peer! Skip them.
@ -529,6 +545,9 @@ void cluster_check_master(void)
LOG(1, 0, 0, "Expecting %s to become master\n", fmtaddr(peers[i].peer, 0)); LOG(1, 0, 0, "Expecting %s to become master\n", fmtaddr(peers[i].peer, 0));
return; // They'll win the election. Wait for them to come up. return; // They'll win the election. Wait for them to come up.
} }
if (peers[i].uptodate)
have_peers++;
} }
// Wow. it's been ages since I last heard a heartbeat // Wow. it's been ages since I last heard a heartbeat
@ -540,6 +559,11 @@ void cluster_check_master(void)
LOG(0, 0, 0, "I am declaring myself the master!\n"); LOG(0, 0, 0, "I am declaring myself the master!\n");
if (have_peers < config->cluster_master_min_adv)
advertise_routes();
else
withdraw_routes();
if (config->cluster_seq_number == -1) if (config->cluster_seq_number == -1)
config->cluster_seq_number = 0; config->cluster_seq_number = 0;
@ -620,9 +644,6 @@ void cluster_check_master(void)
config->cluster_undefined_tunnels = 0; config->cluster_undefined_tunnels = 0;
config->cluster_iam_uptodate = 1; // assume all peers are up-to-date config->cluster_iam_uptodate = 1; // assume all peers are up-to-date
if (!num_peers) // lone master
advertise();
// FIXME. We need to fix up the tunnel control message // FIXME. We need to fix up the tunnel control message
// queue here! There's a number of other variables we // queue here! There's a number of other variables we
// should also update. // should also update.
@ -902,6 +923,13 @@ static int cluster_catchup_slave(int seq, in_addr_t slave)
int diff; int diff;
LOG(1, 0, 0, "Slave %s sent LASTSEEN with seq %d\n", fmtaddr(slave, 0), seq); LOG(1, 0, 0, "Slave %s sent LASTSEEN with seq %d\n", fmtaddr(slave, 0), seq);
if (!config->cluster_iam_master) {
LOG(1, 0, 0, "Got LASTSEEN but I'm not a master! Redirecting it to %s.\n",
fmtaddr(config->cluster_master_address, 0));
peer_send_message(slave, C_MASTER, config->cluster_master_address, NULL, 0);
return 0;
}
diff = config->cluster_seq_number - seq; // How many packet do we need to send? diff = config->cluster_seq_number - seq; // How many packet do we need to send?
if (diff < 0) if (diff < 0)
@ -913,9 +941,11 @@ static int cluster_catchup_slave(int seq, in_addr_t slave)
return peer_send_message(slave, C_KILL, seq, NULL, 0);// Kill the slave. Nothing else to do. return peer_send_message(slave, C_KILL, seq, NULL, 0);// Kill the slave. Nothing else to do.
} }
LOG(1, 0, 0, "Sending %d catchup packets to slave %s\n", diff, fmtaddr(slave, 0) );
// Now resend every packet that it missed, in order. // Now resend every packet that it missed, in order.
while (seq != config->cluster_seq_number) { while (seq != config->cluster_seq_number) {
s = seq%HB_HISTORY_SIZE; s = seq % HB_HISTORY_SIZE;
if (seq != past_hearts[s].seq) { if (seq != past_hearts[s].seq) {
LOG(0, 0, 0, "Tried to re-send heartbeat for %s but %d doesn't match %d! (%d,%d)\n", LOG(0, 0, 0, "Tried to re-send heartbeat for %s but %d doesn't match %d! (%d,%d)\n",
fmtaddr(slave, 0), seq, past_hearts[s].seq, s, config->cluster_seq_number); fmtaddr(slave, 0), seq, past_hearts[s].seq, s, config->cluster_seq_number);
@ -968,8 +998,10 @@ static int cluster_add_peer(in_addr_t peer, time_t basetime, pingt *pp, int size
} }
// Is this the master shutting down?? // Is this the master shutting down??
if (peer == config->cluster_master_address && !basetime) { if (peer == config->cluster_master_address) {
LOG(3, 0, 0, "Master %s shutting down...\n", fmtaddr(config->cluster_master_address, 0)); LOG(3, 0, 0, "Master %s %s\n", fmtaddr(config->cluster_master_address, 0),
basetime ? "has restarted!" : "shutting down...");
config->cluster_master_address = 0; config->cluster_master_address = 0;
config->cluster_last_hb = 0; // Force an election. config->cluster_last_hb = 0; // Force an election.
cluster_check_master(); cluster_check_master();
@ -1010,6 +1042,20 @@ static int cluster_add_peer(in_addr_t peer, time_t basetime, pingt *pp, int size
return 1; return 1;
} }
// Handle a C_MASTER message.  A slave sends C_MASTER in reply to a
// message which should have gone to a master; the payload is the
// address the sender believes the master to have.
//
// If we are the master ourselves the message is ignored.  Otherwise the
// advertised address is recorded as the master address and
// cluster_check_master() is run.
//
// Always returns 0.
static int cluster_set_master(in_addr_t peer, in_addr_t master)
{
	if (!config->cluster_iam_master)
	{
		LOG(3, 0, 0, "Peer %s set the master to %s...\n", fmtaddr(peer, 0),
			fmtaddr(master, 1));

		config->cluster_master_address = master;
		cluster_check_master();
	}

	return 0;
}
/* Handle the slave updating the byte counters for the master. */ /* Handle the slave updating the byte counters for the master. */
// //
// Note that we don't mark the session as dirty; We rely on // Note that we don't mark the session as dirty; We rely on
@ -1283,9 +1329,33 @@ static int cluster_process_heartbeat(uint8_t *data, int size, int more, uint8_t
exit(1); exit(1);
} }
//
// Send it a unicast heartbeat to give it a chance to die.
// NOTE: It's actually safe to do seq-number - 1 without checking
// for wrap around.
//
cluster_catchup_slave(config->cluster_seq_number - 1, addr);
return -1; // Skip it. return -1; // Skip it.
} }
//
// Try and guard against a stray master appearing.
//
// Ignore heartbeats received from another master before the
// timeout (less a smidgen) for the old master has elapsed.
//
// Note that after a clean failover, the cluster_master_address
// is cleared, so this doesn't run.
//
if (config->cluster_master_address && addr != config->cluster_master_address
&& (config->cluster_last_hb + config->cluster_hb_timeout - 11) > TIME) {
LOG(0, 0, 0, "Ignoring stray heartbeat from %s, current master %s has not yet timed out (last heartbeat %.1f seconds ago).\n",
fmtaddr(addr, 0), fmtaddr(config->cluster_master_address, 1),
0.1 * (TIME - config->cluster_last_hb));
return -1; // ignore
}
if (config->cluster_seq_number == -1) // Don't have one. Just align to the master... if (config->cluster_seq_number == -1) // Don't have one. Just align to the master...
config->cluster_seq_number = h->seq; config->cluster_seq_number = h->seq;
@ -1487,10 +1557,13 @@ int processcluster(char *data, int size, in_addr_t addr)
s -= sizeof(uint32_t); s -= sizeof(uint32_t);
switch (type) { switch (type) {
case C_PING: // Update the peers table. case C_PING: // Update the peers table.
return cluster_add_peer(addr, more, (pingt *) p, s); return cluster_add_peer(addr, more, (pingt *) p, s);
case C_LASTSEEN: // Catch up a slave (slave missed a packet). case C_MASTER: // Our master is wrong
return cluster_set_master(addr, more);
case C_LASTSEEN: // Catch up a slave (slave missed a packet).
return cluster_catchup_slave(more, addr); return cluster_catchup_slave(more, addr);
case C_FORWARD: { // Forwarded control packet. pass off to processudp. case C_FORWARD: { // Forwarded control packet. pass off to processudp.
@ -1531,6 +1604,11 @@ int processcluster(char *data, int size, in_addr_t addr)
return 0; return 0;
case C_BYTES: case C_BYTES:
if (!config->cluster_iam_master) {
LOG(0, 0, 0, "I'm not the master, but I got a C_BYTES from %s?\n", fmtaddr(addr, 0));
return -1;
}
return cluster_handle_bytes(p, s); return cluster_handle_bytes(p, s);
case C_KILL: // The master asked us to die!? (usually because we're too out of date). case C_KILL: // The master asked us to die!? (usually because we're too out of date).

View file

@ -1,5 +1,5 @@
// L2TPNS Clustering Stuff // L2TPNS Clustering Stuff
// $Id: cluster.h,v 1.10 2005/05/08 08:00:50 bodea Exp $ // $Id: cluster.h,v 1.11 2005/05/26 12:17:30 bodea Exp $
#ifndef __CLUSTER_H__ #ifndef __CLUSTER_H__
#define __CLUSTER_H__ #define __CLUSTER_H__
@ -14,11 +14,12 @@
#define C_LASTSEEN 7 // Tell master the last heartbeat that I handled. #define C_LASTSEEN 7 // Tell master the last heartbeat that I handled.
#define C_KILL 8 // Tell a slave to die. #define C_KILL 8 // Tell a slave to die.
#define C_FORWARD 9 // Forwarded packet.. #define C_FORWARD 9 // Forwarded packet..
#define C_BYTES 10 // Update byte counters. #define C_BYTES 10 // Update byte counters.
#define C_THROTTLE 11 // A packet for the master to throttle. (The TBF tells direction). #define C_THROTTLE 11 // A packet for the master to throttle. (The TBF tells direction).
#define C_CSESSION 12 // Compressed session structure. #define C_CSESSION 12 // Compressed session structure.
#define C_CTUNNEL 13 // Compressed tunnel structure. #define C_CTUNNEL 13 // Compressed tunnel structure.
#define C_GARDEN 14 // Gardened packet #define C_GARDEN 14 // Gardened packet
#define C_MASTER 15 // Tell a slave the address of the master.
#define HB_VERSION 5 // Protocol version number.. #define HB_VERSION 5 // Protocol version number..
#define HB_MAX_SEQ (1<<30) // Maximum sequence number. (MUST BE A POWER OF 2!) #define HB_MAX_SEQ (1<<30) // Maximum sequence number. (MUST BE A POWER OF 2!)

View file

@ -4,7 +4,7 @@
// Copyright (c) 2002 FireBrick (Andrews & Arnold Ltd / Watchfront Ltd) - GPL licenced // Copyright (c) 2002 FireBrick (Andrews & Arnold Ltd / Watchfront Ltd) - GPL licenced
// vim: sw=8 ts=8 // vim: sw=8 ts=8
char const *cvs_id_l2tpns = "$Id: l2tpns.c,v 1.104 2005/05/16 04:51:16 bodea Exp $"; char const *cvs_id_l2tpns = "$Id: l2tpns.c,v 1.105 2005/05/26 12:17:30 bodea Exp $";
#include <arpa/inet.h> #include <arpa/inet.h>
#include <assert.h> #include <assert.h>
@ -131,6 +131,7 @@ config_descriptt config_values[] = {
CONFIG("cluster_interface", cluster_interface, STRING), CONFIG("cluster_interface", cluster_interface, STRING),
CONFIG("cluster_hb_interval", cluster_hb_interval, INT), CONFIG("cluster_hb_interval", cluster_hb_interval, INT),
CONFIG("cluster_hb_timeout", cluster_hb_timeout, INT), CONFIG("cluster_hb_timeout", cluster_hb_timeout, INT),
CONFIG("cluster_master_min_adv", cluster_master_min_adv, INT),
CONFIG("ipv6_prefix", ipv6_prefix, IPv6), CONFIG("ipv6_prefix", ipv6_prefix, IPv6),
{ NULL, 0, 0, 0 }, { NULL, 0, 0, 0 },
}; };
@ -3199,6 +3200,7 @@ static void initdata(int optdebug, char *optconfig)
config->debug = optdebug; config->debug = optdebug;
config->num_tbfs = MAXTBFS; config->num_tbfs = MAXTBFS;
config->rl_rate = 28; // 28kbps config->rl_rate = 28; // 28kbps
config->cluster_master_min_adv = 1;
strcpy(config->random_device, RANDOMDEVICE); strcpy(config->random_device, RANDOMDEVICE);
log_stream = stderr; log_stream = stderr;

View file

@ -1,5 +1,5 @@
// L2TPNS Global Stuff // L2TPNS Global Stuff
// $Id: l2tpns.h,v 1.72 2005/05/20 01:32:14 bodea Exp $ // $Id: l2tpns.h,v 1.73 2005/05/26 12:17:30 bodea Exp $
#ifndef __L2TPNS_H__ #ifndef __L2TPNS_H__
#define __L2TPNS_H__ #define __L2TPNS_H__
@ -474,7 +474,8 @@ typedef struct
char old_plugins[64][MAXPLUGINS]; char old_plugins[64][MAXPLUGINS];
int next_tbf; // Next HTB id available to use int next_tbf; // Next HTB id available to use
int scheduler_fifo; // If the system has multiple CPUs, use FIFO scheduling policy for this process. int scheduler_fifo; // If the system has multiple CPUs, use FIFO scheduling
// policy for this process.
int lock_pages; // Lock pages into memory. int lock_pages; // Lock pages into memory.
int icmp_rate; // Max number of ICMP unreachable per second to send int icmp_rate; // Max number of ICMP unreachable per second to send
int max_packets; // DoS prevention: per session limit of packets/0.1s int max_packets; // DoS prevention: per session limit of packets/0.1s
@ -501,6 +502,10 @@ typedef struct
struct in6_addr ipv6_prefix; // Our IPv6 network pool. struct in6_addr ipv6_prefix; // Our IPv6 network pool.
int cluster_master_min_adv; // Master advertises routes while the number of up to date
// slaves is less than this value.
#ifdef BGP #ifdef BGP
#define BGP_NUM_PEERS 2 #define BGP_NUM_PEERS 2
uint16_t as_number; uint16_t as_number;

View file

@ -43,5 +43,5 @@ rm -rf %{buildroot}
%attr(644,root,root) /usr/share/man/man[58]/* %attr(644,root,root) /usr/share/man/man[58]/*
%changelog %changelog
* Mon May 16 2005 Brendan O'Dea <bod@optusnet.com.au> 2.1.0-1 * Thu May 26 2005 Brendan O'Dea <bod@optusnet.com.au> 2.1.0-1
- 2.1.0 release, see /usr/share/doc/l2tpns-2.1.0/Changes - 2.1.0 release, see /usr/share/doc/l2tpns-2.1.0/Changes