Skip to content

Commit 2c3caf8

Browse files
committed
vrrp: add vrrp_exit_timer_thread
Add a vrrp_exit_timer_thread to keep track of the fault exit timeout. When the timer expires, a callback is triggered which either brings up the instance (if the timer was started from try_up_instance) or just changes the state (if the timer was started at init time because the config was reloaded while the instance was in fault state due to a running fault exit timer; without fault_init_exit_delay, the instance would start in backup state). When fault_init_exit_delay is modified and the config is reloaded, the new fault_init_exit_delay is applied in the following manner: 1. If the fault_init_exit_time has already passed, don't change it. The new fault_init_exit_delay will only be applied the next time the interface comes out of fault state. 2. If the new fault_init_exit_delay is greater than the old one, and the fault_init_exit_delay timer is currently running, increase the timer by the difference. 3. If the new fault_init_exit_delay is less than the old one, and the fault_init_exit_delay timer is currently running, reduce the timer by the difference between the two, provided the reduced expiry time is still later than the present time.
1 parent aa918b3 commit 2c3caf8

File tree

5 files changed

+127
-52
lines changed

5 files changed

+127
-52
lines changed

keepalived/include/vrrp.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -378,6 +378,12 @@ typedef struct _vrrp_t {
378378
* remain in Fault or Init state before transitioning to
379379
* another state. 0 means no delay.
380380
*/
381+
thread_ref_t fault_exit_timer_thread; /* Fault exit timer thread, that starts a timer. */
382+
timeval_t fault_exit_time; /* Time after which the instance moves from fault state.
383+
* used when instance is reloaded while the timer is
384+
* running */
385+
void (*fault_exit_timer_cb)(struct _vrrp_t*);
386+
/* Callback that is executed at the expiry of fault_exit_timer */
381387
int state; /* internal state (init/backup/master/fault) */
382388
#ifdef _WITH_SNMP_VRRP_
383389
int configured_state; /* the configured state of the instance */

keepalived/include/vrrp_scheduler.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@ extern void vrrp_gratuitous_arp_vmac_update_thread(thread_ref_t);
7373
extern void vrrp_arp_thread(thread_ref_t);
7474
extern void vrrp_gna_thread(thread_ref_t);
7575
extern void try_up_instance(vrrp_t *, bool);
76+
extern void up_instance(vrrp_t *);
7677
#ifdef _WITH_DUMP_THREADS_
7778
extern void dump_threads(void);
7879
#endif

keepalived/vrrp/vrrp.c

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4788,6 +4788,7 @@ vrrp_complete_init(void)
47884788
vrrp_script_t *scr, *scr_tmp;
47894789
unsigned quickest_takeover;
47904790
unsigned vrrp_timeout_min = UINT_MAX;
4791+
timeval_t fault_exit_time;
47914792

47924793
/* Set defaults if not specified, depending on strict mode */
47934794
if (global_data->vrrp_garp_lower_prio_rep == PARAMETER_UNSET)
@@ -5018,20 +5019,33 @@ vrrp_complete_init(void)
50185019
if (reload) {
50195020
/* Now step through the old vrrp to set the status on matching new instances */
50205021
list_for_each_entry(old_vrrp, &old_vrrp_data->vrrp, e_list) {
5021-
/* We work out for ourselves if the vrrp instance
5022-
* should be in fault state, so it doesn't matter
5023-
* if it was before */
5024-
if (old_vrrp->state == VRRP_STATE_FAULT)
5025-
continue;
5026-
50275022
vrrp = vrrp_exist(old_vrrp, &vrrp_data->vrrp);
50285023
if (vrrp) {
5024+
if (old_vrrp->fault_exit_timer_thread)
5025+
thread_cancel(old_vrrp->fault_exit_timer_thread);
5026+
50295027
/* If we have detected a fault, don't override it */
50305028
if (vrrp->state == VRRP_STATE_FAULT || vrrp->num_script_init)
50315029
continue;
50325030

5031+
fault_exit_time = old_vrrp->fault_exit_time;
5032+
if (vrrp->fault_init_exit_delay >= old_vrrp->fault_init_exit_delay)
5033+
fault_exit_time = timer_add_long(old_vrrp->fault_exit_time,
5034+
vrrp->fault_init_exit_delay - old_vrrp->fault_init_exit_delay);
5035+
else
5036+
fault_exit_time = timer_sub_long(old_vrrp->fault_exit_time,
5037+
old_vrrp->fault_init_exit_delay - vrrp->fault_init_exit_delay);
5038+
/* We work out for ourselves if the vrrp instance
5039+
* should be in fault state, except if it is in
5040+
* fault state because of fault_init_exit_delay.
5041+
* If the fault_exit_time after reload is already in the past, ignore it. */
5042+
if (old_vrrp->state == VRRP_STATE_FAULT &&
5043+
timercmp(&time_now, &fault_exit_time, >))
5044+
continue;
5045+
50335046
vrrp->state = old_vrrp->state;
50345047
vrrp->wantstate = old_vrrp->state;
5048+
vrrp->fault_exit_time = fault_exit_time;
50355049
}
50365050
}
50375051

keepalived/vrrp/vrrp_scheduler.c

Lines changed: 94 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,28 @@ static struct {
176176
/* FAULT */ { {NULL}, {NULL}, {vrrp_sync_master}, {NULL} }
177177
};
178178

179+
static void
180+
vrrp_state_set_backup(vrrp_t *vrrp)
181+
{
182+
log_message(LOG_INFO, "(%s) VRRP moving state to backup", vrrp->iname);
183+
vrrp->state = VRRP_STATE_BACK;
184+
}
185+
186+
static void
187+
vrrp_fault_exit_timer_thread(thread_ref_t thread)
188+
{
189+
vrrp_t *vrrp = THREAD_ARG(thread);
190+
191+
log_message(LOG_INFO, "(%s) VRRP fault_init_exit_delay of %g seconds complete.",
192+
vrrp->iname, vrrp->fault_init_exit_delay/TIMER_HZ_DOUBLE);
193+
194+
if (vrrp->fault_exit_timer_cb)
195+
vrrp->fault_exit_timer_cb(vrrp);
196+
vrrp->fault_exit_timer_thread = NULL;
197+
vrrp->fault_exit_timer_cb = NULL;
198+
vrrp->fault_exit_time.tv_sec = 0;
199+
}
200+
179201
/*
180202
* Initialize state handling
181203
* --rfc2338.6.4.1
@@ -274,8 +296,15 @@ vrrp_init_state(list_head_t *l)
274296
!vrrp->num_script_init &&
275297
(!vrrp->sync || !vrrp->sync->num_member_init)) {
276298
if (vrrp->state != VRRP_STATE_BACK) {
277-
log_message(LOG_INFO, "(%s) Entering BACKUP STATE (init)", vrrp->iname);
278-
vrrp->state = VRRP_STATE_BACK;
299+
if (vrrp->state == VRRP_STATE_FAULT && timercmp(&time_now, &vrrp->fault_exit_time, <)) {
300+
log_message(LOG_INFO, "(%s) Entering FAULT STATE due to exit delay time", vrrp->iname);
301+
vrrp->fault_exit_timer_cb = vrrp_state_set_backup;
302+
vrrp->fault_exit_timer_thread = thread_add_timer_sands(master,
303+
vrrp_fault_exit_timer_thread, vrrp, &vrrp->fault_exit_time);
304+
} else {
305+
log_message(LOG_INFO, "(%s) Entering BACKUP STATE (init)", vrrp->iname);
306+
vrrp->state = VRRP_STATE_BACK;
307+
}
279308
}
280309
} else {
281310
/* Note: if we have alpha mode scripts, we enter fault state, but don't want
@@ -324,8 +353,12 @@ vrrp_init_instance_sands(vrrp_t *vrrp)
324353
else
325354
vrrp->sands = timer_add_long(time_now, vrrp->ms_down_timer);
326355
}
327-
else if (vrrp->state == VRRP_STATE_FAULT || vrrp->state == VRRP_STATE_INIT)
328-
vrrp->sands.tv_sec = TIMER_DISABLED;
356+
else if (vrrp->state == VRRP_STATE_FAULT || vrrp->state == VRRP_STATE_INIT) {
357+
if (timercmp(&time_now, &vrrp->fault_exit_time, <)) {
358+
vrrp->sands = vrrp->fault_exit_time;
359+
} else
360+
vrrp->sands.tv_sec = TIMER_DISABLED;
361+
}
329362

330363
rb_move_cached(&vrrp->rb_sands, &vrrp->sockets->rb_sands, vrrp_timer_less);
331364
}
@@ -688,11 +721,59 @@ vrrp_gratuitous_arp_vmac_update_thread(thread_ref_t thread)
688721
#endif
689722

690723
/*
 * up_instance: bring a VRRP instance out of fault state.
 *
 * Calls vrrp_state_leave_fault(), sends an immediate gratuitous ARP /
 * unsolicited NA for the instance's source address when the instance is
 * unicast and has an interface and a source address, recomputes the
 * instance timer, and finally calls the sync group handler matching the
 * resulting state. vrrp->wantstate is saved on entry and restored before
 * the sync group handlers are called.
 *
 * vrrp: the instance to bring up.
 */
void
691-
try_up_instance(vrrp_t *vrrp, bool leaving_init)
724+
up_instance(vrrp_t *vrrp)
692725
{
693726
int wantstate;
694727
ip_address_t ip_addr = {0};
695728

729+
/* If the sync group can't go to master, we must go to backup state */
730+
/* Remember the requested state; it is restored once we have left fault */
wantstate = vrrp->wantstate;
731+
if (vrrp->sync && vrrp->wantstate == VRRP_STATE_MAST && !vrrp_sync_can_goto_master(vrrp))
732+
vrrp->wantstate = VRRP_STATE_BACK;
733+
734+
/* We can come up */
735+
vrrp_state_leave_fault(vrrp);
736+
737+
/* If we are using unicast, the master may have lost us from its ARP cache.
738+
* We want to renew the ARP cache on the master, so that it can send adverts
739+
* to us straight away, without a delay before it sends an ARP request message
740+
* and we respond. If we don't do this, we can time out and transition to master
741+
* before the master renews its ARP entry, since the master cannot send us adverts
742+
* until it has done so. */
743+
if (__test_bit(VRRP_FLAG_UNICAST, &vrrp->flags) &&
744+
vrrp->ifp &&
745+
vrrp->saddr.ss_family != AF_UNSPEC) {
746+
if (__test_bit(LOG_DETAIL_BIT, &debug))
747+
log_message(LOG_INFO, "%s: sending gratuitous %s for %s", vrrp->iname, vrrp->family == AF_INET ? "ARP" : "NA", inet_sockaddrtos(&vrrp->saddr));
748+
749+
/* The announcement is sent on the base interface of vrrp->ifp */
ip_addr.ifp = IF_BASE_IFP(vrrp->ifp);
750+
751+
if (vrrp->saddr.ss_family == AF_INET) {
752+
ip_addr.u.sin.sin_addr.s_addr = PTR_CAST(struct sockaddr_in, &vrrp->saddr)->sin_addr.s_addr;
753+
send_gratuitous_arp_immediate(ip_addr.ifp, &ip_addr);
754+
} else {
755+
/* IPv6 */
756+
ip_addr.u.sin6_addr = PTR_CAST(struct sockaddr_in6, &vrrp->saddr)->sin6_addr;
757+
ndisc_send_unsolicited_na_immediate(ip_addr.ifp, &ip_addr);
758+
}
759+
}
760+
761+
/* Recompute the instance's sands timer for its new state; the read thread
 * is then requeued (presumably so it picks up the new timeout - confirm
 * against vrrp_thread_requeue_read) */
vrrp_init_instance_sands(vrrp);
762+
vrrp_thread_requeue_read(vrrp);
763+
764+
/* Restore the wantstate saved on entry, which may have been forced to BACKUP above */
vrrp->wantstate = wantstate;
765+
766+
/* For a sync group member, call the sync handler matching the state we ended up in */
if (vrrp->sync) {
767+
if (vrrp->state == VRRP_STATE_MAST)
768+
vrrp_sync_master(vrrp);
769+
else
770+
vrrp_sync_backup(vrrp);
771+
}
772+
}
773+
774+
void
775+
try_up_instance(vrrp_t *vrrp, bool leaving_init)
776+
{
696777
if (leaving_init) {
697778
if (vrrp->num_script_if_fault)
698779
return;
@@ -735,49 +816,16 @@ try_up_instance(vrrp_t *vrrp, bool leaving_init)
735816
return;
736817
}
737818

738-
/* If the sync group can't go to master, we must go to backup state */
739-
wantstate = vrrp->wantstate;
740-
if (vrrp->sync && vrrp->wantstate == VRRP_STATE_MAST && !vrrp_sync_can_goto_master(vrrp))
741-
vrrp->wantstate = VRRP_STATE_BACK;
742-
743-
/* We can come up */
744-
vrrp_state_leave_fault(vrrp);
745-
746-
/* If we are using unicast, the master may have lost us from its ARP cache.
747-
* We want to renew the ARP cache on the master, so that it can send adverts
748-
* to us straight away, without a delay before it sends an ARP request message
749-
* and we respond. If we don't do this, we can time out and transition to master
750-
* before the master renews its ARP entry, since the master cannot send us adverts
751-
* until it has done so. */
752-
if (__test_bit(VRRP_FLAG_UNICAST, &vrrp->flags) &&
753-
vrrp->ifp &&
754-
vrrp->saddr.ss_family != AF_UNSPEC) {
755-
if (__test_bit(LOG_DETAIL_BIT, &debug))
756-
log_message(LOG_INFO, "%s: sending gratuitous %s for %s", vrrp->iname, vrrp->family == AF_INET ? "ARP" : "NA", inet_sockaddrtos(&vrrp->saddr));
757-
758-
ip_addr.ifp = IF_BASE_IFP(vrrp->ifp);
759-
760-
if (vrrp->saddr.ss_family == AF_INET) {
761-
ip_addr.u.sin.sin_addr.s_addr = PTR_CAST(struct sockaddr_in, &vrrp->saddr)->sin_addr.s_addr;
762-
send_gratuitous_arp_immediate(ip_addr.ifp, &ip_addr);
763-
} else {
764-
/* IPv6 */
765-
ip_addr.u.sin6_addr = PTR_CAST(struct sockaddr_in6, &vrrp->saddr)->sin6_addr;
766-
ndisc_send_unsolicited_na_immediate(ip_addr.ifp, &ip_addr);
767-
}
819+
if (vrrp->fault_init_exit_delay) {
820+
vrrp->fault_exit_time = timer_add_long(time_now, vrrp->fault_init_exit_delay);
821+
vrrp->fault_exit_timer_thread =
822+
thread_add_timer_sands(master, vrrp_fault_exit_timer_thread,
823+
vrrp, &vrrp->fault_exit_time);
824+
vrrp->fault_exit_timer_cb = up_instance;
825+
return;
768826
}
769827

770-
vrrp_init_instance_sands(vrrp);
771-
vrrp_thread_requeue_read(vrrp);
772-
773-
vrrp->wantstate = wantstate;
774-
775-
if (vrrp->sync) {
776-
if (vrrp->state == VRRP_STATE_MAST)
777-
vrrp_sync_master(vrrp);
778-
else
779-
vrrp_sync_backup(vrrp);
780-
}
828+
up_instance(vrrp);
781829
}
782830

783831
#ifdef _WITH_BFD_

keepalived/vrrp/vrrp_track.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -552,6 +552,12 @@ down_instance(vrrp_t *vrrp)
552552
if (vrrp->sync && vrrp->sync->num_member_fault++ == 0)
553553
vrrp_sync_fault(vrrp);
554554
}
555+
if (vrrp->fault_exit_timer_thread) {
556+
thread_cancel(vrrp->fault_exit_timer_thread);
557+
vrrp->fault_exit_timer_thread = NULL;
558+
vrrp->fault_exit_timer_cb = NULL;
559+
vrrp->fault_exit_time.tv_sec = 0;
560+
}
555561
}
556562

557563
/* Set effective priority, issue message on changes */

0 commit comments

Comments
 (0)