Skip to content

Keepalived et VIP

Introduction

Keepalived gère les adresses IP virtuelles (VIP) via le protocole VRRP. Il assure le failover automatique de la VIP entre les controllers, garantissant un point d'entrée unique et résilient pour les clients.

Prérequis

Points à apprendre

Fonctionnement VRRP

graph LR
    subgraph normal["État Normal"]
        vip1["VIP 10.0.0.10<br/>Active<br/>Sur Controller-1"]
        ka1["Keepalived-1<br/>MASTER<br/>Priority: 100"]
        ka2["Keepalived-2<br/>BACKUP<br/>Priority: 90"]
        ka3["Keepalived-3<br/>BACKUP<br/>Priority: 80"]
    end

    subgraph failover["Après Failover"]
        vip2["VIP 10.0.0.10<br/>Migrated<br/>Sur Controller-2"]
        ka1f["Keepalived-1<br/>DOWN<br/>Controller-1 failed"]
        ka2f["Keepalived-2<br/>MASTER<br/>Promoted"]
        ka3f["Keepalived-3<br/>BACKUP"]
    end

Configuration Kolla

# /etc/kolla/globals.yml

# Internal VIP (management network)
kolla_internal_vip_address: "10.0.0.10"

# External VIP (public network) - optional
kolla_external_vip_address: "203.0.113.10"

# Interfaces that carry the VIPs
network_interface: "eth0"
kolla_external_vip_interface: "eth1"

# VRRP router ID (must be unique on the network)
keepalived_virtual_router_id: "51"

Configuration Keepalived générée

# Show the generated configuration
docker exec keepalived cat /etc/keepalived/keepalived.conf
# /etc/keepalived/keepalived.conf

# Global settings: e-mail notification parameters and the router identifier.
global_defs {
    notification_email {
        admin@example.com
    }
    notification_email_from keepalived@controller
    smtp_server 127.0.0.1
    smtp_connect_timeout 30
    router_id LVS_DEVEL
    vrrp_skip_check_adv_addr
    vrrp_garp_interval 0
    vrrp_gna_interval 0
}

# Health-check script; it is attached to VI_1 through track_script below.
# NOTE(review): in Keepalived, "interval" is normally the period in seconds,
# "fall"/"rise" the number of consecutive failures/successes needed to change
# the script state, and a negative "weight" is subtracted from the instance
# priority while the script is failing - confirm against keepalived.conf(5).
vrrp_script check_haproxy {
    script "/usr/bin/check_haproxy.sh"
    interval 2
    weight -20
    fall 3
    rise 2
}

# VRRP instance that carries the internal VIP 10.0.0.10 on eth0.
vrrp_instance VI_1 {
    state BACKUP
    interface eth0
    virtual_router_id 51
    priority 100
    advert_int 1

    authentication {
        auth_type PASS
        auth_pass kolla_keepalived
    }

    # Address added on eth0 by the node that holds the VIP
    virtual_ipaddress {
        10.0.0.10/24 dev eth0
    }

    # Ties the result of check_haproxy (defined above) to this instance
    track_script {
        check_haproxy
    }

    # The same notification script is called with the new state as argument
    notify_master "/usr/local/bin/kolla_notify.sh master"
    notify_backup "/usr/local/bin/kolla_notify.sh backup"
    notify_fault "/usr/local/bin/kolla_notify.sh fault"
}

Script de health check

#!/bin/bash
# /usr/bin/check_haproxy.sh
#
# Keepalived health check for HAProxy.
# Exits 0 when the local HAProxy stats endpoint answers with HTTP 200,
# exits 1 in every other case (error status, refused connection, timeout).

# Bound the probe: the check is scheduled every 2 seconds (vrrp_script
# interval), so a hung HAProxy must not be allowed to block it indefinitely.
status=$(curl -s -o /dev/null \
    --connect-timeout 1 --max-time 2 \
    -w '%{http_code}' http://localhost:1984/stats)

# Compare the whole status code instead of searching for the substring "200".
if [[ "$status" == "200" ]]; then
    exit 0
else
    exit 1
fi

Priorités et élection

flowchart TB
    start([Start]) --> init["Tous les Keepalived démarrent<br/>en état BACKUP"]

    init --> parallel{" "}
    parallel --> c1["Controller-1<br/>Priority: 100"]
    parallel --> c2["Controller-2<br/>Priority: 90"]
    parallel --> c3["Controller-3<br/>Priority: 80"]

    c1 --> exchange
    c2 --> exchange
    c3 --> exchange

    exchange["Échange advertisements<br/>VRRP (multicast 224.0.0.18)"]

    exchange --> prio{Plus haute<br/>priorité?}
    prio -->|Oui| master["Devient MASTER<br/>Active la VIP"]
    prio -->|Non| backup["Reste BACKUP<br/>Attend advertisements"]

    master --> monitor["Monitoring continu"]
    backup --> monitor

    monitor --> fail{MASTER<br/>fail?}
    fail -->|Oui| timeout["Timeout (3x advert_int)<br/>Nouvelle élection"]
    fail -->|Non| continue["Continue monitoring"]

    timeout --> stop([Stop])
    continue --> stop

VIP externe vs interne

graph TB
    subgraph internet["Internet"]
        clients["Clients externes"]
    end

    subgraph dc["Datacenter"]
        subgraph dmz["DMZ / Public Network"]
            ext_vip["VIP Externe<br/>203.0.113.10<br/>kolla_external_vip_address"]
        end

        subgraph mgmt["Management Network"]
            int_vip["VIP Interne<br/>10.0.0.10<br/>kolla_internal_vip_address"]
        end

        subgraph ctrl["Controllers"]
            haproxy["HAProxy<br/>Écoute sur les deux VIPs"]
            services["OpenStack Services<br/>Communication interne"]
        end

        subgraph compute["Compute Nodes"]
            agents["Agents<br/>Utilisent VIP interne"]
        end
    end

    clients -->|"HTTPS (public)<br/>443"| ext_vip
    ext_vip --> haproxy
    agents -->|"HTTP (internal)"| int_vip
    int_vip --> haproxy

Gestion du failover

# Failover time = 3 x advert_int (3 seconds by default)

# Force a failover: run this on the current MASTER. Once Keepalived is stopped
# the node no longer sends VRRP advertisements and a BACKUP node takes over
# the VIP. Bring the node back afterwards with: docker start keepalived
# (Sending SIGUSR1 does NOT change the VRRP state; it only dumps runtime data.)
docker stop keepalived

# Check the current state: SIGUSR1 asks Keepalived to write its runtime data
# (including the state of each VRRP instance) to /tmp/keepalived.data.
# The dump is written asynchronously, so re-run the cat if the file is stale.
docker exec keepalived pkill -USR1 keepalived
docker exec keepalived cat /tmp/keepalived.data
# or check whether this node currently holds the VIP
# ("inet <addr>/" avoids matching 10.0.0.100-109 or treating dots as wildcards)
ip addr show eth0 | grep -F "inet 10.0.0.10/"

# Failover logs (docker logs replays the container's stderr on stderr,
# so merge both streams before filtering)
docker logs keepalived 2>&1 | grep -E "(MASTER|BACKUP|FAULT)"

Preemption

# Preemption is enabled by default:
# the node with the highest priority takes MASTER back when it returns

vrrp_instance VI_1 {
    # Option 1: disable preemption to avoid unnecessary failovers
    nopreempt

    # Option 2 (alternative to option 1): set a delay before preemption
    preempt_delay 300  # 5 minutes
}

Configuration personnalisée

# /etc/kolla/config/keepalived/keepalived.conf

# Add custom configuration here

# Example: script-based notifications
vrrp_instance VI_1 {
    notify_master "/opt/scripts/on_master.sh"
    notify_backup "/opt/scripts/on_backup.sh"
}

Diagramme séquence failover

sequenceDiagram
    participant K1 as Keepalived-1<br/>(MASTER)
    participant VIP as VIP 10.0.0.10
    participant K2 as Keepalived-2<br/>(BACKUP)
    participant K3 as Keepalived-3<br/>(BACKUP)
    participant C as Clients

    K1->>K2: VRRP Advertisement<br/>(every 1s)
    K1->>K3: VRRP Advertisement

    Note over K1: Controller-1 crashes

    K2->>K2: No advertisement<br/>for 3 seconds

    K2->>K2: Timeout!<br/>I become MASTER

    K2->>VIP: ip addr add<br/>10.0.0.10/24

    K2->>K2: Send Gratuitous ARP

    K2->>K3: VRRP Advertisement<br/>(I'm MASTER now)

    C->>VIP: Request
    VIP->>K2: Forward to new MASTER

    Note over C: Failover complete<br/>< 5 seconds

Exemples pratiques

Vérifier l'état Keepalived

#!/bin/bash
# keepalived-status.sh
#
# Prints a Keepalived status report: the processes running in the local
# keepalived container, which controller currently holds the VIP, the VRRP
# statistics file (when present) and the most recent state-transition logs.

echo "=== Keepalived Process ==="
docker exec keepalived ps aux | grep keepalived

printf '\n=== Current State ===\n'
for host in controller-{1,2,3}; do
    # The remote pipeline exits 0 when the VIP is configured and 1 when it is
    # not; any other status (ssh itself returns 255) means the check could not
    # be performed. "inet <addr>/" avoids matching 10.0.0.100-109.
    rc=0
    ssh -o ConnectTimeout=2 "$host" \
        "ip addr show eth0 | grep -qF 'inet 10.0.0.10/'" || rc=$?
    case "$rc" in
        0) state="MASTER" ;;
        1) state="BACKUP" ;;
        *) state="UNREACHABLE" ;;
    esac
    printf '%s: %s\n' "$host" "$state"
done

printf '\n=== VRRP Stats ===\n'
# The stats file only exists once Keepalived has been asked to dump it
# (SIGUSR2), hence the fallback message.
docker exec keepalived cat /tmp/keepalived.stats 2>/dev/null || echo "Stats not available"

printf '\n=== Recent Logs ===\n'
# docker logs replays the container's stderr on stderr: merge it so that grep
# filters every log line.
docker logs --tail 20 keepalived 2>&1 | grep -E "(MASTER|BACKUP|FAULT|transition)"

Test de failover VIP

# Identify the current MASTER (the controller that holds the VIP).
# "inet <addr>/" avoids matching 10.0.0.100-109 or treating dots as wildcards.
master=""
for host in controller-{1,2,3}; do
    if ssh "$host" "ip addr show | grep -qF 'inet 10.0.0.10/'"; then
        echo "VIP on $host"
        master="$host"
    fi
done

# Simulate a failure of the MASTER that was just detected
# (":?" aborts here instead of stopping an arbitrary node if no VIP was found)
ssh "${master:?VIP not found on any controller}" "docker stop keepalived"

# Verify the failover on the remaining controllers
sleep 5
for host in controller-{1,2,3}; do
    [[ "$host" == "$master" ]] && continue
    if ssh "$host" "ip addr show | grep -qF 'inet 10.0.0.10/'"; then
        echo "VIP migrated to $host"
    fi
done

# Check connectivity
ping -c 3 10.0.0.10

# Restore
ssh "$master" "docker start keepalived"

Monitoring Keepalived

#!/bin/bash
# monitor-vip.sh
#
# Continuous monitoring script: every 5 seconds, asks each controller over SSH
# whether it holds the VIP and appends the result to the log file. An alert is
# also printed on stdout when no controller, or more than one controller,
# holds the VIP.

readonly VIP="10.0.0.10"
readonly LOG_FILE="/var/log/vip-monitor.log"
readonly CONTROLLERS=(controller-{1,2,3})

while true; do
    # Collect every controller on which the VIP is configured.
    # "inet <addr>/" avoids matching longer addresses such as 10.0.0.100.
    holders=()
    for h in "${CONTROLLERS[@]}"; do
        if ssh -o ConnectTimeout=1 "$h" \
            "ip addr show | grep -qF 'inet $VIP/'" 2>/dev/null; then
            holders+=("$h")
        fi
    done
    CURRENT="${holders[*]}"

    if [[ -z "$CURRENT" ]]; then
        echo "$(date): ALERT - VIP $VIP not found on any controller!" | tee -a "$LOG_FILE"
    elif (( ${#holders[@]} > 1 )); then
        # Several holders at once indicates a VRRP split-brain.
        echo "$(date): ALERT - VIP $VIP active on several controllers: $CURRENT" | tee -a "$LOG_FILE"
    else
        echo "$(date): VIP $VIP active on $CURRENT" >> "$LOG_FILE"
    fi

    sleep 5
done

Ressources

Checkpoint

  • Keepalived est déployé sur tous les controllers
  • La VIP est accessible depuis les clients
  • Le failover fonctionne en < 5 secondes
  • Les health checks HAProxy déclenchent le failover
  • Les logs montrent les transitions MASTER/BACKUP
  • La préemption est configurée selon les besoins