proxmox-node-config/manage-lxc-vm-notes.sh

452 lines
17 KiB
Bash
Executable File

#!/bin/bash
# Run fault-tolerantly per guest: a failure on one VM/container must not abort the whole script.
# One human-readable summary row per guest is appended to this array as the script runs.
declare -a SUMMARY_LINES=()
#######################################
# Reports a Tailscale problem inside an LXC container, if there is one.
# Arguments: $1 - container VMID handed to `pct exec`
# Outputs:   at most one line on stdout:
#              tailscale-not-installed  no `tailscale` binary in the guest PATH
#              tailscale-down           the service is not reported as running
#            Nothing is printed when the service is running, when no known
#            service manager is found, or when `pct exec` yields no output.
#######################################
check_lxc_tailscale_issue() {
  local vmid="$1"
  # The guest-side script must stay free of single quotes: it travels as one
  # single-quoted argument.
  pct exec "$vmid" -- sh -c '
    if ! command -v tailscale >/dev/null 2>&1; then
      echo tailscale-not-installed
      exit 0
    fi
    if command -v systemctl >/dev/null 2>&1; then
      systemctl is-active --quiet tailscaled || echo tailscale-down
    elif command -v rc-service >/dev/null 2>&1 || [ -e /etc/init.d/tailscaled ] || [ -e /etc/init.d/tailscale ]; then
      # OpenRC: the init script may be called "tailscaled" or "tailscale".
      # Accept either name, as update_notes does, before declaring it down.
      rc-service tailscaled status >/dev/null 2>&1 ||
        rc-service tailscale status >/dev/null 2>&1 ||
        echo tailscale-down
    fi
  ' 2>/dev/null | tr -d '\r' | head -n1
}
#######################################
# Audits /etc/docker/daemon.json inside a container for the settings this
# script expects (metrics address, experimental flag, unix + TCP listeners).
# Arguments: $1 - container VMID handed to `pct exec`
# Outputs:   first line of the probe result on stdout, one of:
#              OK
#              docker-missing:daemon.json
#              docker-check-skipped-no-jq
#              docker-missing:<comma-separated list of missing settings>
#######################################
check_lxc_docker_issue() {
  local vmid="$1"
  # Guest-side probe. It is shipped as a single argument, so it must not
  # contain single quotes.
  local probe='
    cfg="/etc/docker/daemon.json"
    [ -f "$cfg" ] || { echo docker-missing:daemon.json; exit 0; }
    command -v jq >/dev/null 2>&1 || { echo docker-check-skipped-no-jq; exit 0; }

    # Prints the jq type of the position of listener $1 inside .hosts
    # ("number" when the listener is present, "null" otherwise).
    host_entry_type() {
      jq -r ".hosts // [] | index(\"$1\") | type" "$cfg" 2>/dev/null || echo null
    }

    gaps=""
    if [ "$(jq -r ".\"metrics-addr\" // empty" "$cfg")" != "0.0.0.0:9023" ]; then
      gaps="${gaps},metrics-addr"
    fi
    if [ "$(jq -r ".experimental // false" "$cfg")" != "true" ]; then
      gaps="${gaps},experimental"
    fi
    if [ "$(host_entry_type "unix:///var/run/docker.sock")" != "number" ]; then
      gaps="${gaps},hosts-unix"
    fi
    if [ "$(host_entry_type "tcp://0.0.0.0:2375")" != "number" ]; then
      gaps="${gaps},hosts-tcp"
    fi

    case "$gaps" in
      "") echo OK ;;
      *) echo docker-missing:${gaps#,} ;;
    esac
  '
  pct exec "$vmid" -- sh -c "$probe" 2>/dev/null | tr -d '\r' | head -n1
}
#######################################
# Checks whether the Docker ports are listening inside a container:
# 2375 (API) and 9023 (metrics), on a wildcard IPv4/IPv6 address.
# Arguments: $1 - container VMID handed to `pct exec`
# Outputs:   first line of the probe result on stdout:
#              docker-ports:OK
#              docker-ports:missing,<api2375>[,<metrics9023>]
#######################################
check_lxc_docker_ports() {
  local vmid="$1"
  # Guest-side probe; shipped as one argument, so no single quotes inside.
  local probe='
    # Prints the local-address column of every listening TCP socket,
    # preferring ss and falling back to netstat.
    listen_addrs() {
      if command -v ss >/dev/null 2>&1; then
        ss -lnt
      else
        netstat -lnt 2>/dev/null
      fi | awk "{print \$4}" | tr -d " "
    }

    # Succeeds when port $1 is bound on 0.0.0.0, *, [::] or ::.
    is_listening_port() {
      p="$1"
      listen_addrs | grep -Eq "(^|.*:)(0\\.0\\.0\\.0|\*|\\[::\\]|::):${p}$"
    }

    miss=""
    for spec in 2375:api2375 9023:metrics9023; do
      is_listening_port "${spec%%:*}" || miss="${miss},${spec#*:}"
    done

    case "$miss" in
      "") echo docker-ports:OK ;;
      *) echo docker-ports:missing${miss} ;;
    esac
  '
  pct exec "$vmid" -- sh -c "$probe" 2>/dev/null | tr -d '\r' | head -n1
}
#######################################
# Reports the Docker daemon state (up/down) inside a container.
# Arguments: $1 - container VMID handed to `pct exec`
# Outputs:   first line of the probe result on stdout:
#              docker:up | docker:down | docker:unknown
#            (unknown = neither systemctl nor rc-service found in the guest)
#######################################
check_lxc_docker_daemon() {
  local vmid="$1"
  # Guest-side probe; shipped as one argument, so no single quotes inside.
  local probe='
    if command -v systemctl >/dev/null 2>&1; then
      if systemctl is-active --quiet docker; then
        echo docker:up
      else
        echo docker:down
      fi
    elif command -v rc-service >/dev/null 2>&1; then
      if rc-service docker status >/dev/null 2>&1; then
        echo docker:up
      else
        echo docker:down
      fi
    else
      echo docker:unknown
    fi
  '
  pct exec "$vmid" -- sh -c "$probe" 2>/dev/null | tr -d '\r' | head -n1
}
#######################################
# Appends one summary row for an LXC container to SUMMARY_LINES.
# Row format: "LXC <vmid> | <name> | <ip>[ | <issue>;<issue>...]"
# Globals:   SUMMARY_LINES (appended to)
# Arguments: $1 - container VMID
# Uses:      check_lxc_tailscale_issue, check_lxc_docker_issue,
#            check_lxc_docker_daemon, check_lxc_docker_ports
#######################################
collect_lxc_summary() {
  local vmid="$1"
  local name ip issues=""
  # Kept local so probe results from one container never leak into the next call.
  local ts_issue dk_issue dk_daemon dk_ports
  local -a found=()

  name=$(pct exec "$vmid" -- sh -c "hostname -s 2>/dev/null || cat /etc/hostname 2>/dev/null || echo lxc-$vmid" 2>/dev/null | head -n1 | tr -d '\r')

  # `hostname -i` may print several addresses, trailing blanks or CRs; fold
  # everything onto one trimmed line so the summary stays one row per guest.
  # NOTE(review): unlike the VM summary this is not filtered to 192.168.1.*,
  # although the fallback label says "no-ip-in-range" - confirm intent.
  ip=$(pct exec "$vmid" -- sh -c 'hostname -i 2>/dev/null || true' 2>/dev/null \
    | tr -s '[:space:]' ' ' | sed 's/^ //; s/ $//')

  # Tailscale check: any output is an issue.
  ts_issue=$(check_lxc_tailscale_issue "$vmid")
  if [[ -n "$ts_issue" ]]; then
    found+=("$ts_issue")
  fi

  # Docker config check: "OK" means nothing to report.
  dk_issue=$(check_lxc_docker_issue "$vmid")
  if [[ -n "$dk_issue" && "$dk_issue" != "OK" ]]; then
    found+=("$dk_issue")
  fi

  # Docker daemon status: always reported when available (docker:up/down/unknown).
  dk_daemon=$(check_lxc_docker_daemon "$vmid")
  if [[ -n "$dk_daemon" ]]; then
    found+=("$dk_daemon")
  fi

  # Docker listening ports: always reported when available.
  dk_ports=$(check_lxc_docker_ports "$vmid")
  if [[ -n "$dk_ports" ]]; then
    found+=("$dk_ports")
  fi

  # Join the collected findings with ';'.
  issues=$(IFS=';'; printf '%s' "${found[*]}")

  if [[ -n "$issues" ]]; then
    SUMMARY_LINES+=("LXC $vmid | ${name:-lxc-$vmid} | ${ip:-no-ip-in-range} | ${issues}")
  else
    SUMMARY_LINES+=("LXC $vmid | ${name:-lxc-$vmid} | ${ip:-no-ip-in-range}")
  fi
}
# Reads guest-agent `network-get-interfaces` JSON on stdin and prints the
# 192.168.1.* addresses it contains, space-separated on one line.
_vm_lan_ips_from_agent_json() {
  # Splitting on '"' puts the value of an `"ip-address" : "x"` line in field 4.
  # The dots are escaped once: inside single quotes a doubled backslash would
  # demand a literal backslash in the address and never match.
  awk -F '"' '/"ip-address"/ {print $4}' \
    | grep '^192\.168\.1\.' \
    | tr '\n' ' ' \
    | sed 's/ *$//'
}

#######################################
# Appends one summary row for a QEMU VM to SUMMARY_LINES.
# Row format: "VM <vmid> | <name> | <ips>"
# The addresses come from the QEMU guest agent; when none in 192.168.1.* is
# found (or the agent does not answer) the row shows "no-ip-in-range/agent-off".
# Globals:   SUMMARY_LINES (appended to)
# Arguments: $1 - VM ID
#######################################
collect_vm_summary() {
  local vmid="$1"
  local name ip

  name=$(qm config "$vmid" 2>/dev/null | awk '/^name:/{ $1=""; sub(/^ /,"",$0); print; exit }')

  # Try the modern guest command first.
  ip=$(qm guest cmd "$vmid" network-get-interfaces 2>/dev/null | _vm_lan_ips_from_agent_json || true)

  # Fall back to the older agent command.
  if [[ -z "$ip" ]]; then
    ip=$(qm agent "$vmid" network-get-interfaces 2>/dev/null | _vm_lan_ips_from_agent_json || true)
  fi

  SUMMARY_LINES+=("VM $vmid | ${name:-vm-$vmid} | ${ip:-no-ip-in-range/agent-off}")
}
# Succeeds when the service-status text in $1 reports a running service.
# "started" (OpenRC) and "active" (systemd) are matched as whole words so that
# "inactive" is not mistaken for "active".
_tailscale_status_is_up() {
  grep -Eqw 'started|active' <<<"$1"
}

#######################################
# Provisions one LXC container and refreshes the "# Notes:" block at the end
# of its Proxmox configuration file.
#
# Steps, in order:
#   1. Sanitise /etc/pve/lxc/<vmid>.conf and drop the previous notes block.
#   2. Detect the guest OS from /etc/os-release (alpine, ubuntu; others skipped).
#   3. Update packages, install docker/openssh/jq/bash (+ tailscale if absent)
#      and enable the services. Failures are logged and do not stop the flow.
#   4. Make /etc/docker/daemon.json expose the API on tcp://0.0.0.0:2375 and
#      metrics on 0.0.0.0:9023; restart Docker only when something changed.
#   5. Determine the Tailscale state, trying `tailscale up` once if it is down.
#   6. Append commented notes (Tailscale state + `docker ps` name/image pairs).
#
# Arguments:   $1 - container VMID
#              $2 - guest type label, used only in log messages (e.g. "LXC")
# Environment: DOCKER_WAIT_SECONDS - seconds to wait for Docker and its
#              endpoints after a restart (default 30)
# Outputs:     progress log and the final notes block on stdout
# Returns:     0 on the skip paths (unknown/unsupported OS); otherwise the
#              status of the final print
#######################################
update_notes() {
  local vmid="$1"
  local type="$2"
  local conf_path os
  local update_cmd install_base_pkgs tailscale_pkg_cmd install_tailscale
  local enable_docker enable_ssh enable_tailscale_check check_tailscale_status
  local check_tailscale_installed wait_secs_host docker_setup_script
  local tailscale_status tailscale_note
  local docker_info docker_info_commented full_note

  echo "[INFO] Procesando $type $vmid..."

  # Host-side path of the container configuration file.
  conf_path="/etc/pve/lxc/${vmid}.conf"

  # Sanitise the conf: comment out malformed lines left by earlier notes
  # (e.g. "homepage : image"). Only lines with whitespace before the colon are
  # touched; the original author states these are not valid in PVE.
  if [[ -f "$conf_path" ]]; then
    if grep -qE '^[^#].*\s: ' "$conf_path"; then
      echo "[WARN] $conf_path contiene líneas no válidas con ' : '. Comentándolas para restaurar parseo..."
      sed -i -E 's/^([^#].*\s: .*)$/# \1/' "$conf_path" || true
    fi
    # Remove the previous commented notes block, if any.
    # NOTE(review): this deletes from the "# Notes:" marker to the END of the
    # file, so anything that follows the marker is lost - confirm acceptable.
    sed -i '/^# Notes:/,$d' "$conf_path" || true
  fi

  # Detect the guest OS.
  os=$(pct exec "$vmid" -- cat /etc/os-release 2>/dev/null | grep '^ID=' | cut -d= -f2 | tr -d '"')
  if [[ -z "$os" ]]; then
    echo "[WARN] No se pudo detectar OS para $type $vmid. Saltando."
    return 0
  fi
  echo "[INFO] Sistema operativo detectado: $os"

  # Per-OS command set.
  case "$os" in
    alpine)
      update_cmd="tries=5; for i in \$(seq 1 \$tries); do if apk update; then ok=1; break; else echo \"[WARN] apk update failed (\$i/\$tries). Retrying in \$((i*2))s...\"; sleep \$((i*2)); fi; done; [ \"\${ok:-0}\" = \"1\" ] || echo \"[WARN] Proceeding with stale indexes\"; apk upgrade --no-cache || echo \"[WARN] apk upgrade failed; continuing\""
      install_base_pkgs="apk add --no-cache docker openssh jq bash"
      tailscale_pkg_cmd="apk add --no-cache tailscale"
      enable_docker="rc-update add docker && rc-service docker start"
      enable_ssh="rc-update add sshd && rc-service sshd start"
      # The OpenRC service may be called "tailscaled" or "tailscale".
      enable_tailscale_check='
        if [ -e /etc/init.d/tailscaled ] || [ -e /etc/init.d/tailscale ]; then
          (rc-update add tailscaled default 2>/dev/null || rc-update add tailscale default 2>/dev/null) &&
            (rc-service tailscaled start 2>/dev/null || rc-service tailscale start 2>/dev/null)
        fi'
      check_tailscale_status='
        if [ -e /etc/init.d/tailscaled ] || [ -e /etc/init.d/tailscale ]; then
          (rc-service tailscaled status 2>/dev/null || rc-service tailscale status 2>/dev/null) || true
        else
          echo notinstalled
        fi'
      ;;
    ubuntu)
      update_cmd="apt-get update && DEBIAN_FRONTEND=noninteractive apt-get upgrade -y"
      install_base_pkgs="apt-get install -y docker.io openssh-server jq bash"
      tailscale_pkg_cmd="apt-get install -y tailscale"
      enable_docker="systemctl enable docker && systemctl start docker"
      enable_ssh="systemctl enable ssh && systemctl start ssh"
      enable_tailscale_check="systemctl enable tailscaled && systemctl start tailscaled"
      check_tailscale_status="systemctl is-active tailscaled"
      ;;
    *)
      echo "[WARN] OS no soportado: $os. Saltando $vmid."
      return
      ;;
  esac

  # Install tailscale only when the guest does not have it yet.
  check_tailscale_installed=$(pct exec "$vmid" -- sh -c "command -v tailscale 2>/dev/null || echo notfound")
  if [[ "$check_tailscale_installed" == "notfound" ]]; then
    install_tailscale="$tailscale_pkg_cmd"
  else
    install_tailscale="echo '[INFO] tailscale ya está instalado'"
  fi

  # Run the commands tolerantly: a failure is logged but never breaks the flow.
  pct exec "$vmid" -- sh -c "$update_cmd" || echo "[WARN] update falló en $type $vmid, continúo"
  pct exec "$vmid" -- sh -c "$install_base_pkgs || true" || true
  pct exec "$vmid" -- sh -c "$install_tailscale || true" || true
  pct exec "$vmid" -- sh -c "$enable_docker" || echo "[WARN] no se pudo habilitar docker en $type $vmid"
  pct exec "$vmid" -- sh -c "$enable_ssh" || echo "[WARN] no se pudo habilitar ssh en $type $vmid"
  pct exec "$vmid" -- sh -c "$enable_tailscale_check" || true

  echo "[INFO] Verificando configuración del Docker API y métricas en $type $vmid..."
  wait_secs_host="${DOCKER_WAIT_SECONDS:-30}"

  # The guest-side script is loaded from a quoted here-document so that it can
  # use single quotes freely. Passing it as a '...'-quoted literal would end
  # the argument at the first inner quote and hand the guest a truncated script.
  # `read -d ""` returns non-zero at end of input, hence the `|| true`.
  IFS= read -r -d '' docker_setup_script <<'DOCKER_SETUP' || true
set -e
mkdir -p /etc/docker
DAEMON_JSON="/etc/docker/daemon.json"
TMP_JSON="/etc/docker/daemon.json.tmp"

# Succeeds when TCP port $1 is bound on a wildcard address (0.0.0.0, *, [::] or ::).
is_listening_port() {
  p="$1"
  if command -v ss >/dev/null 2>&1; then
    ss -lnt | awk '{print $4}' | tr -d ' ' | grep -Eq "(^|.*:)(0\.0\.0\.0|\*|\[::\]|::):${p}$"
  else
    netstat -lnt 2>/dev/null | awk '{print $4}' | tr -d ' ' | grep -Eq "(^|.*:)(0\.0\.0\.0|\*|\[::\]|::):${p}$"
  fi
}

# Builds the desired daemon.json and installs it only when it differs.
# Sets JSON_CHANGED to 1 when the file was (re)written, 0 otherwise.
ensure_json() {
  if [ -f "$DAEMON_JSON" ]; then
    echo "[INFO] daemon.json existente encontrado. Verificando llaves requeridas..."
    if ! command -v jq >/dev/null 2>&1; then
      echo "[ERROR] jq no está instalado; no se puede actualizar JSON de Docker." >&2
      exit 1
    fi
    # Merge the required keys into the existing document, keeping other keys.
    jq '(.hosts = ((.hosts // []) + ["unix:///var/run/docker.sock","tcp://0.0.0.0:2375"]) | .hosts |= unique)
      | .["metrics-addr"] = "0.0.0.0:9023"
      | .experimental = true' "$DAEMON_JSON" > "$TMP_JSON"
  else
    echo "[INFO] Creando daemon.json con configuración por defecto (API TCP + métricas)."
    cat > "$TMP_JSON" <<\JSON
{
  "hosts": ["unix:///var/run/docker.sock", "tcp://0.0.0.0:2375"],
  "metrics-addr": "0.0.0.0:9023",
  "experimental": true
}
JSON
  fi
  if [ ! -f "$DAEMON_JSON" ] || ! cmp -s "$DAEMON_JSON" "$TMP_JSON"; then
    echo "[INFO] Aplicando cambios a daemon.json"
    cp "$TMP_JSON" "$DAEMON_JSON"
    JSON_CHANGED=1
  else
    echo "[INFO] daemon.json ya contiene la configuración requerida."
    JSON_CHANGED=0
  fi
  rm -f "$TMP_JSON"
}

# systemd guests only: when the packaged unit starts dockerd with "-H fd://",
# install a drop-in that removes it so the hosts from daemon.json are used.
# Sets OVERRIDE_APPLIED to 1 only when the drop-in was actually (re)written,
# so an already-configured guest is not restarted on every run.
apply_systemd_override_if_needed() {
  OVERRIDE_APPLIED=0
  command -v systemctl >/dev/null 2>&1 || return 0
  echo "[INFO] Detectado systemd. Verificando override de servicio Docker..."
  if ! systemctl cat docker 2>/dev/null | grep -qE "ExecStart=.*-H fd://"; then
    echo "[INFO] No se requiere override de systemd."
    return 0
  fi
  mkdir -p /etc/systemd/system/docker.service.d
  OVERRIDE="/etc/systemd/system/docker.service.d/override.conf"
  cat > "$OVERRIDE.tmp" <<\OVR
[Service]
ExecStart=
ExecStart=/usr/bin/dockerd
OVR
  if [ -f "$OVERRIDE" ] && cmp -s "$OVERRIDE" "$OVERRIDE.tmp"; then
    rm -f "$OVERRIDE.tmp"
    echo "[INFO] Override de systemd ya presente; sin cambios."
    return 0
  fi
  echo "[INFO] Creando override de systemd para remover -H fd:// y permitir hosts desde daemon.json"
  mv "$OVERRIDE.tmp" "$OVERRIDE"
  systemctl daemon-reload
  OVERRIDE_APPLIED=1
}

restart_docker() {
  if command -v systemctl >/dev/null 2>&1; then
    systemctl restart docker || true
    if systemctl is-active --quiet docker; then
      echo "[INFO] Docker reiniciado (systemd)."
    else
      echo "[WARN] Docker no está activo tras reinicio (systemd)."
    fi
  elif command -v rc-service >/dev/null 2>&1; then
    rc-service docker restart || true
    rc-service docker status || true
    echo "[INFO] Docker reiniciado (OpenRC)."
  else
    echo "[WARN] No se pudo detectar gestor de servicios para reiniciar Docker."
  fi
}

# Polls `docker info` once per second for up to DOCKER_WAIT_SECONDS.
wait_for_docker_ready() {
  echo "[INFO] Esperando a que Docker esté operativo..."
  READY=0
  LIMIT=${DOCKER_WAIT_SECONDS:-30}
  for i in $(seq 1 "$LIMIT"); do
    if docker info >/dev/null 2>&1; then
      READY=1; break
    fi
    sleep 1
  done
  if [ $READY -eq 1 ]; then
    echo "[INFO] Docker respondió a docker info."
  else
    echo "[WARN] Docker no respondió tras el tiempo de espera."
  fi
}

# Polls once per second until both 2375 and 9023 are listening, or timeout.
wait_for_endpoints() {
  echo "[INFO] Esperando endpoints TCP 2375 y 9023 (IPv4/IPv6)..."
  EP=0
  LIMIT=${DOCKER_WAIT_SECONDS:-30}
  for i in $(seq 1 "$LIMIT"); do
    API_OK=0; MET_OK=0
    is_listening_port 2375 && API_OK=1
    is_listening_port 9023 && MET_OK=1
    if [ $API_OK -eq 1 ] && [ $MET_OK -eq 1 ]; then EP=1; break; fi
    sleep 1
  done
  if [ $EP -eq 1 ]; then
    echo "[INFO] Endpoints confirmados."
  else
    echo "[WARN] Endpoints no confirmados tras el tiempo de espera."
  fi
}

# Prints the listening sockets and a verdict for each Docker port.
check_listeners() {
  echo "[INFO] Comprobando puertos en escucha para Docker..."
  if command -v ss >/dev/null 2>&1; then
    ss -lnt || true
  elif command -v netstat >/dev/null 2>&1; then
    netstat -lnt || true
  fi
  if is_listening_port 2375; then
    echo "[INFO] API Docker TCP escuchando en 2375 (IPv4/IPv6)"
  else
    echo "[WARN] API Docker TCP NO detectada en 2375"
  fi
  if is_listening_port 9023; then
    echo "[INFO] Métricas Docker escuchando en 9023 (IPv4/IPv6)"
  else
    echo "[WARN] Métricas Docker NO detectadas en 9023"
  fi
}

ensure_json
apply_systemd_override_if_needed
if [ "${JSON_CHANGED:-0}" -eq 1 ] || [ "${OVERRIDE_APPLIED:-0}" -eq 1 ]; then
  restart_docker
  wait_for_docker_ready
  wait_for_endpoints
else
  echo "[INFO] Configuración de Docker ya presente; no se requiere reinicio."
fi
check_listeners
DOCKER_SETUP

  pct exec "$vmid" -- env DOCKER_WAIT_SECONDS="$wait_secs_host" bash -x -c "$docker_setup_script" \
    || echo "[WARN] la configuración de Docker falló en $type $vmid, continúo"

  # Tailscale state.
  tailscale_status=$(pct exec "$vmid" -- sh -c "$check_tailscale_status" 2>&1)
  if _tailscale_status_is_up "$tailscale_status"; then
    tailscale_note="tailscale UP"
  elif grep -q "notinstalled" <<<"$tailscale_status"; then
    tailscale_note="tailscale NOT INSTALLED"
  else
    tailscale_note="tailscale DOWN"
    # Try to bring tailscale up when it is installed but down.
    # NOTE(review): `tailscale up` may wait for an interactive login on a node
    # that was never authenticated - confirm this cannot stall the run.
    pct exec "$vmid" -- sh -lc 'if command -v tailscale >/dev/null 2>&1; then (sudo -n tailscale up || sudo tailscale up || tailscale up) >/dev/null 2>&1 || true; fi' || true
    # Re-check the state after the attempt.
    tailscale_status=$(pct exec "$vmid" -- sh -c "$check_tailscale_status" 2>&1)
    if _tailscale_status_is_up "$tailscale_status"; then
      tailscale_note="tailscale UP"
    fi
  fi

  # Collect the Docker containers ("name : image" per line).
  docker_info=$(pct exec "$vmid" -- sh -c "docker ps --format '{{.Names}} : {{.Image}}'" 2>/dev/null || echo "Sin contenedores")
  if [[ -z "$docker_info" ]]; then
    docker_info="Sin contenedores"
  fi

  # Build the note; every line is commented so it cannot break the .conf.
  docker_info_commented=$(printf "%s\n" "$docker_info" | sed "s/^/# /")
  full_note=$(printf "# Notes:\n# %s\n%s\n" "$tailscale_note" "$docker_info_commented")

  # Append the notes to the container configuration file. printf writes the
  # text verbatim (no backslash interpretation, unlike `echo -e`).
  if [[ -f "$conf_path" ]]; then
    printf '%s\n' "$full_note" >> "$conf_path"
    echo "[INFO] Notas actualizadas en $conf_path:"
  else
    echo "[WARN] No existe $conf_path; no se pueden escribir notas."
  fi
  printf '%s\n' "$full_note"
}
#######################################
# Entry point: provisions every running LXC container, collects a summary row
# for each running container and VM, and prints the summary.
# Globals: SUMMARY_LINES (read; filled by the collect_* functions)
# Uses:    update_notes, collect_lxc_summary, collect_vm_summary
#######################################
main() {
  local vmid line
  local -a lxc_ids=() vm_ids=()

  echo "[INFO] Iniciando gestión automática de LXC..."

  # Running LXC containers only. `pct list` columns: VMID Status Lock Name.
  # Stopped containers are skipped: `pct exec` cannot reach them, and
  # update_notes would only strip their existing notes before giving up.
  # The IDs are read into an array first so that commands inside the loop
  # cannot consume the list from stdin.
  mapfile -t lxc_ids < <(pct list | awk 'NR>1 && $2 == "running" {print $1}')
  for vmid in "${lxc_ids[@]}"; do
    # Do not stop on isolated failures.
    if ! update_notes "$vmid" "LXC"; then
      echo "[ERROR] Falló actualización de notas para LXC $vmid; continúo con el resto."
    fi
    collect_lxc_summary "$vmid" || echo "[WARN] No se pudo recoger resumen para LXC $vmid"
  done

  # VMs: try to obtain IPs through the QEMU guest agent (if available).
  # `qm list` columns: VMID NAME STATUS MEM(MB) BOOTDISK(GB) PID, so the
  # status is field 3 (field 2 is the VM name).
  mapfile -t vm_ids < <(qm list | awk 'NR>1 && $3 == "running" {print $1}')
  for vmid in "${vm_ids[@]}"; do
    collect_vm_summary "$vmid"
  done

  echo "[INFO] Resumen final (nombre e IP 192.168.1.*):"
  for line in "${SUMMARY_LINES[@]}"; do
    echo "$line"
  done
  echo "[INFO] Script completado."
}

main "$@"