outposts: improve controller error handling

This commit is contained in:
Jens Langhammer 2020-10-16 11:31:31 +02:00
parent 91ce7f7363
commit f6b8171624
3 changed files with 61 additions and 49 deletions

View File

@ -3,9 +3,14 @@ from typing import Dict
from structlog import get_logger from structlog import get_logger
from passbook.lib.sentry import SentryIgnoredException
from passbook.outposts.models import Outpost from passbook.outposts.models import Outpost
class ControllerException(SentryIgnoredException):
    """Exception raised when anything fails during a controller run.

    Controller `run()` implementations wrap errors from their backend client
    in this type (`raise ControllerException from exc`), keeping the original
    error attached as the cause.
    """
class BaseController: class BaseController:
"""Base Outpost deployment controller""" """Base Outpost deployment controller"""

View File

@ -2,12 +2,12 @@
from typing import Dict, Tuple from typing import Dict, Tuple
from docker import DockerClient, from_env from docker import DockerClient, from_env
from docker.errors import NotFound from docker.errors import DockerException, NotFound
from docker.models.containers import Container from docker.models.containers import Container
from yaml import safe_dump from yaml import safe_dump
from passbook import __version__ from passbook import __version__
from passbook.outposts.controllers.base import BaseController from passbook.outposts.controllers.base import BaseController, ControllerException
class DockerController(BaseController): class DockerController(BaseController):
@ -62,43 +62,46 @@ class DockerController(BaseController):
) )
def run(self): def run(self):
container, has_been_created = self._get_container() try:
if has_been_created: container, has_been_created = self._get_container()
return None if has_been_created:
# Check if the container is out of date, delete it and retry return None
if len(container.image.tags) > 0: # Check if the container is out of date, delete it and retry
tag: str = container.image.tags[0] if len(container.image.tags) > 0:
_, _, version = tag.partition(":") tag: str = container.image.tags[0]
if version != __version__: _, _, version = tag.partition(":")
self.logger.info( if version != __version__:
"Container has mismatched version, re-creating...", self.logger.info(
has=version, "Container has mismatched version, re-creating...",
should=__version__, has=version,
) should=__version__,
)
container.kill()
container.remove(force=True)
return self.run()
# Check that container values match our values
if self._comp_env(container):
self.logger.info("Container has outdated config, re-creating...")
container.kill() container.kill()
container.remove(force=True) container.remove(force=True)
return self.run() return self.run()
# Check that container values match our values # Check that container is healthy
if self._comp_env(container): if (
self.logger.info("Container has outdated config, re-creating...") container.status == "running"
container.kill() and container.attrs.get("State", {}).get("Health", {}).get("Status", "")
container.remove(force=True) != "healthy"
return self.run() ):
# Check that container is healthy # At this point we know the config is correct, but the container isn't healthy,
if ( # so we just restart it with the same config
container.status == "running" self.logger.info("Container is unhealthy, restarting...")
and container.attrs.get("State", {}).get("Health", {}).get("Status", "") container.restart()
!= "healthy" # Check that container is running
): if container.status != "running":
# At this point we know the config is correct, but the container isn't healthy, self.logger.info("Container is not running, restarting...")
# so we just restart it with the same config container.start()
self.logger.info("Container is unhealthy, restarting...") return None
container.restart() except DockerException as exc:
# Check that container is running raise ControllerException from exc
if container.status != "running":
self.logger.info("Container is not running, restarting...")
container.start()
return None
def get_static_deployment(self) -> str: def get_static_deployment(self) -> str:
"""Generate docker-compose yaml for proxy, version 3.5""" """Generate docker-compose yaml for proxy, version 3.5"""

View File

@ -1,11 +1,12 @@
"""Kubernetes deployment controller""" """Kubernetes deployment controller"""
from io import StringIO from io import StringIO
from kubernetes.client import OpenApiException
from kubernetes.config import load_incluster_config, load_kube_config from kubernetes.config import load_incluster_config, load_kube_config
from kubernetes.config.config_exception import ConfigException from kubernetes.config.config_exception import ConfigException
from yaml import dump_all from yaml import dump_all
from passbook.outposts.controllers.base import BaseController from passbook.outposts.controllers.base import BaseController, ControllerException
from passbook.outposts.controllers.k8s.deployment import DeploymentReconciler from passbook.outposts.controllers.k8s.deployment import DeploymentReconciler
from passbook.outposts.controllers.k8s.secret import SecretReconciler from passbook.outposts.controllers.k8s.secret import SecretReconciler
from passbook.outposts.controllers.k8s.service import ServiceReconciler from passbook.outposts.controllers.k8s.service import ServiceReconciler
@ -23,21 +24,24 @@ class KubernetesController(BaseController):
def run(self): def run(self):
"""Called by scheduled task to reconcile deployment/service/etc""" """Called by scheduled task to reconcile deployment/service/etc"""
namespace = self.outpost.config.kubernetes_namespace try:
namespace = self.outpost.config.kubernetes_namespace
secret_reconciler = SecretReconciler(self.outpost) secret_reconciler = SecretReconciler(self.outpost)
secret_reconciler.namespace = namespace secret_reconciler.namespace = namespace
secret_reconciler.run() secret_reconciler.run()
deployment_reconciler = DeploymentReconciler(self.outpost) deployment_reconciler = DeploymentReconciler(self.outpost)
deployment_reconciler.namespace = namespace deployment_reconciler.namespace = namespace
deployment_reconciler.deployment_ports = self.deployment_ports deployment_reconciler.deployment_ports = self.deployment_ports
deployment_reconciler.run() deployment_reconciler.run()
service_reconciler = ServiceReconciler(self.outpost) service_reconciler = ServiceReconciler(self.outpost)
service_reconciler.namespace = namespace service_reconciler.namespace = namespace
service_reconciler.deployment_ports = self.deployment_ports service_reconciler.deployment_ports = self.deployment_ports
service_reconciler.run() service_reconciler.run()
except OpenApiException as exc:
raise ControllerException from exc
def get_static_deployment(self) -> str: def get_static_deployment(self) -> str:
secret_reconciler = SecretReconciler(self.outpost) secret_reconciler = SecretReconciler(self.outpost)