Added async functionality on backends

This commit is contained in:
Marc Aymerich 2015-05-06 14:39:25 +00:00
parent 31ac035c27
commit 39bf68caad
15 changed files with 195 additions and 116 deletions

14
TODO.md
View file

@ -334,7 +334,7 @@ pip3 install https://github.com/APSL/django-mailer-2/archive/master.zip
# all signals + accouns.register() services.register() on apps.py
# if backend.async: don't join.
# RELATED: domains.sync to ns3 make it async
# RELATED: domains.sync to ns3 make it async backend rather than cron based ?
from orchestra.contrib.tasks import task
import time, sys
@ -358,11 +358,8 @@ TODO mount the filesystem with "nosuid" option
# autoiscover modules on app.ready()
# uwse uwsgi cron: decorator or config cron = 59 2 -1 -1 -1 %(virtualenv)/bin/python manage.py runmyfunnytask
# SecondaryMailServerBackend and check_origin signal
try: import uwsgi to know its running uwsgi
# avoid cron email errors when failing hard
# mailboxes.address settings multiple local domains, not only one?
# backend.context = self.get_context() or save(obj, context=None)
@ -372,20 +369,11 @@ try: import uwsgi to know its running uwsgi
# MERGE beats and inspect INSTALLED_APPS and get IS_ENABLED
# make exceptions fot check origin shit
# rename virtual_maps to virtual_alias_maps and remove virtual_alias_domains ?
# virtdomains file is not ideal, prevent fake/error on domains there! and make sure this file is required!
# Message last_retry auto_now doesn't work!
# LOCK beat lockfile, all tasks (monitors, billing (or unique number will fuck the transaction?))
# don't block on beat, and --report periodic tasks
# Deprecate restart/start/stop services (do touch wsgi.py and fuck celery)
# high perf apache sync,

View file

@ -25,10 +25,10 @@ STATE_COLORS = {
class RouteAdmin(admin.ModelAdmin):
list_display = (
'backend', 'host', 'match', 'display_model', 'display_actions', 'is_active'
'backend', 'host', 'match', 'display_model', 'display_actions', 'async', 'is_active'
)
list_editable = ('host', 'match', 'is_active')
list_filter = ('host', 'is_active', 'backend')
list_editable = ('host', 'match', 'async', 'is_active')
list_filter = ('host', 'is_active', 'async', 'backend')
ordering = ('backend',)
BACKEND_HELP_TEXT = helpers.get_backends_help_text(ServiceBackend.get_backends())

View file

@ -40,6 +40,7 @@ class ServiceBackend(plugins.Plugin, metaclass=ServiceMount):
script_executable = '/bin/bash'
function_method = methods.Python
type = 'task' # 'sync'
# Don't wait for the backend to finish before continuing with request/response
ignore_fields = []
actions = []
default_route_match = 'True'
@ -160,15 +161,22 @@ class ServiceBackend(plugins.Plugin, metaclass=ServiceMount):
time = timezone.now().strftime("%h %d, %Y %I:%M:%S %Z")
return "Generated by Orchestra at %s" % time
def execute(self, server, async=False):
def create_log(self, server, **kwargs):
from .models import BackendLog
scripts = self.scripts
state = BackendLog.STARTED
run = bool(scripts) or (self.force_empty_action_execution or bool(self.content))
state = BackendLog.RECEIVED
run = bool(self.scripts) or (self.force_empty_action_execution or bool(self.content))
if not run:
state = BackendLog.NOTHING
log = BackendLog.objects.create(backend=self.get_name(), state=state, server=server)
return log
def execute(self, server, async=False, log=None):
from .models import BackendLog
if log is None:
log = self.create_log(server)
run = log.state != BackendLog.NOTHING
if run:
scripts = self.scripts
for method, commands in scripts:
method(log, server, commands, async)
if log.state != BackendLog.SUCCESS:

View file

@ -70,9 +70,18 @@ def send_report(method, args, log):
mail_admins(subject, message, html_message=html_message)
def get_backend_url(ids):
if len(ids) == 1:
return reverse('admin:orchestration_backendlog_change', args=ids)
elif len(ids) > 1:
url = reverse('admin:orchestration_backendlog_changelist')
return url + '?id__in=%s' % ','.join(map(str, ids))
return ''
def message_user(request, logs):
total, successes = 0, 0
total, successes, async = 0, 0, 0
ids = []
async_ids = []
for log in logs:
total += 1
if log.state != log.EXCEPTION:
@ -80,25 +89,42 @@ def message_user(request, logs):
ids.append(log.pk)
if log.state in (log.SUCCESS, log.NOTHING):
successes += 1
errors = total-successes
if len(ids) == 1:
url = reverse('admin:orchestration_backendlog_change', args=ids)
href = '<a href="{}">backends</a>'.format(url)
elif len(ids) > 1:
url = reverse('admin:orchestration_backendlog_changelist')
url += '?id__in=%s' % ','.join(map(str, ids))
href = '<a href="{}">backends</a>'.format(url)
else:
href = ''
elif log.state in (log.RECEIVED, log.STARTED):
async += 1
async_ids.append(log.id)
errors = total-successes-async
url = get_backend_url(ids)
async_url = get_backend_url(async_ids)
async_msg = ''
if async:
async_msg = ungettext(
_('<a href="{async_url}">{async} backend</a> is running on the background'),
_('<a href="{async_url}">{async} backends</a> are running on the background'),
async)
if errors:
msg = ungettext(
_('{errors} out of {total} {href} has fail to execute.'),
_('{errors} out of {total} {href} have fail to execute.'),
_('<a href="{url}">{errors} out of {total} backend</a> has fail to execute'),
_('<a href="{url}">{errors} out of {total} backends</a> have fail to execute'),
errors)
messages.error(request, mark_safe(msg.format(errors=errors, total=total, href=href)))
if async_msg:
msg += ', ' + str(async_msg)
msg = msg.format(errors=errors, async=async, async_url=async_url, total=total, url=url)
messages.error(request, mark_safe(msg + '.'))
elif successes:
if async_msg:
msg = ungettext(
_('<a href="{url}">{successes} out of {total} backend</a> has been executed'),
_('<a href="{url}">{successes} out of {total} backends</a> have been executed'),
successes)
msg += ', ' + str(async_msg)
else:
msg = ungettext(
_('<a href="{url}">{total} backend</a> has been executed'),
_('<a href="{url}">{total} backends</a> have been executed'),
total)
msg = msg.format(total=total, url=url, async_url=async_url, async=async, successes=successes)
messages.success(request, mark_safe(msg + '.'))
else:
msg = ungettext(
_('{total} {href} has been executed.'),
_('{total} {href} have been executed.'),
total)
messages.success(request, mark_safe(msg.format(total=total, href=href)))
msg = async_msg.format(url=url, async_url=async_url, async=async)
messages.success(request, mark_safe(msg + '.'))

View file

@ -2,6 +2,7 @@ import logging
import threading
import traceback
from collections import OrderedDict
from functools import partial
from django.core.mail import mail_admins
@ -19,7 +20,7 @@ logger = logging.getLogger(__name__)
router = import_class(settings.ORCHESTRATION_ROUTER)
def as_task(execute):
def as_task(execute, log, operations):
def wrapper(*args, **kwargs):
""" send report """
# Tasks run on a separate transaction pool (thread), no need to temper with the transaction
@ -33,12 +34,21 @@ def as_task(execute):
logger.error(subject)
logger.error(message)
mail_admins(subject, message)
log.state = BackendLog.EXCEPTION
log.stderr = traceback.format_exc()
log.save(update_fields=('state', 'stderr'))
# We don't propagate the exception further to avoid transaction rollback
else:
# Using the wrapper function as threader messenger for the execute output
# Absense of it will indicate a failure at this stage
wrapper.log = log
return log
finally:
# Store the operation
for operation in operations:
logger.info("Executed %s" % str(operation))
if operation.instance.pk:
# Not all backends are called with objects saved on the database
operation.store(log)
stdout = log.stdout.strip()
stdout and logger.debug('STDOUT %s', stdout)
stderr = log.stderr.strip()
stderr and logger.debug('STDERR %s', stderr)
return wrapper
@ -92,41 +102,31 @@ def execute(scripts, block=False, async=False):
logger.info('Orchestration execution is dissabled by ORCHESTRATION_DISABLE_EXECUTION settings.')
return []
# Execute scripts on each server
threads = []
executions = []
threads_to_join = []
logs = []
for key, value in scripts.items():
server, __ = key
backend, operations = value
execute = as_task(backend.execute)
args = (server,)
kwargs = {
'async': async or server.async
}
log = backend.create_log(*args, **kwargs)
kwargs['log'] = log
task = as_task(backend.execute, log, operations)
logger.debug('%s is going to be executed on %s' % (backend, server))
if block:
# Execute one backend at a time, no need for threads
execute(server, async=async)
task(*args, **kwargs)
else:
execute = close_connection(execute)
thread = threading.Thread(target=execute, args=(server,), kwargs={'async': async})
task = close_connection(task)
thread = threading.Thread(target=task, args=args, kwargs=kwargs)
thread.start()
threads.append(thread)
executions.append((execute, operations))
[ thread.join() for thread in threads ]
logs = []
# collect results
for execution, operations in executions:
# There is no log if an exception has been rised at the very end of the execution
if hasattr(execution, 'log'):
for operation in operations:
logger.info("Executed %s" % str(operation))
if operation.instance.pk:
# Not all backends are called with objects saved on the database
operation.store(execution.log)
stdout = execution.log.stdout.strip()
stdout and logger.debug('STDOUT %s', stdout)
stderr = execution.log.stderr.strip()
stderr and logger.debug('STDERR %s', stderr)
logs.append(execution.log)
else:
mocked_log = BackendLog(state=BackendLog.EXCEPTION)
logs.append(mocked_log)
if not server.async:
threads_to_join.append(thread)
logs.append(log)
[ thread.join() for thread in threads_to_join ]
return logs

View file

@ -33,8 +33,9 @@ def SSH(backend, log, server, cmds, async=False):
digest = hashlib.md5(bscript).hexdigest()
path = os.path.join(settings.ORCHESTRATION_TEMP_SCRIPT_DIR, digest)
remote_path = "%s.remote" % path
log.state = log.STARTED
log.script = '# %s\n%s' % (remote_path, script)
log.save(update_fields=['script'])
log.save(update_fields=('script', 'state'))
if not cmds:
return
channel = None
@ -133,8 +134,9 @@ def Python(backend, log, server, cmds, async=False):
# TODO collect stdout?
script = [ str(cmd.func.__name__) + str(cmd.args) for cmd in cmds ]
script = json.dumps(script, indent=4).replace('"', '')
log.state = log.STARTED
log.script = '\n'.join([log.script, script])
log.save(update_fields=['script'])
log.save(update_fields=('script', 'state'))
try:
for cmd in cmds:
with CaptureStdout() as stdout:

View file

@ -0,0 +1,34 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from django.db import models, migrations
class Migration(migrations.Migration):
dependencies = [
('orchestration', '0001_initial'),
]
operations = [
migrations.AddField(
model_name='route',
name='async',
field=models.BooleanField(help_text='Whether or not block the request/response cycle waitting this backend to finish its execution.', default=False),
),
migrations.AlterField(
model_name='backendlog',
name='state',
field=models.CharField(verbose_name='state', choices=[('RECEIVED', 'RECEIVED'), ('TIMEOUT', 'TIMEOUT'), ('STARTED', 'STARTED'), ('SUCCESS', 'SUCCESS'), ('FAILURE', 'FAILURE'), ('ERROR', 'ERROR'), ('ABORTED', 'ABORTED'), ('REVOKED', 'REVOKED'), ('NOTHING', 'NOTHING')], default='RECEIVED', max_length=16),
),
migrations.AlterField(
model_name='backendlog',
name='stderr',
field=models.TextField(verbose_name='stderr'),
),
migrations.AlterField(
model_name='route',
name='backend',
field=models.CharField(choices=[('Apache2Traffic', '[M] Apache 2 Traffic'), ('DovecotMaildirDisk', '[M] Dovecot Maildir size'), ('Exim4Traffic', '[M] Exim4 traffic'), ('MailmanSubscribers', '[M] Mailman subscribers'), ('MailmanTraffic', '[M] Mailman traffic'), ('MysqlDisk', '[M] MySQL disk'), ('OpenVZTraffic', '[M] OpenVZTraffic'), ('PostfixMailscannerTraffic', '[M] Postfix-Mailscanner traffic'), ('UNIXUserDisk', '[M] UNIX user disk'), ('VsFTPdTraffic', '[M] VsFTPd traffic'), ('Apache2Backend', '[S] Apache 2'), ('BSCWBackend', '[S] BSCW SaaS'), ('Bind9MasterDomainBackend', '[S] Bind9 master domain'), ('Bind9SlaveDomainBackend', '[S] Bind9 slave domain'), ('DokuWikiMuBackend', '[S] DokuWiki multisite'), ('DovecotPostfixPasswdVirtualUserBackend', '[S] Dovecot-Postfix virtualuser'), ('DrupalMuBackend', '[S] Drupal multisite'), ('GitLabSaaSBackend', '[S] GitLab SaaS'), ('AutoresponseBackend', '[S] Mail autoresponse'), ('MailmanBackend', '[S] Mailman'), ('MailmanVirtualDomainBackend', '[S] Mailman virtdomain-only'), ('MySQLBackend', '[S] MySQL database'), ('MySQLUserBackend', '[S] MySQL user'), ('PHPBackend', '[S] PHP FPM/FCGID'), ('PostfixAddressBackend', '[S] Postfix address'), ('PostfixAddressVirtualDomainBackend', '[S] Postfix address virtdomain-only'), ('uWSGIPythonBackend', '[S] Python uWSGI'), ('StaticBackend', '[S] Static'), ('SymbolicLinkBackend', '[S] Symbolic link webapp'), ('SyncBind9MasterDomainBackend', '[S] Sync Bind9 master domain'), ('SyncBind9SlaveDomainBackend', '[S] Sync Bind9 slave domain'), ('UNIXUserMaildirBackend', '[S] UNIX maildir user'), ('UNIXUserBackend', '[S] UNIX user'), ('WebalizerAppBackend', '[S] Webalizer App'), ('WebalizerBackend', '[S] Webalizer Content'), ('WordPressBackend', '[S] Wordpress'), ('WordpressMuBackend', '[S] Wordpress multisite'), ('PhpListSaaSBackend', '[S] phpList SaaS')], verbose_name='backend', max_length=256),
),
]

View file

@ -133,7 +133,9 @@ class Route(models.Model):
match = models.CharField(_("match"), max_length=256, blank=True, default='True',
help_text=_("Python expression used for selecting the targe host, "
"<em>instance</em> referes to the current object."))
# async = models.BooleanField(default=False)
async = models.BooleanField(default=False,
help_text=_("Whether or not block the request/response cycle waitting this backend to "
"finish its execution."))
# method = models.CharField(_("method"), max_lenght=32, choices=method_choices,
# default=MethodBackend.get_default())
is_active = models.BooleanField(_("active"), default=True)
@ -148,6 +150,7 @@ class Route(models.Model):
def backend_class(self):
return ServiceBackend.get_backend(self.backend)
# TODO rename to get_hosts
@classmethod
def get_servers(cls, operation, **kwargs):
cache = kwargs.get('cache', {})
@ -169,6 +172,7 @@ class Route(models.Model):
else:
for route in routes:
if route.matches(operation.instance):
route.host.async = route.async
servers.append(route.host)
return servers

View file

@ -8,7 +8,7 @@ from .backends import ServiceMonitor
@task(name='resources.Monitor')
def monitor(resource_id, ids=None, async=True):
with LockFile('/dev/shm/resources.monitor.lock', expire=60*60):
with LockFile('/dev/shm/resources.monitor-%i.lock' % resource_id, expire=60*60):
from .models import ResourceData, Resource
resource = Resource.objects.get(pk=resource_id)
resource_model = resource.content_type.model_class()

View file

@ -28,9 +28,15 @@ class WebAppServiceMixin(object):
def set_under_construction(self, context):
if context['under_construction_path']:
self.append(textwrap.dedent("""\
if [[ $CREATED == 1 ]]; then
cp -r %(under_construction_path)s %(app_path)s
chown -R %(user)s:%(group)s %(app_path)s
if [[ $CREATED == 1 && ! $(ls -A %(app_path)s) ]]; then
{
# Wait for other backends to do their thing or cp under construction
sleep 1
if [[ ! $(ls -A %(app_path)s) ]]; then
cp -r %(under_construction_path)s %(app_path)s
chown -R %(user)s:%(group)s %(app_path)s
fi
} &
fi""") % context
)

View file

@ -34,13 +34,13 @@ class PHPBackend(WebAppServiceMixin, ServiceController):
def save(self, webapp):
context = self.get_context(webapp)
self.create_webapp_dir(context)
self.set_under_construction(context)
if webapp.type_instance.is_fpm:
self.save_fpm(webapp, context)
self.delete_fcgid(webapp, context)
elif webapp.type_instance.is_fcgid:
self.save_fcgid(webapp, context)
self.delete_fpm(webapp, context)
self.set_under_construction(context)
def save_fpm(self, webapp, context):
self.append(textwrap.dedent("""\
@ -108,36 +108,38 @@ class PHPBackend(WebAppServiceMixin, ServiceController):
def prepare(self):
super(PHPBackend, self).prepare()
# Coordinate apache restart with php backend in order not to overdo it
self.append('echo "PHPBackend" >> /dev/shm/restart.apache2')
self.append(textwrap.dedent("""\
backend="PHPBackend"
echo "$backend" >> /dev/shm/restart.apache2
""")
)
def commit(self):
self.append(textwrap.dedent("""
if [[ $UPDATED_FPM -eq 1 ]]; then
service php5-fpm reload
fi
# Coordinate apache restart with Apache2Backend
restart=0
backend='PHPBackend'
# Coordinate Apache restart with other concurrent backends (i.e. Apache2Backend)
is_last=0
mv /dev/shm/restart.apache2 /dev/shm/restart.apache2.locked || {
sleep 0.1
sleep 0.2
mv /dev/shm/restart.apache2 /dev/shm/restart.apache2.locked
}
state="$(grep -v $backend /dev/shm/restart.apache2.locked)" || restart=1
echo -n "$state" > /dev/shm/restart.apache2.locked
if [[ $UPDATED_APACHE -eq 1 ]]; then
if [[ $restart == 1 ]]; then
state="$(grep -v "$backend" /dev/shm/restart.apache2.locked)" || is_last=1
if [[ $is_last -eq 1 ]]; then
if [[ $UPDATED_APACHE -eq 1 || "$state" =~ .*RESTART$ ]]; then
service apache2 status && service apache2 reload || service apache2 start
rm /dev/shm/restart.apache2.locked
else
echo "$backend RESTART" >> /dev/shm/restart.apache2.locked
mv /dev/shm/restart.apache2.locked /dev/shm/restart.apache2
fi
elif [[ "$state" =~ .*RESTART$ ]]; then
rm /dev/shm/restart.apache2.locked
service apache2 status && service apache2 reload || service apache2 start
else
echo -n "$state" > /dev/shm/restart.apache2.locked
if [[ $UPDATED_APACHE -eq 1 ]]; then
echo "$backend RESTART" >> /dev/shm/restart.apache2.locked
fi
mv /dev/shm/restart.apache2.locked /dev/shm/restart.apache2
fi
# End of coordination
""")
)
super(PHPBackend, self).commit()

View file

@ -132,34 +132,36 @@ class Apache2Backend(ServiceController):
def prepare(self):
super(Apache2Backend, self).prepare()
# Coordinate apache restart with php backend in order not to overdo it
self.append('echo "Apache2Backend" >> /dev/shm/restart.apache2')
self.append(textwrap.dedent("""\
backend="Apache2Backend"
echo "$backend" >> /dev/shm/restart.apache2
""")
)
def commit(self):
""" reload Apache2 if necessary """
self.append(textwrap.dedent("""\
# Coordinate apache restart with Apache2Backend
restart=0
backend='Apache2Backend'
self.append(textwrap.dedent("""
# Coordinate Apache restart with other concurrent backends (i.e. Apache2Backend)
is_last=0
mv /dev/shm/restart.apache2 /dev/shm/restart.apache2.locked || {
sleep 0.1
sleep 0.2
mv /dev/shm/restart.apache2 /dev/shm/restart.apache2.locked
}
state="$(grep -v $backend /dev/shm/restart.apache2.locked)" || restart=1
echo -n "$state" > /dev/shm/restart.apache2.locked
if [[ $UPDATED_APACHE -eq 1 ]]; then
if [[ $restart == 1 ]]; then
state="$(grep -v "$backend" /dev/shm/restart.apache2.locked)" || is_last=1
if [[ $is_last -eq 1 ]]; then
if [[ $UPDATED_APACHE -eq 1 || "$state" =~ .*RESTART$ ]]; then
service apache2 status && service apache2 reload || service apache2 start
rm /dev/shm/restart.apache2.locked
else
echo "$backend RESTART" >> /dev/shm/restart.apache2.locked
mv /dev/shm/restart.apache2.locked /dev/shm/restart.apache2
fi
elif [[ "$state" =~ .*RESTART$ ]]; then
rm /dev/shm/restart.apache2.locked
service apache2 status && service apache2 reload || service apache2 start
else
echo -n "$state" > /dev/shm/restart.apache2.locked
if [[ $UPDATED_APACHE -eq 1 ]]; then
echo "$backend RESTART" >> /dev/shm/restart.apache2.locked
fi
mv /dev/shm/restart.apache2.locked /dev/shm/restart.apache2
fi""")
fi
# End of coordination
""")
)
super(Apache2Backend, self).commit()

View file

@ -19,8 +19,8 @@ class Command(BaseCommand):
content = run('crontab -l || true').stdout.decode('utf8')
if 'orchestra-beat' not in content:
if context['venv']:
content += "* * * * * . %(venv)s/bin/activate && %(orchestra_beat)s %(site_dir)s/manage.py; deactivate" % context
content += "\n* * * * * . %(venv)s/bin/activate && %(orchestra_beat)s %(site_dir)s/manage.py; deactivate" % context
else:
content += "* * * * * %(orchestra_beat)s %(site_dir)s/manage.py" % context
content += "\n* * * * * %(orchestra_beat)s %(site_dir)s/manage.py" % context
context['content'] = content
run("echo '%(content)s' | crontab" % context, display=True)
run("cat << EOF | crontab\n%(content)s\nEOF" % context, display=True)

View file

@ -7,7 +7,7 @@ def close_connection(execute):
try:
log = execute(*args, **kwargs)
except Exception as e:
pass
raise
else:
wrapper.log = log
finally:

View file

@ -124,7 +124,7 @@ def join(iterator, display=False, silent=False, error_codes=[0]):
if display:
sys.stderr.write("\n\033[1;31mCommandError: %s %s\033[m\n" % (msg, err))
if not silent:
raise CommandError("%s %s %s" % (msg, err, out))
raise CommandError("%s %s" % (msg, err))
out.succeeded = not out.failed
return out
@ -158,6 +158,13 @@ def get_default_celeryd_username():
return user
def touch(fname, mode=0o666, dir_fd=None, **kwargs):
flags = os.O_CREAT | os.O_APPEND
with os.fdopen(os.open(fname, flags=flags, mode=mode, dir_fd=dir_fd)) as f:
os.utime(f.fileno() if os.utime in os.supports_fd else fname,
dir_fd=None if os.supports_fd else dir_fd, **kwargs)
class LockFile(object):
""" File-based lock mechanism used for preventing concurrency problems """
def __init__(self, lockfile, expire=5*60, unlocked=False):