Non-blocking beat

This commit is contained in:
Marc Aymerich 2015-05-06 10:51:12 +00:00
parent d9a7eefb07
commit 31ac035c27
7 changed files with 155 additions and 105 deletions

View File

@ -386,3 +386,6 @@ try: import uwsgi to know its running uwsgi
# don't block on beat, and --report periodic tasks # don't block on beat, and --report periodic tasks
# Deprecate restart/start/stop services (do touch wsgi.py and fuck celery) # Deprecate restart/start/stop services (do touch wsgi.py and fuck celery)
# high perf apache sync,

View File

@ -16,7 +16,7 @@ from datetime import datetime, timedelta
from celery.schedules import crontab_parser as CrontabParser from celery.schedules import crontab_parser as CrontabParser
from orchestra.utils.sys import run, join from orchestra.utils.sys import run, join, LockFile
class Setting(object): class Setting(object):
@ -126,27 +126,21 @@ def fire_pending_messages(settings, db):
if __name__ == "__main__": if __name__ == "__main__":
# TODO acquire lock with LockFile('/dev/shm/beat.lock', expire=20):
manage = sys.argv[1] manage = sys.argv[1]
procs = [] procs = []
settings = Setting(manage).get_settings() settings = Setting(manage).get_settings()
db = DB(settings) db = DB(settings)
db.connect() db.connect()
try: try:
if 'orchestra.contrib.tasks' in settings['INSTALLED_APPS']: # Non-blocking loop, we need to finish this in time for the next minute.
if settings.get('TASKS_BACKEND', 'thread') in ('thread', 'process'): if 'orchestra.contrib.tasks' in settings['INSTALLED_APPS']:
for proc in fire_pending_tasks(manage, db): if settings.get('TASKS_BACKEND', 'thread') in ('thread', 'process'):
for proc in fire_pending_tasks(manage, db):
procs.append(proc)
if 'orchestra.contrib.mailer' in settings['INSTALLED_APPS']:
for proc in fire_pending_messages(settings, db):
procs.append(proc) procs.append(proc)
if 'orchestra.contrib.mailer' in settings['INSTALLED_APPS']: finally:
for proc in fire_pending_messages(settings, db): db.close()
procs.append(proc) sys.exit(0)
exit_code = 0
for proc in procs:
result = join(proc)
sys.stdout.write(result.stdout.decode('utf8'))
sys.stderr.write(result.stderr.decode('utf8'))
if result.return_code != 0:
exit_code = result.return_code
finally:
db.close()
sys.exit(exit_code)

View File

@ -4,6 +4,8 @@ from socket import error as SocketError
from django.core.mail import get_connection from django.core.mail import get_connection
from django.utils.encoding import smart_str from django.utils.encoding import smart_str
from orchestra.utils.sys import LockFile
from .models import Message from .models import Message
@ -29,23 +31,23 @@ def send_message(message, num=0, connection=None, bulk=100):
def send_pending(bulk=100): def send_pending(bulk=100):
# TODO acquire lock with LockFile('/dev/shm/mailer.send_pending.lock'):
connection = None connection = None
num = 0 num = 0
for message in Message.objects.filter(state=Message.QUEUED).order_by('priority'): for message in Message.objects.filter(state=Message.QUEUED).order_by('priority'):
send_message(message, num, connection, bulk) send_message(message, num, connection, bulk)
num += 1 num += 1
from django.utils import timezone from django.utils import timezone
from . import settings from . import settings
from datetime import timedelta from datetime import timedelta
from django.db.models import Q from django.db.models import Q
now = timezone.now() now = timezone.now()
qs = Q() qs = Q()
for retries, seconds in enumerate(settings.MAILER_DEFERE_SECONDS): for retries, seconds in enumerate(settings.MAILER_DEFERE_SECONDS):
delta = timedelta(seconds=seconds) delta = timedelta(seconds=seconds)
qs = qs | Q(retries=retries, last_retry__lte=now-delta) qs = qs | Q(retries=retries, last_retry__lte=now-delta)
for message in Message.objects.filter(state=Message.DEFERRED).filter(qs).order_by('priority'): for message in Message.objects.filter(state=Message.DEFERRED).filter(qs).order_by('priority'):
send_message(message, num, connection, bulk) send_message(message, num, connection, bulk)
if connection is not None: if connection is not None:
connection.close() connection.close()

View File

@ -1,52 +1,53 @@
from orchestra.contrib.orchestration import Operation from orchestra.contrib.orchestration import Operation
from orchestra.contrib.tasks import task from orchestra.contrib.tasks import task
from orchestra.models.utils import get_model_field_path from orchestra.models.utils import get_model_field_path
from orchestra.utils.sys import LockFile
from .backends import ServiceMonitor from .backends import ServiceMonitor
@task(name='resources.Monitor') @task(name='resources.Monitor')
def monitor(resource_id, ids=None, async=True): def monitor(resource_id, ids=None, async=True):
from .models import ResourceData, Resource with LockFile('/dev/shm/resources.monitor.lock', expire=60*60):
from .models import ResourceData, Resource
resource = Resource.objects.get(pk=resource_id) resource = Resource.objects.get(pk=resource_id)
resource_model = resource.content_type.model_class() resource_model = resource.content_type.model_class()
logs = [] logs = []
# Execute monitors # Execute monitors
for monitor_name in resource.monitors: for monitor_name in resource.monitors:
backend = ServiceMonitor.get_backend(monitor_name) backend = ServiceMonitor.get_backend(monitor_name)
model = backend.model_class() model = backend.model_class()
kwargs = {} kwargs = {}
if ids: if ids:
path = get_model_field_path(model, resource_model) path = get_model_field_path(model, resource_model)
path = '%s__in' % ('__'.join(path) or 'id') path = '%s__in' % ('__'.join(path) or 'id')
kwargs = { kwargs = {
path: ids path: ids
} }
# Execute monitor # Execute monitor
monitorings = [] monitorings = []
for obj in model.objects.filter(**kwargs):
op = Operation(backend, obj, Operation.MONITOR)
monitorings.append(op)
# TODO async=True only when running with celery
# monitor.request.id
logs += Operation.execute(monitorings, async=async)
kwargs = {'id__in': ids} if ids else {}
# Update used resources and trigger resource exceeded and recovery
triggers = []
model = resource.content_type.model_class()
for obj in model.objects.filter(**kwargs): for obj in model.objects.filter(**kwargs):
op = Operation(backend, obj, Operation.MONITOR) data, __ = ResourceData.get_or_create(obj, resource)
monitorings.append(op) data.update()
# TODO async=True only when running with celery if not resource.disable_trigger:
# monitor.request.id a = data.used
logs += Operation.execute(monitorings, async=async) b = data.allocated
if data.used > (data.allocated or 0):
kwargs = {'id__in': ids} if ids else {} op = Operation(backend, obj, Operation.EXCEEDED)
# Update used resources and trigger resource exceeded and recovery triggers.append(op)
triggers = [] elif data.used < (data.allocated or 0):
model = resource.content_type.model_class() op = Operation(backend, obj, Operation.RECOVERY)
for obj in model.objects.filter(**kwargs): triggers.append(op)
data, __ = ResourceData.get_or_create(obj, resource) Operation.execute(triggers)
data.update() return logs
if not resource.disable_trigger:
a = data.used
b = data.allocated
if data.used > (data.allocated or 0):
op = Operation(backend, obj, Operation.EXCEEDED)
triggers.append(op)
elif data.used < (data.allocated or 0):
op = Operation(backend, obj, Operation.RECOVERY)
triggers.append(op)
Operation.execute(triggers)
return logs

View File

@ -1,5 +1,6 @@
import os import os
import textwrap import textwrap
from collections import OrderedDict
from django.template import Template, Context from django.template import Template, Context
from django.utils.translation import ugettext_lazy as _ from django.utils.translation import ugettext_lazy as _
@ -115,19 +116,27 @@ class PHPBackend(WebAppServiceMixin, ServiceController):
service php5-fpm reload service php5-fpm reload
fi fi
# Coordinate apache restart with Apache2Backend # Coordinate apache restart with Apache2Backend
# FIXME race condition restart=0
locked=1 backend='PHPBackend'
state="$(grep -v 'PHPBackend' /dev/shm/restart.apache2)" || locked=0 mv /dev/shm/restart.apache2 /dev/shm/restart.apache2.locked || {
echo -n "$state" > /dev/shm/restart.apache2 sleep 0.1
mv /dev/shm/restart.apache2 /dev/shm/restart.apache2.locked
}
state="$(grep -v $backend /dev/shm/restart.apache2.locked)" || restart=1
echo -n "$state" > /dev/shm/restart.apache2.locked
if [[ $UPDATED_APACHE -eq 1 ]]; then if [[ $UPDATED_APACHE -eq 1 ]]; then
if [[ $locked == 0 ]]; then if [[ $restart == 1 ]]; then
service apache2 status && service apache2 reload || service apache2 start service apache2 status && service apache2 reload || service apache2 start
rm /dev/shm/restart.apache2.locked
else else
echo "PHPBackend RESTART" >> /dev/shm/restart.apache2 echo "$backend RESTART" >> /dev/shm/restart.apache2.locked
mv /dev/shm/restart.apache2.locked /dev/shm/restart.apache2
fi fi
elif [[ "$state" =~ .*RESTART$ ]]; then elif [[ "$state" =~ .*RESTART$ ]]; then
rm /dev/shm/restart.apache2 rm /dev/shm/restart.apache2.locked
service apache2 status && service apache2 reload || service apache2 start service apache2 status && service apache2 reload || service apache2 start
else
mv /dev/shm/restart.apache2.locked /dev/shm/restart.apache2
fi fi
""") """)
) )
@ -135,7 +144,6 @@ class PHPBackend(WebAppServiceMixin, ServiceController):
def get_options(self, webapp): def get_options(self, webapp):
kwargs = {} kwargs = {}
print(webapp.data)
if self.MERGE: if self.MERGE:
kwargs = { kwargs = {
'webapp__account': webapp.account, 'webapp__account': webapp.account,
@ -195,10 +203,10 @@ class PHPBackend(WebAppServiceMixin, ServiceController):
def get_fcgid_cmd_options(self, webapp, context): def get_fcgid_cmd_options(self, webapp, context):
options = self.get_options(webapp) options = self.get_options(webapp)
maps = { maps = OrderedDict(
'MaxProcesses': options.get('processes', None), MaxProcesses=options.get('processes', None),
'IOTimeout': options.get('timeout', None), IOTimeout=options.get('timeout', None),
} )
cmd_options = [] cmd_options = []
for directive, value in maps.items(): for directive, value in maps.items():
if value: if value:

View File

@ -137,18 +137,28 @@ class Apache2Backend(ServiceController):
def commit(self): def commit(self):
""" reload Apache2 if necessary """ """ reload Apache2 if necessary """
self.append(textwrap.dedent("""\ self.append(textwrap.dedent("""\
locked=1 # Coordinate apache restart with Apache2Backend
state="$(grep -v 'Apache2Backend' /dev/shm/restart.apache2)" || locked=0 restart=0
echo -n "$state" > /dev/shm/restart.apache2 backend='Apache2Backend'
if [[ $UPDATED == 1 ]]; then mv /dev/shm/restart.apache2 /dev/shm/restart.apache2.locked || {
if [[ $locked == 0 ]]; then sleep 0.1
mv /dev/shm/restart.apache2 /dev/shm/restart.apache2.locked
}
state="$(grep -v $backend /dev/shm/restart.apache2.locked)" || restart=1
echo -n "$state" > /dev/shm/restart.apache2.locked
if [[ $UPDATED_APACHE -eq 1 ]]; then
if [[ $restart == 1 ]]; then
service apache2 status && service apache2 reload || service apache2 start service apache2 status && service apache2 reload || service apache2 start
rm /dev/shm/restart.apache2.locked
else else
echo "Apache2Backend RESTART" >> /dev/shm/restart.apache2 echo "$backend RESTART" >> /dev/shm/restart.apache2.locked
mv /dev/shm/restart.apache2.locked /dev/shm/restart.apache2
fi fi
elif [[ "$state" =~ .*RESTART$ ]]; then elif [[ "$state" =~ .*RESTART$ ]]; then
rm /dev/shm/restart.apache2 rm /dev/shm/restart.apache2.locked
service apache2 status && service apache2 reload || service apache2 start service apache2 status && service apache2 reload || service apache2 start
else
mv /dev/shm/restart.apache2.locked /dev/shm/restart.apache2
fi""") fi""")
) )
super(Apache2Backend, self).commit() super(Apache2Backend, self).commit()

View File

@ -156,3 +156,35 @@ def get_default_celeryd_username():
if user is None: if user is None:
raise CommandError("Can not find the default celeryd username") raise CommandError("Can not find the default celeryd username")
return user return user
class LockFile(object):
    """
    File-based lock used to keep concurrent runs of the same job apart.

    The lock is held while ``lockfile`` exists and its mtime is younger than
    ``expire`` seconds. Older lock files are considered stale and are taken
    over, so a crashed holder cannot block everybody else forever.

    Usable as a context manager::

        with LockFile('/dev/shm/some.lock', expire=60):
            ...
    """

    def __init__(self, lockfile, expire=5*60, unlocked=False):
        """
        lockfile: path of the lock file (/dev/shm/ can be a good place ;)
        expire:   age in seconds after which an existing lock file is stale
        unlocked: when True the context manager neither acquires nor
                  releases anything (locking disabled)
        """
        self.lockfile = lockfile
        self.expire = expire
        self.unlocked = unlocked

    def acquire(self):
        """
        Try to take the lock without blocking.

        Returns True when the lock has been taken and False when somebody
        else holds a lock that has not expired yet.
        """
        import errno
        # At most two passes: the second one only happens when the holder
        # removed the file between our failed create and the mtime check.
        for __ in range(2):
            try:
                # O_EXCL makes "check it is free and create it" one atomic
                # step, so two processes can not both win a free lock.
                fd = os.open(self.lockfile, os.O_CREAT | os.O_EXCL | os.O_WRONLY, 0o666)
            except OSError as exc:
                if exc.errno != errno.EEXIST:
                    raise
            else:
                os.close(fd)
                return True
            try:
                lock_time = os.path.getmtime(self.lockfile)
                # lock expires to avoid starvation
                if time.time()-lock_time < self.expire:
                    return False
                # Stale lock: refresh its mtime and take it over.
                # NOTE: two contenders racing for the same *stale* lock can
                # still both succeed here; only the free-lock case is atomic.
                os.utime(self.lockfile, None)
                return True
            except OSError as exc:
                if exc.errno != errno.ENOENT:
                    raise
        return False

    def release(self):
        """ Remove the lock file; a lock file that is already gone is not an error """
        import errno
        try:
            os.remove(self.lockfile)
        except OSError as exc:
            # Do not blow up (and mask the with-body exception) when the
            # file has already been removed.
            if exc.errno != errno.ENOENT:
                raise

    def __enter__(self):
        """ Acquire the lock or raise OperationLocked when it is held by someone else """
        if not self.unlocked:
            if not self.acquire():
                raise OperationLocked('%s lock file exists and its mtime is less '
                    'than %s seconds' % (self.lockfile, self.expire))
        return True

    def __exit__(self, exc_type, value, traceback):
        """ Release the lock; exceptions from the with-body are propagated """
        if not self.unlocked:
            self.release()