Files
prop-data-guard/backend/.venv/lib/python3.9/site-packages/flower/events.py
T
2026-04-22 17:07:33 +08:00

211 lines
7.3 KiB
Python

import collections
import logging
import shelve
import threading
import time
from collections import Counter
from functools import partial
from celery.events import EventReceiver
from celery.events.state import State
from prometheus_client import Counter as PrometheusCounter
from prometheus_client import Gauge, Histogram
from tornado.ioloop import PeriodicCallback
from tornado.options import options
logger = logging.getLogger(__name__)
# Lazily-created, process-wide metrics container (one registry entry per process).
PROMETHEUS_METRICS = None


def get_prometheus_metrics():
    """Return the shared :class:`PrometheusMetrics` instance, creating it on first use.

    Prometheus collectors must be registered exactly once per process, so a
    module-level singleton is used rather than constructing per caller.
    """
    global PROMETHEUS_METRICS  # pylint: disable=global-statement
    if PROMETHEUS_METRICS is not None:
        return PROMETHEUS_METRICS
    PROMETHEUS_METRICS = PrometheusMetrics()
    return PROMETHEUS_METRICS
class PrometheusMetrics:
    """All Prometheus collectors exported by Flower, created once per process."""

    def __init__(self):
        # Most task-level metrics share the same label pair.
        per_task_labels = ['worker', 'task']

        self.events = PrometheusCounter(
            'flower_events_total',
            "Number of events",
            ['worker', 'type', 'task'],
        )
        self.runtime = Histogram(
            'flower_task_runtime_seconds',
            "Task runtime",
            per_task_labels,
            buckets=options.task_runtime_metric_buckets,
        )
        self.prefetch_time = Gauge(
            'flower_task_prefetch_time_seconds',
            "The time the task spent waiting at the celery worker to be executed.",
            per_task_labels,
        )
        self.number_of_prefetched_tasks = Gauge(
            'flower_worker_prefetched_tasks',
            'Number of tasks of given type prefetched at a worker',
            per_task_labels,
        )
        self.worker_online = Gauge(
            'flower_worker_online',
            "Worker online status",
            ['worker'],
        )
        self.worker_number_of_currently_executing_tasks = Gauge(
            'flower_worker_number_of_currently_executing_tasks',
            "Number of tasks currently executing at a worker",
            ['worker'],
        )
class EventsState(State):
    """Celery event state that additionally maintains per-worker counters
    and Prometheus metrics.

    EventsState object is created and accessed only from the ioloop thread,
    so no locking is required around ``counter`` or the metrics.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # counter[worker_name][event_type] -> number of events seen
        self.counter = collections.defaultdict(Counter)
        self.metrics = get_prometheus_metrics()

    def event(self, event):
        """Record *event* in celery state, then update counters and metrics.

        :param event: celery event dict; always carries ``hostname`` and
            ``type``, and ``uuid`` for ``task-*`` events.
        """
        # Save the event into the base State first so self.tasks is updated.
        super().event(event)

        worker_name = event['hostname']
        event_type = event['type']

        self.counter[worker_name][event_type] += 1

        if event_type.startswith('task-'):
            task_id = event['uuid']
            # NOTE: State caps how many tasks it retains, so the task may
            # already have been evicted and .get() can return None.
            task = self.tasks.get(task_id)
            task_name = event.get('name', '')
            if not task_name and task is not None:
                task_name = task.name or ''
            self.metrics.events.labels(worker_name, event_type, task_name).inc()

            runtime = event.get('runtime', 0)
            if runtime:
                self.metrics.runtime.labels(worker_name, task_name).observe(runtime)

            # Bug fix: the remaining task metrics read task attributes;
            # bail out if the task was evicted to avoid AttributeError.
            if task is None:
                return

            task_started = task.started
            task_received = task.received

            if event_type == 'task-received' and not task.eta and task_received:
                self.metrics.number_of_prefetched_tasks.labels(worker_name, task_name).inc()

            if event_type == 'task-started' and not task.eta and task_started and task_received:
                self.metrics.prefetch_time.labels(worker_name, task_name).set(task_started - task_received)
                self.metrics.number_of_prefetched_tasks.labels(worker_name, task_name).dec()

            if event_type in ['task-succeeded', 'task-failed'] and not task.eta and task_started and task_received:
                self.metrics.prefetch_time.labels(worker_name, task_name).set(0)

        if event_type == 'worker-online':
            self.metrics.worker_online.labels(worker_name).set(1)

        if event_type == 'worker-heartbeat':
            self.metrics.worker_online.labels(worker_name).set(1)

            num_executing_tasks = event.get('active')
            if num_executing_tasks is not None:
                self.metrics.worker_number_of_currently_executing_tasks.labels(worker_name).set(num_executing_tasks)

        if event_type == 'worker-offline':
            self.metrics.worker_online.labels(worker_name).set(0)
class Events(threading.Thread):
    """Background thread that captures celery events.

    The receive loop runs in this thread, but every event is handed off to
    the ioloop thread (via ``on_event``) so that ``EventsState`` never needs
    synchronization.
    """

    # Interval (ms) between broadcasts re-enabling events on workers.
    events_enable_interval = 5000

    # pylint: disable=too-many-arguments
    def __init__(self, capp, io_loop, db=None, persistent=False,
                 enable_events=True, state_save_interval=0,
                 **kwargs):
        """
        :param capp: celery application instance
        :param io_loop: tornado ioloop used for state access and timers
        :param db: path of the shelve file used when *persistent* is set
        :param persistent: load/save state from/to *db*
        :param enable_events: periodically re-enable events on workers
        :param state_save_interval: ms between state saves (0 disables)
        """
        threading.Thread.__init__(self)
        self.daemon = True

        self.io_loop = io_loop
        self.capp = capp

        self.db = db
        self.persistent = persistent
        self.enable_events = enable_events
        self.state = None
        self.state_save_timer = None

        if self.persistent:
            logger.debug("Loading state from '%s'...", self.db)
            # Context manager guarantees the shelf is closed even if
            # unpickling the stored state raises.
            with shelve.open(self.db) as state:
                if state:
                    self.state = state['events']

            if state_save_interval:
                self.state_save_timer = PeriodicCallback(self.save_state,
                                                         state_save_interval)

        if not self.state:
            self.state = EventsState(**kwargs)

        self.timer = PeriodicCallback(self.on_enable_events,
                                      self.events_enable_interval)

    def start(self):
        """Start the capture thread and any periodic timers."""
        threading.Thread.start(self)
        if self.enable_events:
            logger.debug("Starting enable events timer...")
            self.timer.start()

        if self.state_save_timer:
            logger.debug("Starting state save timer...")
            self.state_save_timer.start()

    def stop(self):
        """Stop timers and, when persistent, flush state one last time."""
        if self.enable_events:
            logger.debug("Stopping enable events timer...")
            self.timer.stop()

        if self.state_save_timer:
            logger.debug("Stopping state save timer...")
            self.state_save_timer.stop()

        if self.persistent:
            self.save_state()

    def run(self):
        """Consume celery events forever, reconnecting with exponential backoff."""
        try_interval = 1
        while True:
            try:
                try_interval *= 2

                with self.capp.connection() as conn:
                    recv = EventReceiver(conn,
                                         handlers={"*": self.on_event},
                                         app=self.capp)
                    # Connection succeeded; reset the backoff.
                    try_interval = 1
                    logger.debug("Capturing events...")
                    recv.capture(limit=None, timeout=None, wakeup=True)
            except (KeyboardInterrupt, SystemExit):
                # Re-raise the interrupt in the main thread so the whole
                # process shuts down (this is a daemon thread).
                import _thread as thread
                thread.interrupt_main()
            except Exception as e:  # pylint: disable=broad-except
                logger.error("Failed to capture events: '%s', "
                             "trying again in %s seconds.",
                             e, try_interval)
                logger.debug(e, exc_info=True)
                time.sleep(try_interval)

    def save_state(self):
        """Persist the current events state to the shelve db (flag='n'
        recreates the file so stale entries never accumulate)."""
        logger.debug("Saving state to '%s'...", self.db)
        with shelve.open(self.db, flag='n') as state:
            state['events'] = self.state

    def on_enable_events(self):
        # Periodically enable events for workers launched after flower.
        # Run the broadcast in an executor so it cannot block the ioloop.
        self.io_loop.run_in_executor(None, self.capp.control.enable_events)

    def on_event(self, event):
        # Call EventsState.event in the ioloop thread to avoid synchronization.
        self.io_loop.add_callback(partial(self.state.event, event))