changed a lot regarding rec state, etc.
This commit is contained in:
@@ -1,76 +1,90 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import logging
|
||||
from multiprocessing.pool import ThreadPool
|
||||
from threading import Lock
|
||||
from typing import Union
|
||||
import random
|
||||
import signal
|
||||
import sys
|
||||
import time
|
||||
from logging.handlers import TimedRotatingFileHandler
|
||||
from pprint import pprint
|
||||
|
||||
from backend import app, LrcException
|
||||
from backend.models import Recorder
|
||||
from backend.tools.simple_state_checker import check_capture_agent_state, ping_capture_agent
|
||||
from backend import app, main_logger
|
||||
|
||||
cron_log_handler = logging.FileHandler(app.config.get('CRON_LOG_FILE'))
|
||||
from apscheduler.schedulers.background import BackgroundScheduler
|
||||
|
||||
from backend.cron.cron_state_checker import recorder_checker
|
||||
|
||||
cron_log_handler = TimedRotatingFileHandler(app.config.get('CRON_LOG_FILE'), interval=1, when='d', backupCount=3)
|
||||
cron_log_handler.setFormatter(logging.Formatter('[%(asctime)s] - %(funcName)20s() %(message)s'))
|
||||
cron_logger = logging.getLogger("mal.cron")
|
||||
cron_logger.addHandler(cron_log_handler)
|
||||
logging.getLogger("apscheduler.scheduler").addHandler(cron_log_handler)
|
||||
logging.getLogger("apscheduler.executors.default").addHandler(cron_log_handler)
|
||||
|
||||
recorder_jobs_lock = Lock()
|
||||
recorder_jobs = set()
|
||||
|
||||
NUM_THREADS = 8
|
||||
scheduler = None
|
||||
|
||||
|
||||
def add_recorder_to_state_check(recorder: Union[int, Recorder]):
    """Register a recorder in the module-level set of recorders to state-check.

    :param recorder: a ``Recorder`` instance, or an ``int`` identifier that is
        resolved through ``Recorder.get_by_identifier``.
    :raises LrcException: if ``recorder`` is ``None`` or the identifier lookup
        returned ``None``.
    """
    if isinstance(recorder, int):
        recorder = Recorder.get_by_identifier(recorder)
    if recorder is None:
        cron_logger.warning(
            "Could not add recorder to state check, as specified id could not be found / recorder is None")
        raise LrcException("Recorder is None / could not be found!")
    # The context manager releases the lock even if add() raises, so a failing
    # call can never leave recorder_jobs_lock permanently held.
    with recorder_jobs_lock:
        recorder_jobs.add(recorder)
|
||||
def get_default_scheduler():
    """Create a fresh ``BackgroundScheduler``, publish it as the module-level
    ``scheduler`` and return it.

    Every call replaces the previously stored scheduler object.

    NOTE(review): the ``daemonic`` attribute is assigned after construction;
    confirm that the installed APScheduler version honours it (newer releases
    take a ``daemon`` constructor option instead).
    """
    global scheduler
    cron_logger.debug("creating scheduler!")
    new_scheduler = BackgroundScheduler()
    new_scheduler.daemonic = False
    scheduler = new_scheduler
    return new_scheduler
|
||||
|
||||
|
||||
def remove_recorder_from_state_check(recorder: Union[int, Recorder]):
    """Remove a recorder from the module-level set of recorders to state-check.

    :param recorder: a ``Recorder`` instance, or an ``int`` identifier that is
        resolved through ``Recorder.get_by_identifier``.
    :raises LrcException: if ``recorder`` is ``None`` or the identifier lookup
        returned ``None``.
    :raises KeyError: if the recorder is not currently registered.
    """
    if isinstance(recorder, int):
        recorder = Recorder.get_by_identifier(recorder)
    if recorder is None:
        cron_logger.warning(
            "Could not remove recorder from state check, as specified id could not be found / recorder is None")
        raise LrcException("Recorder is None / could not be found (and therefor not removed)!")
    # set.remove() raises KeyError for an unknown recorder. With a bare
    # acquire()/release() pair that left the lock held forever and blocked every
    # later add/remove/check; the context manager always releases it.
    with recorder_jobs_lock:
        recorder_jobs.remove(recorder)
|
||||
def add_default_jobs(sched=None, testing=False):
    """Register the periodic recorder state check on a scheduler.

    :param sched: scheduler to add the job to; when ``None`` the module-level
        ``scheduler`` is used.
    :param testing: when truthy the job runs every 40 seconds, otherwise every
        2 minutes.
    :return: a one-element list holding the job returned by ``add_job``.
    """
    global scheduler
    target = scheduler if sched is None else sched

    # Only the interval differs between the testing and the regular schedule.
    interval_kwargs = {'seconds': 40} if testing else {'minutes': 2}
    check_recorder_state_job = target.add_job(recorder_checker.check_object_state, 'interval',
                                              **interval_kwargs,
                                              id="check_recorder_state_job")

    return [check_recorder_state_job]
|
||||
|
||||
|
||||
def check_recorder_state():
|
||||
recorder_jobs_lock.acquire()
|
||||
recorders = list(recorder_jobs)
|
||||
recorder_jobs_lock.release()
|
||||
def signal_handler(sig, frame):
|
||||
print('You pressed Ctrl+C -> shutting down scheduler!')
|
||||
if scheduler is not None:
|
||||
scheduler.shutdown()
|
||||
sys.exit(0)
|
||||
|
||||
recorder_states = {r['name']: {'state_ok': False, 'msg': 'unknown state!'} for r in recorders}
|
||||
|
||||
with ThreadPool(NUM_THREADS) as pool:
|
||||
results = [pool.apply_async(check_capture_agent_state, (recorder,)) for recorder in recorders]
|
||||
try:
|
||||
state_results = [res.get(timeout=12) for res in results]
|
||||
except TimeoutError as e:
|
||||
cron_logger.error("Timeout while getting capture agent state! {}".format(e))
|
||||
if __name__ == '__main__':
|
||||
# check_for_ingestibles_and_ingest()
|
||||
# remove_obsolete_media_files_objects()
|
||||
stream_handler = logging.StreamHandler()
|
||||
stream_handler.setFormatter(
|
||||
logging.Formatter('[%(asctime)s] {%(threadName)s} %(levelname)s in %(module)s, line %(lineno)d: %(message)s'))
|
||||
cron_logger.addHandler(stream_handler)
|
||||
cron_logger.setLevel(logging.INFO)
|
||||
|
||||
for r in state_results:
|
||||
if r[0]: # ok :)
|
||||
recorder_states[r[2]] = {'state_ok': True}
|
||||
else:
|
||||
recorder_states[r[2]]['msg'] = r[1]
|
||||
signal.signal(signal.SIGINT, signal_handler)
|
||||
get_default_scheduler()
|
||||
add_default_jobs(testing=True)
|
||||
cron_logger.info("Starting internal scheduler")
|
||||
scheduler.start()
|
||||
|
||||
with ThreadPool(NUM_THREADS) as pool:
|
||||
results = [pool.apply_async(ping_capture_agent, (recorder,)) for recorder in recorders]
|
||||
try:
|
||||
ping_results = [res.get(timeout=12) for res in results]
|
||||
except TimeoutError as e:
|
||||
cron_logger.error("Timeout while pinging capture agent! {}".format(e))
|
||||
c = 0
|
||||
while c < 10:
|
||||
sleep_time = random.randint(10, 20)
|
||||
cron_logger.info("Sleeping for {}s".format(sleep_time))
|
||||
time.sleep(sleep_time)
|
||||
recorder_id = random.randint(0, 15)
|
||||
cron_logger.info("Using recorder id {}".format(recorder_id))
|
||||
recorder_checker.add_object_to_state_check(recorder_id)
|
||||
recorder_checker.add_object_to_state_check(recorder_id+1)
|
||||
pprint(recorder_checker.get_current_state())
|
||||
|
||||
for r in ping_results:
|
||||
if not r[0]: # ok :)
|
||||
recorder_states[r[2]]['msg'] = r[1]
|
||||
while True:
|
||||
user_in = input("Type >exit< to quit.")
|
||||
if user_in == "exit" or user_in == ">exit<":
|
||||
break
|
||||
|
||||
scheduler.shutdown()
|
||||
|
||||
136
backend/cron/cron_state_checker.py
Normal file
136
backend/cron/cron_state_checker.py
Normal file
@@ -0,0 +1,136 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
import datetime
|
||||
import logging
|
||||
from multiprocessing.context import TimeoutError
|
||||
from multiprocessing.pool import ThreadPool
|
||||
from threading import Lock
|
||||
from typing import Union, Callable, TypeVar, Generic, Set, List
|
||||
|
||||
from backend.models import Recorder
|
||||
from backend.tools.simple_state_checker import check_capture_agent_state, ping_capture_agent
|
||||
|
||||
logger = logging.getLogger("mal.cron.recorder_state")

# Module-level lock / job set. StateChecker does not use them: every instance
# keeps its own lock and job set.
recorder_jobs_lock = Lock()
recorder_jobs = set()

# Default number of worker threads used per checker function.
NUM_THREADS = 8

T = TypeVar('T')


class StateChecker(Generic[T]):
    """Run one or more checker functions against a set of registered objects.

    Each checker function is called with a single registered object and must
    return a 3-tuple ``(ok, message, name)`` where ``name`` is the key under
    which the object's state is reported.
    """

    # Message that marks a state entry no checker has reported on yet.
    _UNKNOWN_MSG = 'unknown state!'

    def __init__(self, state_checker_func: Union[Callable, List[Callable]], type_to_check: T, type_name=None,
                 threads=NUM_THREADS):
        """
        :param state_checker_func: a checker callable, or a list of checker
            callables that are executed one after another.
        :param type_to_check: class of the checked objects; its
            ``get_by_identifier`` (if present) resolves integer identifiers.
        :param type_name: name used in log messages; defaults to
            ``type_to_check.__name__``.
        :param threads: size of the thread pool used per checker function.
        """
        self.num_threads = threads
        self.lock = Lock()  # guards self.jobs
        self.jobs: Set[T] = set()
        self.checker_func = state_checker_func
        self.checker_type = type_to_check
        self.update_state_lock = Lock()  # guards self.state_results
        self.state_results = {}
        self.type_name = type_name if type_name is not None else self.checker_type.__name__

    @staticmethod
    def _object_name(obj) -> str:
        """Return ``obj.name`` when the object has one, else ``str(obj)``."""
        return obj.name if hasattr(obj, 'name') else str(obj)

    @classmethod
    def _unknown_state(cls) -> dict:
        """Return a fresh state entry for an object nothing is known about yet."""
        return {'state_ok': False, 'msg': cls._UNKNOWN_MSG}

    def _merge_result(self, object_states: dict, ok, msg, name) -> None:
        """Fold one checker result into ``object_states[name]``.

        Messages of all checkers are accumulated in order. ``state_ok`` is True
        only if every checker that reported so far succeeded, so a failure can
        no longer be masked by a later (or earlier) successful check.
        """
        state = object_states.get(name)
        if state is None:
            state = self._unknown_state()
        previous_msg = state.get('msg')
        first_result = previous_msg == self._UNKNOWN_MSG
        if first_result:
            previous_msg = None  # drop the placeholder text
        object_states[name] = {
            'msg': ", ".join(s for s in (previous_msg, msg) if s),
            'state_ok': bool(ok) and (first_result or bool(state.get('state_ok'))),
        }

    def add_object_to_state_check(self, object_to_check: Union[int, T]):
        """Register an object (or the object behind an integer id) for checking.

        Problems are logged and the call returns without registering anything;
        no exception is raised for an unknown identifier.
        """
        requested = object_to_check  # kept for the log message below
        if isinstance(object_to_check, int):
            if not hasattr(self.checker_type, 'get_by_identifier'):
                logger.error(
                    'Can\'t add object to state check, as >get_by_identifier< not defined on checker_type ({})!'.format(
                        str(self.checker_type)))
                return
            object_to_check = self.checker_type.get_by_identifier(object_to_check)
        if object_to_check is None:
            # Log what the caller asked for, not the (None) lookup result.
            logger.warning(
                "Could not add object ({}) to state check, as specified >id ({})< could not be found / object is None".format(
                    self.type_name, requested))
            return
        name = self._object_name(object_to_check)
        with self.lock:
            logger.debug("Adding {} to object ({}) to state check".format(self.type_name, name))
            self.jobs.add(object_to_check)

    def remove_recorder_from_state_check(self, object_to_check: Union[int, T]):
        """Unregister an object (or the object behind an integer id).

        :raises KeyError: if the object is not currently registered. The lock
            is released in that case as well.
        """
        if isinstance(object_to_check, int):
            if not hasattr(self.checker_type, 'get_by_identifier'):
                logger.error(
                    'Can\'t remove object from state check, as >get_by_identifier< not defined on checker_type ({})!'.format(
                        str(self.checker_type)))
                return
            object_to_check = self.checker_type.get_by_identifier(object_to_check)
        if object_to_check is None:
            logger.warning(
                "Could not remove object ({}) from state check, as specified id could not be found / object is None".format(
                    self.type_name))
            return
        name = self._object_name(object_to_check)
        with self.lock:
            logger.debug("Removing {} from object ({}) to state check".format(self.type_name, name))
            self.jobs.remove(object_to_check)

    def execute_checker_func(self, func, jobs: List[T], object_states: dict) -> dict:
        """Run ``func`` for every job in a thread pool and merge the results.

        :param func: checker callable returning ``(ok, message, name)``.
        :param jobs: objects to check.
        :param object_states: mapping name -> state dict, updated in place.
        :return: the same ``object_states`` mapping.
        """
        with ThreadPool(self.num_threads) as pool:
            pending = [(job, pool.apply_async(func, (job,))) for job in jobs]
            # Results are collected one by one so that a single timeout only
            # affects the object it belongs to instead of discarding them all.
            for job, async_result in pending:
                try:
                    r = async_result.get(timeout=12)
                except TimeoutError as e:
                    logger.error("Timeout while performing state check func! {}".format(e))
                    self._merge_result(object_states, False, "Timeout while performing state check",
                                       self._object_name(job))
                    continue
                self._merge_result(object_states, r[0], r[1], r[2])

        return object_states

    def check_object_state(self) -> dict:
        """Check all registered objects with every configured checker function.

        :return: mapping name -> ``{'state_ok': bool, 'msg': str}``; an empty
            dict when nothing is registered. The result is also folded into
            ``self.state_results``.
        """
        logger.info("checking object ({}) state...".format(self.type_name))
        with self.lock:
            jobs = list(self.jobs)  # snapshot, so the lock is not held while checking

        if not jobs:
            logger.info("No objects ({}) to check... returning".format(self.type_name))
            return {}
        logger.info("checking state of {} recorders".format(len(jobs)))

        object_states = {self._object_name(j): self._unknown_state() for j in jobs}

        checker_funcs = self.checker_func if isinstance(self.checker_func, list) else [self.checker_func]
        for c_f in checker_funcs:
            self.execute_checker_func(c_f, jobs, object_states)

        self.update_state_dict(object_states)

        return object_states

    def update_state_dict(self, object_states: dict):
        """Store ``object_states`` in ``self.state_results``.

        For a name that already has a stored state, the new entry gets a UTC
        ``time_stamp`` and a ``previous`` sub-dict describing the state it
        replaces. A name seen for the first time is stored as given.
        """
        time_stamp = datetime.datetime.now(datetime.timezone.utc).strftime("%d.%m.%Y - %H:%M:%S %Z")
        with self.update_state_lock:
            for key, new_state in object_states.items():
                if key not in self.state_results:
                    self.state_results[key] = new_state
                    continue
                old_state = self.state_results[key]
                self.state_results[key] = {**new_state,
                                           'time_stamp': time_stamp,
                                           'previous': {'state_ok': old_state['state_ok'],
                                                        'msg': old_state.get('msg', None),
                                                        'time_stamp': old_state.get('time_stamp', None)}}

    def get_current_state(self):
        """Run a complete check now and return its result (no cached state is used)."""
        return self.check_object_state()
|
||||
|
||||
|
||||
recorder_checker = StateChecker([check_capture_agent_state, ping_capture_agent], Recorder)
|
||||
11
backend/models/model_base.py
Normal file
11
backend/models/model_base.py
Normal file
@@ -0,0 +1,11 @@
|
||||
|
||||
class ModelBase:
    """Mixin giving model objects a dict-like ``get`` for attribute access."""

    def get(self, attribute_name, default_value=None):
        """Return the attribute called ``attribute_name``.

        :param attribute_name: name of the attribute to read.
        :param default_value: value returned when the attribute does not
            exist. ``None`` counts as "no default given".
        :raises KeyError: if the attribute does not exist and ``default_value``
            is ``None``.
        """
        if hasattr(self, attribute_name):
            return getattr(self, attribute_name)
        # Unlike dict.get, a missing attribute without a usable default is an error.
        if default_value is None:
            raise KeyError("{} not found".format(attribute_name))
        return default_value
|
||||
@@ -18,12 +18,13 @@ from backend import db, app, login_manager, LrcException
|
||||
from sqlalchemy import or_
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
from backend.models.model_base import ModelBase
|
||||
from backend.models.virtual_command_model import virtual_command_recorder_command_table, virtual_command_recorder_table
|
||||
|
||||
metadata = MetaData()
|
||||
|
||||
|
||||
class RecorderModel(db.Model):
|
||||
class RecorderModel(db.Model, ModelBase):
|
||||
__table_args__ = {'extend_existing': True}
|
||||
id = db.Column(db.Integer, autoincrement=True, primary_key=True)
|
||||
created_at = db.Column(db.DateTime, nullable=False, default=datetime.utcnow())
|
||||
@@ -43,6 +44,10 @@ class RecorderModel(db.Model):
|
||||
def get_all():
|
||||
return RecorderModel.query.all()
|
||||
|
||||
@staticmethod
|
||||
def get_by_id(id):
|
||||
return RecorderModel.query.filter(RecorderModel.id == id).first()
|
||||
|
||||
@staticmethod
|
||||
def get_by_name(name):
|
||||
return RecorderModel.query.filter(RecorderModel.model_name == name).first()
|
||||
@@ -79,7 +84,7 @@ class RecorderModel(db.Model):
|
||||
return self.model_name + " (record adapter: {})".format(self.record_adapter_id)
|
||||
|
||||
|
||||
class Recorder(db.Model):
|
||||
class Recorder(db.Model, ModelBase):
|
||||
__table_args__ = {'extend_existing': True}
|
||||
id = db.Column(db.Integer, autoincrement=True, primary_key=True)
|
||||
created_at = db.Column(db.DateTime, nullable=False, default=datetime.utcnow())
|
||||
@@ -209,7 +214,7 @@ class Recorder(db.Model):
|
||||
sort_keys=True, indent=4)
|
||||
|
||||
|
||||
class RecorderCommand(db.Model):
|
||||
class RecorderCommand(db.Model, ModelBase):
|
||||
__table_args__ = {'extend_existing': True}
|
||||
"""Table containing permissions associated with groups."""
|
||||
id = db.Column(db.Integer, autoincrement=True, primary_key=True)
|
||||
|
||||
@@ -47,21 +47,19 @@ class SMP35x(TelnetAdapter, RecorderAdapter):
|
||||
self.tn.write(password + "\n\r")
|
||||
|
||||
out = self.tn.assert_string_in_output("Login Administrator")
|
||||
print(out)
|
||||
# print(out)
|
||||
if not out[0]:
|
||||
print(out[1])
|
||||
# print(out[1])
|
||||
if "Password:" in out[1]:
|
||||
# TODO: loop until logged in...
|
||||
logger.warning("Could not login (as admin) with given password! {}".format(self.address))
|
||||
print("re-enter pw")
|
||||
logger.debug("re-enter password")
|
||||
self.tn.write(self.password + "\n\r")
|
||||
print(self.tn.assert_string_in_output("Login Administrator"))
|
||||
print("WRONG (admin) password!! Exiting!")
|
||||
print(self.password)
|
||||
# print(self.tn.assert_string_in_output("Login Administrator"))
|
||||
self.tn = None
|
||||
logger.error("Could definitely not login (as admin) with given password! {}".format(self.address))
|
||||
raise Exception("Could not login as administrator with given pw!")
|
||||
print("OK, we have admin rights!")
|
||||
# print("OK, we have admin rights!")
|
||||
|
||||
def _get_name(self):
|
||||
return RECORDER_MODEL_NAME
|
||||
|
||||
@@ -5,6 +5,8 @@ import subprocess
|
||||
import threading
|
||||
from io import StringIO
|
||||
from logging.handlers import MemoryHandler
|
||||
from pprint import pprint
|
||||
from typing import Union
|
||||
|
||||
import requests
|
||||
from requests.auth import HTTPBasicAuth
|
||||
@@ -15,6 +17,7 @@ from ics import Calendar
|
||||
|
||||
from backend import LrcException
|
||||
from backend.config import Config
|
||||
from backend.models import Recorder, RecorderModel
|
||||
from backend.recorder_adapters import RecorderAdapter
|
||||
|
||||
from backend.recorder_adapters.epiphan_base import Epiphan
|
||||
@@ -76,7 +79,7 @@ def get_capture_agents():
|
||||
return res.json()["agents"]["agent"]
|
||||
|
||||
|
||||
def get_recorder_details():
|
||||
def get_recorder_details_old():
|
||||
"""Temporary implementation using initial_recorders.json. Should be replaced by DB layer later!"""
|
||||
global recorders
|
||||
if recorders is None:
|
||||
@@ -86,10 +89,20 @@ def get_recorder_details():
|
||||
return recorders
|
||||
|
||||
|
||||
def get_recorder_details():
    """Return the list of all recorders, read from the DB on first use.

    The list is stored in the module-level ``recorders`` variable and reused by
    later calls; this function never refreshes it once it is set.
    """
    global recorders
    if recorders is not None:
        return recorders
    recorders = list(Recorder.get_all())
    return recorders
|
||||
|
||||
|
||||
def get_recorder_by_name(name: str):
|
||||
for r in get_recorder_details():
|
||||
if r["name"] == name:
|
||||
logger.debug("Got recorder {}".format(r.get("name")))
|
||||
if r.get("name") == name or r.get("name") + " Recorder" == name or r.get("name") == name + " Recorder":
|
||||
return r
|
||||
logger.error("Could not find recorder for name {}".format(name))
|
||||
return None
|
||||
|
||||
|
||||
@@ -97,115 +110,131 @@ def notify_users_of_problem(msg: str):
|
||||
pass
|
||||
|
||||
|
||||
def get_recorder_adapter(recorder_info: dict) -> RecorderAdapter:
|
||||
if "SMP" in recorder_info["type"]:
|
||||
rec = SMP35x(recorder_info['ip'], recorder_info['password'])
|
||||
def get_recorder_adapter(recorder_info: Union[dict, Recorder]) -> RecorderAdapter:
|
||||
if recorder_info is None:
|
||||
raise LrcException("Could not find recorder Adapter as recorder info was NONE!")
|
||||
try:
|
||||
type = recorder_info.get("type")
|
||||
except KeyError:
|
||||
type = RecorderModel.get_by_id(recorder_info.get('recorder_model_id')).model_name
|
||||
if "SMP" in type:
|
||||
rec = SMP35x(recorder_info.get('ip'), recorder_info.get('password'))
|
||||
else:
|
||||
rec = Epiphan(recorder_info['ip'], recorder_info["username"], recorder_info["password"])
|
||||
rec = Epiphan(recorder_info.get('ip'), recorder_info.get("username"), recorder_info.get("password"))
|
||||
return rec
|
||||
|
||||
|
||||
def check_capture_agent_state(a: dict):
|
||||
def check_capture_agent_state(recorder_agent: Union[Recorder, dict]):
|
||||
if recorder_agent.get('offline', False):
|
||||
logger.info("OK - Recorder {} is in offline / maintenance mode".format(recorder_agent.get('name')))
|
||||
return True, "Recorder is in offline / maintenance mode", recorder_agent.get('name')
|
||||
agent_state_error_msg = None
|
||||
logger.debug("Checking Agent {}".format(a['name']))
|
||||
c = get_calender(a['name'])
|
||||
logger.debug("Checking Agent {}".format(recorder_agent.get('name')))
|
||||
c = get_calender(recorder_agent.get('name'))
|
||||
is_recording_in_calendar = len(list(c.timeline.now())) >= 1
|
||||
if is_recording_in_calendar:
|
||||
logger.info("{} has entry in Calender and should therefore be recording... checking now!".format(a['name']))
|
||||
if a['state'] == "capturing":
|
||||
recorder_info = get_recorder_by_name(a['name'])
|
||||
logger.info("{} has entry in Calender and should therefore be recording... checking now!".format(recorder_agent.get('name')))
|
||||
if recorder_agent['state'] == "capturing":
|
||||
recorder_info = get_recorder_by_name(recorder_agent.get('name'))
|
||||
try:
|
||||
rec = get_recorder_adapter(recorder_info)
|
||||
if rec.is_recording():
|
||||
logger.info("OK – recorder {} is recording :)".format(a['name']))
|
||||
logger.info("OK – recorder {} is recording :)".format(recorder_agent.get('name')))
|
||||
with agent_states_lock:
|
||||
agent_states[a['name']] = 'OK - recorder is recording'
|
||||
agent_states[recorder_agent.get('name')] = 'OK - recorder is recording'
|
||||
else:
|
||||
logger.info(rec.get_recording_status())
|
||||
logger.error("FATAL - recorder {} must be recording but is not!!!!".format(a['name']))
|
||||
logger.error("FATAL - recorder {} must be recording but is not!!!!".format(recorder_agent.get('name')))
|
||||
agent_state_error_msg = "FATAL - recorder must be recording but is not!"
|
||||
with agent_states_lock:
|
||||
agent_states[a['name']] = 'FATAL - recorder is NOT recording, but should!'
|
||||
agent_states[recorder_agent['name']] = 'FATAL - recorder is NOT recording, but should!'
|
||||
except LrcException as e:
|
||||
logger.fatal("Exception occurred: {}".format(str(e)))
|
||||
logger.error("Could not check state of recorder {}, Address: {}".format(a['name'], recorder_info['ip']))
|
||||
logger.error("Could not check state of recorder {}, Address: {}".format(recorder_agent.get('name'), recorder_info.get('ip')))
|
||||
else:
|
||||
logger.error("FATAL: {} is not in capturing state...but should be!!".format(a['name']))
|
||||
logger.error("FATAL: {} is not in capturing state...but should be!!".format(recorder_agent.get('name')))
|
||||
agent_state_error_msg = "FATAL - is not in capturing state...but should be!"
|
||||
else:
|
||||
recorder_info = get_recorder_by_name(a['name'])
|
||||
recorder_info = get_recorder_by_name(recorder_agent.get('name'))
|
||||
try:
|
||||
rec = get_recorder_adapter(recorder_info)
|
||||
if rec.is_recording():
|
||||
logger.error("FATAL - recorder must not be recording!!!!")
|
||||
agent_state_error_msg = "FATAL - is not in capturing state...but should be!"
|
||||
with agent_states_lock:
|
||||
agent_states[a['name']] = 'FATAL - recorder IS recording, but should NOT!'
|
||||
agent_states[recorder_agent.get('name')] = 'FATAL - recorder IS recording, but should NOT!'
|
||||
else:
|
||||
logger.info("OK – recorder is not recording :)")
|
||||
with agent_states_lock:
|
||||
agent_states[a['name']] = 'OK - recorder is NOT recording'
|
||||
agent_states[recorder_agent.get('name')] = 'OK - recorder is NOT recording'
|
||||
except LrcException as e:
|
||||
logger.fatal("Exception occurred: {}".format(str(e)))
|
||||
logger.error("Could not check state of recorder {}, Address: {}".format(a['name'], recorder_info['ip']))
|
||||
agent_state_error_msg = "FATAL - Could not check state of recorder! Address: {}".format(recorder_info['ip'])
|
||||
logger.error("Could not check state of recorder {}, Address: {}".format(recorder_agent.get('name'), recorder_info.get('ip')))
|
||||
agent_state_error_msg = "FATAL - Could not check state of recorder! Address: {}".format(recorder_info.get('ip'))
|
||||
|
||||
if agent_state_error_msg is None:
|
||||
return True, "", a['name']
|
||||
return False, agent_state_error_msg, a['name']
|
||||
return True, agent_states[recorder_agent.get('name')], recorder_agent.get('name')
|
||||
return False, agent_state_error_msg, recorder_agent.get('name')
|
||||
|
||||
|
||||
def ping_capture_agent(a: dict):
|
||||
recorder_ip = get_recorder_by_name(a['name'])['ip']
|
||||
def ping_capture_agent(recorder_agent: Union[Recorder, dict]):
|
||||
if recorder_agent.get('offline', False):
|
||||
print("is offline!")
|
||||
logger.info("OK - Ping skipped, recorder {} is in offline mode.".format(recorder_agent.get('name')))
|
||||
return True, "Recorder is in offline / maintenance mode", recorder_agent.get('name')
|
||||
recorder_ip = get_recorder_by_name(recorder_agent.get('name')).get('ip')
|
||||
try:
|
||||
response = subprocess.check_call(
|
||||
subprocess.check_call(
|
||||
['ping', '-W', '10', '-c', '2', recorder_ip],
|
||||
# stderr=subprocess.STDOUT, # get all output
|
||||
stdout=subprocess.DEVNULL, # suppress output
|
||||
stderr=subprocess.DEVNULL,
|
||||
universal_newlines=True # return string not bytes
|
||||
)
|
||||
logger.info("Successfully pinged {} ({}). :-)".format(a['name'], recorder_ip))
|
||||
return True, "", a['name']
|
||||
logger.info("Successfully pinged {} ({}). :-)".format(recorder_agent.get('name'), recorder_ip))
|
||||
return True, "Successfully pinged {}. :-)".format(recorder_agent.get('name')), recorder_agent.get('name')
|
||||
except subprocess.CalledProcessError:
|
||||
logger.error("Can not ping {} ({})!!".format(a['name'], recorder_ip))
|
||||
return False, "Unable to ping", a['name']
|
||||
logger.error("Can not ping {} ({})!!".format(recorder_agent.get('name'), recorder_ip))
|
||||
return False, "Unable to ping {} ({})".format(recorder_agent.get('name'), recorder_ip), recorder_agent.get('name')
|
||||
|
||||
|
||||
agents = get_capture_agents()
|
||||
logger.info("Got {} capture agents that will be checked...".format(len(agents)))
|
||||
if __name__ == '__main__':
|
||||
agents = get_capture_agents()
|
||||
logger.info("Got {} capture agents that will be checked...".format(len(agents)))
|
||||
|
||||
for a in agents:
|
||||
agent_states[a['name']] = 'PROBLEMATIC - unknown'
|
||||
for a in agents:
|
||||
agent_states[a.get('name')] = 'PROBLEMATIC - unknown'
|
||||
|
||||
|
||||
# pool = ThreadPool(5)
|
||||
# pool.map(check_capture_agent_state, agents)
|
||||
# pool = ThreadPool(5)
|
||||
# pool.map(check_capture_agent_state, agents)
|
||||
|
||||
NUM_THREADS = 8
|
||||
NUM_THREADS = 8
|
||||
|
||||
with ThreadPool(NUM_THREADS) as pool:
|
||||
results = [pool.apply_async(ping_capture_agent, (agent,)) for agent in agents]
|
||||
try:
|
||||
[res.get(timeout=12) for res in results]
|
||||
except TimeoutError as e:
|
||||
logger.error("Timeout while pinging capture agent! {}".format(e))
|
||||
recorders = get_recorder_details()
|
||||
with ThreadPool(NUM_THREADS) as pool:
|
||||
# results = [pool.apply_async(ping_capture_agent, (agent,)) for agent in agents]
|
||||
results = [pool.apply_async(ping_capture_agent, (agent,)) for agent in recorders]
|
||||
try:
|
||||
[res.get(timeout=12) for res in results]
|
||||
except TimeoutError as e:
|
||||
logger.error("Timeout while pinging capture agent! {}".format(e))
|
||||
|
||||
|
||||
with ThreadPool(NUM_THREADS) as pool:
|
||||
results = [pool.apply_async(check_capture_agent_state, (agent,)) for agent in agents]
|
||||
try:
|
||||
[res.get(timeout=12) for res in results]
|
||||
except TimeoutError as e:
|
||||
logger.error("Timeout while getting capture agent state! {}".format(e))
|
||||
with ThreadPool(NUM_THREADS) as pool:
|
||||
results = [pool.apply_async(check_capture_agent_state, (agent,)) for agent in agents]
|
||||
try:
|
||||
[res.get(timeout=12) for res in results]
|
||||
except TimeoutError as e:
|
||||
logger.error("Timeout while getting capture agent state! {}".format(e))
|
||||
|
||||
logger.info("DONE checking capture agents / recorders!")
|
||||
logger.info("DONE checking capture agents / recorders!")
|
||||
|
||||
logged_events = rec_err_state_log_stream.getvalue()
|
||||
if len(logged_events) > 0:
|
||||
logged_events += "\n\n=============\nAgent States:\n\n{}".format(''.join(
|
||||
"{:<48}: {}\n".format(a, agent_states[a]) for a in agent_states
|
||||
))
|
||||
send_error_mail(logged_events, "Errors have been detected while checking recorder states!")
|
||||
logged_events = rec_err_state_log_stream.getvalue()
|
||||
if len(logged_events) > 0:
|
||||
logged_events += "\n\n=============\nAgent States:\n\n{}".format(''.join(
|
||||
"{:<48}: {}\n".format(a, agent_states[a]) for a in agent_states
|
||||
))
|
||||
send_error_mail(logged_events, "Errors have been detected while checking recorder states!")
|
||||
|
||||
#mem_handler.close()
|
||||
#mem_handler.close()
|
||||
|
||||
Reference in New Issue
Block a user