changed a lot regarding rec state, etc.

This commit is contained in:
Tobias Kurze
2019-12-03 16:05:02 +01:00
parent a709dbcaef
commit 190f728eb7
13 changed files with 369 additions and 831 deletions

View File

@@ -5,6 +5,8 @@ import subprocess
import threading
from io import StringIO
from logging.handlers import MemoryHandler
from pprint import pprint
from typing import Union
import requests
from requests.auth import HTTPBasicAuth
@@ -15,6 +17,7 @@ from ics import Calendar
from backend import LrcException
from backend.config import Config
from backend.models import Recorder, RecorderModel
from backend.recorder_adapters import RecorderAdapter
from backend.recorder_adapters.epiphan_base import Epiphan
@@ -76,7 +79,7 @@ def get_capture_agents():
return res.json()["agents"]["agent"]
def get_recorder_details():
def get_recorder_details_old():
"""Temporary implementation using initial_recorders.json. Should be replaced by DB layer later!"""
global recorders
if recorders is None:
@@ -86,10 +89,20 @@ def get_recorder_details():
return recorders
def get_recorder_details():
    """Return the list of recorders, loading it from the DB on first use.

    The result of ``Recorder.get_all()`` is materialised into a list once and
    stored in the module-level ``recorders`` variable; subsequent calls return
    that same cached list.
    """
    global recorders
    if recorders is not None:
        return recorders
    recorders = list(Recorder.get_all())
    return recorders
def get_recorder_by_name(name: str):
    """Look up a recorder by name in the list from ``get_recorder_details()``.

    A recorder matches when its name equals ``name`` exactly, or when the two
    differ only by a trailing ``" Recorder"`` suffix on either side.

    :param name: name to search for; ``None`` is tolerated and only matches
        via plain equality.
    :return: the first matching recorder entry, or ``None`` (after logging an
        error) when nothing matches.
    """
    suffix = " Recorder"
    for r in get_recorder_details():
        rec_name = r.get("name")
        logger.debug("Got recorder {}".format(rec_name))
        if rec_name == name:
            return r
        # Only build the suffixed variants when both sides are strings;
        # concatenating onto None would raise TypeError.
        if isinstance(rec_name, str) and isinstance(name, str):
            if rec_name + suffix == name or rec_name == name + suffix:
                return r
    logger.error("Could not find recorder for name {}".format(name))
    return None
@@ -97,115 +110,131 @@ def notify_users_of_problem(msg: str):
pass
def get_recorder_adapter(recorder_info: Union[dict, Recorder]) -> RecorderAdapter:
    """Build the adapter object used to talk to the given recorder.

    The recorder type is read from the ``"type"`` entry; when that is not
    available, the model name is looked up through ``RecorderModel`` using
    ``recorder_model_id``. A type containing ``"SMP"`` yields an ``SMP35x``
    adapter, anything else an ``Epiphan`` adapter.

    :param recorder_info: recorder description (dict or ``Recorder``).
    :return: the adapter instance for the recorder.
    :raises LrcException: if ``recorder_info`` is ``None`` or the recorder
        type cannot be determined.
    """
    if recorder_info is None:
        raise LrcException("Could not find recorder Adapter as recorder info was NONE!")

    try:
        recorder_type = recorder_info.get("type")
    except KeyError:
        recorder_type = None

    # dict.get() returns None instead of raising KeyError, so the model
    # fallback has to trigger on a missing value as well.
    if recorder_type is None:
        model_id = recorder_info.get('recorder_model_id')
        model = RecorderModel.get_by_id(model_id) if model_id is not None else None
        recorder_type = getattr(model, 'model_name', None)

    if recorder_type is None:
        raise LrcException(
            "Could not determine recorder type for recorder {}".format(recorder_info.get('name')))

    if "SMP" in recorder_type:
        return SMP35x(recorder_info.get('ip'), recorder_info.get('password'))
    return Epiphan(recorder_info.get('ip'), recorder_info.get("username"), recorder_info.get("password"))
def check_capture_agent_state(recorder_agent: Union[Recorder, dict]):
    """Compare what a recorder is doing with what its calendar says it should do.

    The calendar for the agent is fetched via ``get_calender``; if it has an
    entry for "now", the agent is expected to be in state ``"capturing"`` and
    the recorder is expected to be recording. Otherwise the recorder must not
    be recording. The outcome of a successful recorder query is stored in
    ``agent_states`` under ``agent_states_lock``.

    :param recorder_agent: agent / recorder description; read via ``.get``.
    :return: tuple ``(ok, message, name)`` where ``ok`` is ``True`` when
        everything is as expected (or the recorder is flagged offline),
        ``message`` describes the state or the problem, and ``name`` is the
        agent name.
    """
    name = recorder_agent.get('name')

    if recorder_agent.get('offline', False):
        logger.info("OK - Recorder {} is in offline / maintenance mode".format(name))
        return True, "Recorder is in offline / maintenance mode", name

    logger.debug("Checking Agent {}".format(name))
    c = get_calender(name)
    is_recording_in_calendar = len(list(c.timeline.now())) >= 1

    if is_recording_in_calendar:
        logger.info("{} has entry in Calender and should therefore be recording... checking now!".format(name))
        if recorder_agent.get('state') != "capturing":
            logger.error("FATAL: {} is not in capturing state...but should be!!".format(name))
            return False, "FATAL - is not in capturing state...but should be!", name

    recorder_info = get_recorder_by_name(name)
    # get_recorder_by_name() returns None for unknown recorders; resolve the
    # address up front so the error path below cannot fail on None.
    address = recorder_info.get('ip') if recorder_info is not None else None

    try:
        rec = get_recorder_adapter(recorder_info)
        recording = rec.is_recording()
        if is_recording_in_calendar and not recording:
            logger.info(rec.get_recording_status())
    except LrcException as e:
        logger.fatal("Exception occurred: {}".format(str(e)))
        logger.error("Could not check state of recorder {}, Address: {}".format(name, address))
        # A recorder that cannot be queried is a failure in both the
        # "should record" and the "should not record" case.
        return False, "FATAL - Could not check state of recorder! Address: {}".format(address), name

    error_msg = None
    if is_recording_in_calendar:
        if recording:
            logger.info("OK recorder {} is recording :)".format(name))
            state = 'OK - recorder is recording'
        else:
            logger.error("FATAL - recorder {} must be recording but is not!!!!".format(name))
            error_msg = "FATAL - recorder must be recording but is not!"
            state = 'FATAL - recorder is NOT recording, but should!'
    else:
        if recording:
            logger.error("FATAL - recorder must not be recording!!!!")
            error_msg = "FATAL - recorder IS recording, but should NOT!"
            state = 'FATAL - recorder IS recording, but should NOT!'
        else:
            logger.info("OK recorder is not recording :)")
            state = 'OK - recorder is NOT recording'

    with agent_states_lock:
        agent_states[name] = state

    if error_msg is None:
        return True, state, name
    return False, error_msg, name
def ping_capture_agent(recorder_agent: Union[Recorder, dict]):
    """Ping the recorder that belongs to the given agent.

    Recorders flagged ``offline`` are skipped and reported as OK. Otherwise
    the recorder is looked up by name and its ``ip`` is pinged twice using
    the system ``ping`` command.

    :param recorder_agent: agent / recorder description; read via ``.get``.
    :return: tuple ``(ok, message, name)``; ``ok`` is ``False`` when the
        recorder or its IP address is unknown, or when ``ping`` exits with a
        non-zero status.
    """
    name = recorder_agent.get('name')

    if recorder_agent.get('offline', False):
        logger.info("OK - Ping skipped, recorder {} is in offline mode.".format(name))
        return True, "Recorder is in offline / maintenance mode", name

    # get_recorder_by_name() returns None for unknown recorders, and a known
    # recorder may still lack an address; neither can be pinged.
    recorder_info = get_recorder_by_name(name)
    recorder_ip = recorder_info.get('ip') if recorder_info is not None else None
    if not recorder_ip:
        logger.error("Can not ping {} ({})!!".format(name, recorder_ip))
        return False, "Unable to ping {} ({})".format(name, recorder_ip), name

    try:
        subprocess.check_call(
            ['ping', '-W', '10', '-c', '2', recorder_ip],
            stdout=subprocess.DEVNULL,  # suppress output
            stderr=subprocess.DEVNULL,
            universal_newlines=True  # return string not bytes
        )
    except subprocess.CalledProcessError:
        logger.error("Can not ping {} ({})!!".format(name, recorder_ip))
        return False, "Unable to ping {} ({})".format(name, recorder_ip), name

    logger.info("Successfully pinged {} ({}). :-)".format(name, recorder_ip))
    return True, "Successfully pinged {}. :-)".format(name), name
if __name__ == '__main__':
    # Script entry point: ping every recorder, then compare every capture
    # agent's state with its calendar, and mail a report if anything was
    # written to the error log stream.
    agents = get_capture_agents()
    logger.info("Got {} capture agents that will be checked...".format(len(agents)))

    # Every agent starts out as "unknown" until a check overwrites the entry.
    for a in agents:
        agent_states[a.get('name')] = 'PROBLEMATIC - unknown'

    NUM_THREADS = 8

    recorders = get_recorder_details()

    # Phase 1 pings the recorders, phase 2 checks the agents' recording state.
    # NOTE(review): assuming ThreadPool is multiprocessing.pool.ThreadPool, a
    # timed-out get() raises multiprocessing.TimeoutError - confirm that the
    # TimeoutError name in scope here is that class and not the builtin.
    for worker, targets, timeout_msg in (
            (ping_capture_agent, recorders, "Timeout while pinging capture agent! {}"),
            (check_capture_agent_state, agents, "Timeout while getting capture agent state! {}"),
    ):
        with ThreadPool(NUM_THREADS) as pool:
            results = [pool.apply_async(worker, (agent,)) for agent in targets]
            try:
                for res in results:
                    res.get(timeout=12)
            except TimeoutError as e:
                logger.error(timeout_msg.format(e))

    logger.info("DONE checking capture agents / recorders!")

    logged_events = rec_err_state_log_stream.getvalue()
    if logged_events:
        state_lines = [
            "{:<48}: {}\n".format(agent_name, state)
            for agent_name, state in agent_states.items()
        ]
        logged_events += "\n\n=============\nAgent States:\n\n{}".format(''.join(state_lines))
        send_error_mail(logged_events, "Errors have been detected while checking recorder states!")

    # mem_handler.close()