Files
lrc-backend/backend/tools/simple_state_checker.py
2019-11-20 12:41:05 +01:00

201 lines
7.3 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import json
import os
import logging
import subprocess
import threading
from io import StringIO
from logging.handlers import MemoryHandler
import requests
from requests.auth import HTTPBasicAuth
from multiprocessing.pool import ThreadPool
from multiprocessing.context import TimeoutError
from ics import Calendar
from backend import LrcException
from backend.config import Config
from backend.recorder_adapters import RecorderAdapter
from backend.recorder_adapters.epiphan_base import Epiphan
from backend.recorder_adapters.extron_smp import SMP35x
from backend.tools.send_mail import send_error_mail, get_smtp_error_handler
logger = logging.getLogger("lrc.tools.simple_state_checker")
smtp_error_handler = get_smtp_error_handler(subject="Errors have been detected while checking recorder states!")
#mem_handler = MemoryHandler(capacity=100, flushLevel=logging.FATAL, target=smtp_error_handler)
#mem_handler.setLevel(logging.WARNING)
rec_err_state_log_stream = StringIO()
rec_err_state_log_stream_handler = logging.StreamHandler(stream=rec_err_state_log_stream)
rec_err_state_log_stream_handler.setLevel(logging.WARNING)
logger.addHandler(rec_err_state_log_stream_handler)
#logger.addHandler(mem_handler)
base_url = "https://opencast.bibliothek.kit.edu"
session = requests.session()
session.auth = HTTPBasicAuth(Config.OPENCAST_USER, Config.OPENCAST_PW)
config = {'service_urls': {}}
recorders = None
agent_states_lock = threading.RLock()
agent_states = {}
def get_service_url(service_type: str):
if service_type in config['service_urls']:
return config['service_urls'][service_type]
params = {'serviceType': service_type}
url = base_url + "/services/available.json"
res = session.get(url, params=params)
if res.ok:
service = res.json()["services"]["service"]
config["service_urls"][service_type] = service["host"] + \
service["path"]
return service["host"] + service["path"]
return None
def get_calender(rec_id):
params = {'agentid': rec_id}
url = get_service_url('org.opencastproject.scheduler') + "/calendars"
res = session.get(url, params=params)
if res.ok:
return Calendar(res.text)
def get_capture_agents():
url = get_service_url("org.opencastproject.capture.admin") + "/agents.json"
res = session.get(url)
if res.ok:
return res.json()["agents"]["agent"]
def get_recorder_details():
"""Temporary implementation using initial_recorders.json. Should be replaced by DB layer later!"""
global recorders
if recorders is None:
f = os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir, 'models', 'initial_recorders.json'))
with open(f, 'r') as json_file:
recorders = json.load(json_file)['recorders']
return recorders
def get_recorder_by_name(name: str):
for r in get_recorder_details():
if r["name"] == name:
return r
return None
def notify_users_of_problem(msg: str):
pass
def get_recorder_adapter(recorder_info: dict) -> RecorderAdapter:
if "SMP" in recorder_info["type"]:
rec = SMP35x(recorder_info['ip'], recorder_info['password'])
else:
rec = Epiphan(recorder_info['ip'], recorder_info["username"], recorder_info["password"])
return rec
def check_capture_agent_state(a: dict):
logger.debug("Checking Agent {}".format(a['name']))
c = get_calender(a['name'])
is_recording_in_calendar = len(list(c.timeline.now())) >= 1
if is_recording_in_calendar:
logger.info("{} has entry in Calender and should therefore be recording... checking now!".format(a['name']))
if a['state'] == "capturing":
recorder_info = get_recorder_by_name(a['name'])
try:
rec = get_recorder_adapter(recorder_info)
if rec.is_recording():
logger.info("OK recorder {} is recording :)".format(a['name']))
with agent_states_lock:
agent_states[a['name']] = 'OK - recorder is recording'
else:
logger.info(rec.get_recording_status())
logger.error("FATAL - recorder {} must be recording but is not!!!!".format(a['name']))
with agent_states_lock:
agent_states[a['name']] = 'FATAL - recorder is NOT recording, but should!'
except LrcException as e:
logger.fatal("Exception occurred: {}".format(str(e)))
logger.error("Could not check state of recorder {}, Address: {}".format(a['name'], recorder_info['ip']))
else:
logger.error("FATAL: {} is not in capturing state...but should be!!".format(a['name']))
else:
recorder_info = get_recorder_by_name(a['name'])
try:
rec = get_recorder_adapter(recorder_info)
if rec.is_recording():
logger.error("FATAL - recorder must not be recording!!!!")
with agent_states_lock:
agent_states[a['name']] = 'FATAL - recorder IS recording, but should NOT!'
else:
logger.info("OK recorder is not recording :)")
with agent_states_lock:
agent_states[a['name']] = 'OK - recorder is NOT recording'
except LrcException as e:
logger.fatal("Exception occurred: {}".format(str(e)))
logger.error("Could not check state of recorder {}, Address: {}".format(a['name'], recorder_info['ip']))
def ping_capture_agent(a: dict):
recorder_ip = get_recorder_by_name(a['name'])['ip']
try:
response = subprocess.check_call(
['ping', '-W', '10', '-c', '2', recorder_ip],
# stderr=subprocess.STDOUT, # get all output
stdout=subprocess.DEVNULL, # suppress output
stderr=subprocess.DEVNULL,
universal_newlines=True # return string not bytes
)
logger.info("Successfully pinged {} ({}). :-)".format(a['name'], recorder_ip))
except subprocess.CalledProcessError:
logger.error("Can not ping {} ({})!!".format(a['name'], recorder_ip))
agents = get_capture_agents()
logger.info("Got {} capture agents that will be checked...".format(len(agents)))
for a in agents:
agent_states[a['name']] = 'PROBLEMATIC - unknown'
# pool = ThreadPool(5)
# pool.map(check_capture_agent_state, agents)
NUM_THREADS = 8
with ThreadPool(NUM_THREADS) as pool:
results = [pool.apply_async(ping_capture_agent, (agent,)) for agent in agents]
try:
[res.get(timeout=12) for res in results]
except TimeoutError as e:
logger.error("Timeout while pinging capture agent! {}".format(e))
with ThreadPool(NUM_THREADS) as pool:
results = [pool.apply_async(check_capture_agent_state, (agent,)) for agent in agents]
try:
[res.get(timeout=12) for res in results]
except TimeoutError as e:
logger.error("Timeout while getting capture agent state! {}".format(e))
logger.info("DONE checking capture agents / recorders!")
logged_events = rec_err_state_log_stream.getvalue()
if len(logged_events) > 0:
logged_events += "\n\n=============\nAgent States:\n\n{}".format(''.join(
"{:<48}: {}\n".format(a, agent_states[a]) for a in agent_states
))
send_error_mail(logged_events, "Errors have been detected while checking recorder states!")
#mem_handler.close()