now scraping rooms from campus mgmt

This commit is contained in:
Tobias Kurze
2019-08-13 15:29:37 +02:00
parent 48505b76ea
commit f70cbdc463
17 changed files with 364 additions and 61 deletions

View File

@@ -21,8 +21,9 @@ def calculate_md5_checksum(string_to_md5_sum: str):
def create_recorder_commands_for_recorder_adapter(command_definitions: dict, recorder_model: RecorderModel):
existing_recorder_commands = RecorderCommand.query.filter(and_(RecorderCommand.name.in_(command_definitions.keys())),
RecorderCommand.recorder_model == recorder_model)
existing_recorder_commands = RecorderCommand.query.filter(
and_(RecorderCommand.name.in_(command_definitions.keys())),
RecorderCommand.recorder_model == recorder_model)
existing_commands = set()
for existing_command in existing_recorder_commands:
existing_commands.add(existing_command.name)
@@ -51,7 +52,9 @@ def update_recorder_models_database():
r_m = RecorderModel.get_by_adapter_id(r_a["id"])
model_checksum = calculate_md5_checksum(dumps(r_a["commands"]))
if r_m is None:
r_m = RecorderModel(record_adapter_id=r_a["id"], model_name=r_a["name"], checksum=model_checksum)
r_m = RecorderModel(record_adapter_id=r_a["id"], model_name=r_a["name"], checksum=model_checksum,
requires_user=r_a.get('requires_user', None),
requires_password=r_a.get('requires_password', None))
db.session.add(r_m)
db.session.flush()
db.session.refresh(r_m)

60
tools/scrape_rooms.py Normal file
View File

@@ -0,0 +1,60 @@
from pprint import pprint
import re
import requests
from bs4 import BeautifulSoup
# Room-group listing ("Hör-/Lehrsaal") on campus.kit.edu that is scraped by default.
_ROOM_LIST_URL = "https://campus.kit.edu/live-stud/campus/all/roomgroup.asp?roomgroupcolumn1=H%F6r%2D%2FLehrsaal&tguid=0x1A35C3A1490748388EBEBA3943EFCDD5"

# Browser-like User-Agent sent with the request.
_REQUEST_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}

# Optional "NN?NN" prefix, one whitespace character, then the remaining label.
# NOTE(review): the "." between the digit pairs is unescaped, so any separator
# character is accepted, not only a literal dot -- confirm whether that is intended.
_NUMBERED_LABEL = re.compile(r"^(\d\d.\d\d)?\s(.*)")


def _split_number_prefix(label):
    """Split an optional leading number off *label*; return ``(number, rest)``.

    Returns ``(None, label)`` unchanged when the label does not match the pattern.
    """
    match = _NUMBERED_LABEL.match(label)
    if match is None:
        return None, label
    return match.groups()


def _anchor_text(anchor):
    """Return the text of a link tag, also when it has more than one child node."""
    text = anchor.string
    # ``.string`` is None for tags with several children; matching None against
    # the regex would raise TypeError, so fall back to the concatenated text.
    return text if text is not None else anchor.get_text()


def scrape_rooms(room_url=_ROOM_LIST_URL, timeout=30):
    """Scrape the room list from the campus management web page.

    The first table row is treated as the header and skipped. In every other
    row the first link is read as the room label, the second as the room
    number and the fourth as the building label (presumably matching the
    column order of the page -- verify if the page layout changes).

    Args:
        room_url: URL of the room listing page; defaults to the lecture-hall
            room group on campus.kit.edu.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list of dicts with the keys ``name``, ``room_number``,
        ``building_name`` and ``building_number``.

    Raises:
        requests.RequestException: If the request fails, times out or the
            server answers with an HTTP error status.
    """
    page = requests.get(room_url, headers=_REQUEST_HEADERS, timeout=timeout)
    page.raise_for_status()
    soup = BeautifulSoup(page.content, 'html.parser')

    rooms = []
    for row in soup.find_all('tr')[1:]:  # [1:] skips the header row
        anchors = row.find_all('a')
        if len(anchors) < 4:
            # Not a room row (e.g. spacer or footer); indexing would fail.
            continue

        number_from_name, name = _split_number_prefix(_anchor_text(anchors[0]))
        number_from_building, building_name = _split_number_prefix(
            _anchor_text(anchors[3]))

        rooms.append({
            'name': name,
            'room_number': anchors[1].string,
            'building_name': building_name,
            # Prefer the number from the building label; fall back to the one
            # embedded in the room label instead of discarding it.
            'building_number': number_from_building or number_from_name,
        })
    return rooms
if __name__ == '__main__':
    # Print the scraped rooms so that running the script shows its result.
    pprint(scrape_rooms())