now scraping rooms from campus mgmt
This commit is contained in:
@@ -21,8 +21,9 @@ def calculate_md5_checksum(string_to_md5_sum: str):
|
||||
|
||||
|
||||
def create_recorder_commands_for_recorder_adapter(command_definitions: dict, recorder_model: RecorderModel):
|
||||
existing_recorder_commands = RecorderCommand.query.filter(and_(RecorderCommand.name.in_(command_definitions.keys())),
|
||||
RecorderCommand.recorder_model == recorder_model)
|
||||
existing_recorder_commands = RecorderCommand.query.filter(
|
||||
and_(RecorderCommand.name.in_(command_definitions.keys())),
|
||||
RecorderCommand.recorder_model == recorder_model)
|
||||
existing_commands = set()
|
||||
for existing_command in existing_recorder_commands:
|
||||
existing_commands.add(existing_command.name)
|
||||
@@ -51,7 +52,9 @@ def update_recorder_models_database():
|
||||
r_m = RecorderModel.get_by_adapter_id(r_a["id"])
|
||||
model_checksum = calculate_md5_checksum(dumps(r_a["commands"]))
|
||||
if r_m is None:
|
||||
r_m = RecorderModel(record_adapter_id=r_a["id"], model_name=r_a["name"], checksum=model_checksum)
|
||||
r_m = RecorderModel(record_adapter_id=r_a["id"], model_name=r_a["name"], checksum=model_checksum,
|
||||
requires_user=r_a.get('requires_user', None),
|
||||
requires_password=r_a.get('requires_password', None))
|
||||
db.session.add(r_m)
|
||||
db.session.flush()
|
||||
db.session.refresh(r_m)
|
||||
|
||||
60
tools/scrape_rooms.py
Normal file
60
tools/scrape_rooms.py
Normal file
@@ -0,0 +1,60 @@
|
||||
from pprint import pprint
|
||||
|
||||
import re
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
|
||||
# Lecture-hall listing of the KIT campus management system.
_ROOM_LIST_URL = "https://campus.kit.edu/live-stud/campus/all/roomgroup.asp?roomgroupcolumn1=H%F6r%2D%2FLehrsaal&tguid=0x1A35C3A1490748388EBEBA3943EFCDD5"

# A browser-like User-Agent is sent with the request.
_REQUEST_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}

# Optional leading number of the form "NN.NN", exactly one whitespace
# character, then the rest of the label. The dot is escaped so that e.g. a
# five-digit run is not mistaken for a building number.
_NUMBERED_LABEL_RE = re.compile(r"^(\d\d\.\d\d)?\s(.*)")

# Anchors per table row that are read: [0] room name, [1] room number,
# [3] building label.
_MIN_ANCHORS_PER_ROW = 4


def _anchor_text(anchor):
    """Return the anchor's text as a plain ``str``, or ``None`` if it has none.

    ``Tag.string`` is ``None`` when the tag does not contain exactly one
    string. Converting to ``str`` detaches the value from the parse tree.
    """
    text = anchor.string
    return None if text is None else str(text)


def _split_numbered_label(label):
    """Split ``"10.21 Some name"`` into ``("10.21", "Some name")``.

    Returns ``(None, label)`` unchanged when the label does not match the
    pattern, which includes ``label`` being ``None``.
    """
    if label is None:
        return None, None
    match = _NUMBERED_LABEL_RE.match(label)
    if match is None:
        return None, label
    return match.groups()


def scrape_rooms(room_url=_ROOM_LIST_URL, timeout=30) -> list:
    """Download the room listing page and extract one dict per table row.

    Args:
        room_url: URL of the listing page; defaults to the lecture-hall list.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list of dicts with the keys ``name``, ``room_number``,
        ``building_name`` and ``building_number``. Values are ``None`` when
        the page does not provide them.

    Raises:
        requests.RequestException: if the request fails, times out or the
            server answers with an HTTP error status.
    """
    page = requests.get(room_url, headers=_REQUEST_HEADERS, timeout=timeout)
    # Do not try to parse an error page as if it were the room table.
    page.raise_for_status()
    soup = BeautifulSoup(page.content, 'html.parser')

    rooms = []
    # The first <tr> is skipped (treated as the header row).
    for row in soup.find_all('tr')[1:]:
        anchors = row.find_all('a')
        if len(anchors) < _MIN_ANCHORS_PER_ROW:
            # Not a room row (e.g. layout or footer row) - nothing to extract.
            continue

        number_from_name, name = _split_numbered_label(_anchor_text(anchors[0]))
        number_from_building, building_name = _split_numbered_label(_anchor_text(anchors[3]))

        rooms.append({
            'name': name,
            'room_number': _anchor_text(anchors[1]),
            'building_name': building_name,
            # Prefer the number attached to the building label; fall back to
            # the one prefixed to the room name instead of discarding it.
            'building_number': number_from_building or number_from_name,
        })

    return rooms
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Show the scraped rooms when run as a script; the result was previously
    # computed and silently discarded.
    pprint(scrape_rooms())
|
||||
Reference in New Issue
Block a user