now scraping rooms from campus mgmt

2019-08-13 15:29:37 +02:00
parent 48505b76ea
commit f70cbdc463
17 changed files with 364 additions and 61 deletions
--- a/tools/model_updater.py
+++ b/tools/model_updater.py
@@ -21,8 +21,9 @@ def calculate_md5_checksum(string_to_md5_sum: str):


 def create_recorder_commands_for_recorder_adapter(command_definitions: dict, recorder_model: RecorderModel):
-    existing_recorder_commands = RecorderCommand.query.filter(and_(RecorderCommand.name.in_(command_definitions.keys())),
-                                                              RecorderCommand.recorder_model == recorder_model)
+    existing_recorder_commands = RecorderCommand.query.filter(
+        and_(RecorderCommand.name.in_(command_definitions.keys())),
+        RecorderCommand.recorder_model == recorder_model)
    existing_commands = set()
    for existing_command in existing_recorder_commands:
        existing_commands.add(existing_command.name)
@@ -51,7 +52,9 @@ def update_recorder_models_database():
        r_m = RecorderModel.get_by_adapter_id(r_a["id"])
        model_checksum = calculate_md5_checksum(dumps(r_a["commands"]))
        if r_m is None:
-            r_m = RecorderModel(record_adapter_id=r_a["id"], model_name=r_a["name"], checksum=model_checksum)
+            r_m = RecorderModel(record_adapter_id=r_a["id"], model_name=r_a["name"], checksum=model_checksum,
+                                requires_user=r_a.get('requires_user', None),
+                                requires_password=r_a.get('requires_password', None))
            db.session.add(r_m)
            db.session.flush()
            db.session.refresh(r_m)
--- a/tools/scrape_rooms.py
+++ b/tools/scrape_rooms.py
@@ -0,0 +1,60 @@
+from pprint import pprint
+
+import re
+import requests
+from bs4 import BeautifulSoup
+
+
+def scrape_rooms():
+    """Scrape the room list from the campus.kit.edu room-group page.
+
+    The first table row is skipped and rows with fewer than four links are
+    ignored, so one malformed row does not abort the whole scrape.
+
+    Returns:
+        A list of dicts with the keys 'name', 'room_number', 'building_name'
+        and 'building_number'.
+    """
+    headers = {
+        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}
+
+    room_url = "https://campus.kit.edu/live-stud/campus/all/roomgroup.asp?roomgroupcolumn1=H%F6r%2D%2FLehrsaal&tguid=0x1A35C3A1490748388EBEBA3943EFCDD5"
+    # Without a timeout a stalled server would block the scraper forever.
+    page = requests.get(room_url, headers=headers, timeout=30)
+    soup = BeautifulSoup(page.content, 'html.parser')
+
+    # Optional leading number (two digits, any character, two digits), one
+    # whitespace character, then the remaining text.
+    re_exp = re.compile(r"^(\d\d.\d\d)?\s(.*)")
+
+    def split_number(text):
+        """Split link text into (number, rest); tolerate a missing text."""
+        match = re_exp.match(text) if text is not None else None
+        if match is None:
+            return None, text
+        return match.groups()
+
+    rooms = []
+    for tr in soup.find_all('tr')[1:]:  # skip first row
+        links = tr.find_all('a')
+        if len(links) < 4:
+            # Row lacks the links indexed below (0, 1 and 3); nothing to parse.
+            continue
+
+        name_number, name = split_number(links[0].string)
+        building_number, building_name = split_number(links[3].string)
+        if building_number is None:
+            # Fall back to the number parsed from the room name rather than
+            # discarding it.
+            building_number = name_number
+
+        rooms.append({'name': name,
+                      'room_number': links[1].string,
+                      'building_name': building_name,
+                      'building_number': building_number})
+
+    return rooms
+
+
+if __name__ == '__main__':
+    pprint(scrape_rooms())  # show the scraped rooms when run as a script