moved everything to a new module called backend
This commit is contained in:
60
backend/tools/scrape_rooms.py
Normal file
60
backend/tools/scrape_rooms.py
Normal file
@@ -0,0 +1,60 @@
|
||||
from pprint import pprint
|
||||
|
||||
import re
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
|
||||
# Matches link labels shaped like "<dd.dd> <text>".  The number is optional
# but the separating whitespace is not, so a label that has neither does not
# match at all.  The dot is escaped so only a literal "." separates the digit
# pairs (the unescaped form accepted any character there).
_NUMBERED_LABEL = re.compile(r"^(\d\d\.\d\d)?\s(.*)")


def _split_numbered_label(label):
    """Split a link label into a ``(number, text)`` pair.

    Returns ``(None, label)`` when the label does not match the pattern and
    ``(None, None)`` when the label itself is ``None`` (BeautifulSoup's
    ``.string`` yields ``None`` for tags without a single string child).
    """
    if label is None:
        return None, None
    match = _NUMBERED_LABEL.match(label)
    if match is None:
        return None, label
    return match.groups()


def scrape_rooms():
    """Scrape the room-group listing at campus.kit.edu.

    Every ``<tr>`` after the first one in the page is treated as one room.
    Within a row, link 0 is read as the room label, link 1 as the room
    number and link 3 as the building label.  Rows with fewer than four
    links are skipped.

    Returns:
        A list of dicts with the keys ``name``, ``room_number``,
        ``building_name`` and ``building_number``.  ``building_number`` is
        taken from the building label and falls back to the number found in
        the room label; it is ``None`` when neither carries one.

    Raises:
        requests.RequestException: if the request fails, times out, or the
            server answers with an HTTP error status.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}

    room_url = "https://campus.kit.edu/live-stud/campus/all/roomgroup.asp?roomgroupcolumn1=H%F6r%2D%2FLehrsaal&tguid=0x1A35C3A1490748388EBEBA3943EFCDD5"

    # Without a timeout requests can block indefinitely on a stalled server.
    page = requests.get(room_url, headers=headers, timeout=30)
    # Fail loudly instead of parsing an error page as if it were the listing.
    page.raise_for_status()
    soup = BeautifulSoup(page.content, 'html.parser')

    rooms = []
    # The first row is skipped, as before (presumably the table header).
    for tr in soup.find_all('tr')[1:]:
        anchors = tr.find_all('a')
        if len(anchors) < 4:
            # Not a room row: the links this function reads are missing.
            continue

        name_number, name = _split_numbered_label(anchors[0].string)
        building_number, building_name = _split_numbered_label(anchors[3].string)
        if building_number is None:
            # Keep the number parsed from the room label rather than
            # discarding it when the building label has none.
            building_number = name_number

        rooms.append({'name': name,
                      'room_number': anchors[1].string,
                      'building_name': building_name,
                      'building_number': building_number})

    return rooms
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Print the scraped rooms; previously the result was discarded, so
    # running this tool as a script produced no output at all.
    pprint(scrape_rooms())
|
||||
Reference in New Issue
Block a user