added geopy

This commit is contained in:
2021-08-16 20:00:17 +02:00
commit 633409476b
23 changed files with 9669 additions and 0 deletions

4
.gitignore vendored Normal file
View File

@@ -0,0 +1,4 @@
# Python bytecode cache directories (a trailing slash matches directories
# named __pycache__ at any depth).
__pycache__/
__pycache__/*
besser_tanken/__pycache__
# NOTE(review): presumably a locally generated feather data file - confirm.
merged.feather

3
.gitmodules vendored Normal file
View File

@@ -0,0 +1,3 @@
# The station and price CSV data live in a separate repository that is
# checked out as a submodule at ./tankerkoenig-data.
[submodule "tankerkoenig-data"]
	path = tankerkoenig-data
	url = https://tankerkoenig@dev.azure.com/tankerkoenig/tankerkoenig-data/_git/tankerkoenig-data

3
.idea/.gitignore generated vendored Normal file
View File

@@ -0,0 +1,3 @@
# Default ignored files
# (both patterns are anchored to the directory containing this file)
/shelf/
/workspace.xml

8
.idea/besser_tanken.iml generated Normal file
View File

@@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Module descriptor: a Python module whose content root is the module directory. -->
<module type="PYTHON_MODULE" version="4">
  <component name="NewModuleRootManager">
    <content url="file://$MODULE_DIR$" />
    <orderEntry type="inheritedJdk" />
    <orderEntry type="sourceFolder" forTests="false" />
  </component>
</module>

16
.idea/csv-plugin.xml generated Normal file
View File

@@ -0,0 +1,16 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Per-file CSV settings: maps a file path to its separator character. -->
<project version="4">
  <component name="CsvFileAttributes">
    <option name="attributeMap">
      <map>
        <entry key="/tankerkoenig-data/stations/2021/02/2021-02-07-stations.csv">
          <value>
            <Attribute>
              <option name="separator" value="," />
            </Attribute>
          </value>
        </entry>
      </map>
    </option>
  </component>
</project>

View File

@@ -0,0 +1,62 @@
<component name="InspectionProjectProfileManager">
  <profile version="1.0">
    <option name="myName" value="Project Default" />
    <!-- Packages listed here are excluded from the package-requirements inspection. -->
    <inspection_tool class="PyPackageRequirementsInspection"
                     enabled="true"
                     level="WARNING"
                     enabled_by_default="true">
      <option name="ignoredPackages">
        <value>
          <list size="42">
            <item index="0" class="java.lang.String" itemvalue="cffi" />
            <item index="1" class="java.lang.String" itemvalue="numpy" />
            <item index="2" class="java.lang.String" itemvalue="requests" />
            <item index="3" class="java.lang.String" itemvalue="markupsafe" />
            <item index="4" class="java.lang.String" itemvalue="pyrsistent" />
            <item index="5" class="java.lang.String" itemvalue="certifi" />
            <item index="6" class="java.lang.String" itemvalue="lxml" />
            <item index="7" class="java.lang.String" itemvalue="urllib3" />
            <item index="8" class="java.lang.String" itemvalue="itsdangerous" />
            <item index="9" class="java.lang.String" itemvalue="jsonschema" />
            <item index="10" class="java.lang.String" itemvalue="pynacl" />
            <item index="11" class="java.lang.String" itemvalue="flask-restx" />
            <item index="12" class="java.lang.String" itemvalue="werkzeug" />
            <item index="13" class="java.lang.String" itemvalue="six" />
            <item index="14" class="java.lang.String" itemvalue="aniso8601" />
            <item index="15" class="java.lang.String" itemvalue="flask-jwt-extended" />
            <item index="16" class="java.lang.String" itemvalue="cryptography" />
            <item index="17" class="java.lang.String" itemvalue="click" />
            <item index="18" class="java.lang.String" itemvalue="attrs" />
            <item index="19" class="java.lang.String" itemvalue="bcrypt" />
            <item index="20" class="java.lang.String" itemvalue="jinja2" />
            <item index="21" class="java.lang.String" itemvalue="pandas" />
            <item index="22" class="java.lang.String" itemvalue="paramiko" />
            <item index="23" class="java.lang.String" itemvalue="user-agents" />
            <item index="24" class="java.lang.String" itemvalue="flask" />
            <item index="25" class="java.lang.String" itemvalue="pyjwt" />
            <item index="26" class="java.lang.String" itemvalue="idna" />
            <item index="27" class="java.lang.String" itemvalue="blinker" />
            <item index="28" class="java.lang.String" itemvalue="flask-cors" />
            <item index="29" class="java.lang.String" itemvalue="flask-httpauth" />
            <item index="30" class="java.lang.String" itemvalue="flask-sitemap" />
            <item index="31" class="java.lang.String" itemvalue="mariadb" />
            <item index="32" class="java.lang.String" itemvalue="tqdm" />
            <item index="33" class="java.lang.String" itemvalue="protobuf" />
            <item index="34" class="java.lang.String" itemvalue="tabulate" />
            <item index="35" class="java.lang.String" itemvalue="pytimeparse" />
            <item index="36" class="java.lang.String" itemvalue="sqlite3-to-mysql" />
            <item index="37" class="java.lang.String" itemvalue="packaging" />
            <item index="38" class="java.lang.String" itemvalue="mysql-connector-python" />
            <item index="39" class="java.lang.String" itemvalue="sqlalchemy" />
            <item index="40" class="java.lang.String" itemvalue="simplejson" />
            <item index="41" class="java.lang.String" itemvalue="unittest" />
          </list>
        </value>
      </option>
    </inspection_tool>
    <!-- PEP 8 inspection with error code E402 suppressed. -->
    <inspection_tool class="PyPep8Inspection"
                     enabled="true"
                     level="WEAK WARNING"
                     enabled_by_default="true">
      <option name="ignoredErrors">
        <list>
          <option value="E402" />
        </list>
      </option>
    </inspection_tool>
  </profile>
</component>

View File

@@ -0,0 +1,6 @@
<component name="InspectionProjectProfileManager">
  <!-- USE_PROJECT_PROFILE is switched off here. -->
  <settings>
    <option name="USE_PROJECT_PROFILE" value="false" />
    <version value="1.0" />
  </settings>
</component>

4
.idea/misc.xml generated Normal file
View File

@@ -0,0 +1,4 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Selects the project SDK by name and type. -->
<project version="4">
  <component name="ProjectRootManager"
             version="2"
             project-jdk-name="Poetry (besser_tanken)"
             project-jdk-type="Python SDK" />
</project>

8
.idea/modules.xml generated Normal file
View File

@@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Lists the module descriptor files that belong to this project. -->
<project version="4">
  <component name="ProjectModuleManager">
    <modules>
      <module fileurl="file://$PROJECT_DIR$/.idea/besser_tanken.iml"
              filepath="$PROJECT_DIR$/.idea/besser_tanken.iml" />
    </modules>
  </component>
</project>

7
.idea/vcs.xml generated Normal file
View File

@@ -0,0 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Both the project root and the tankerkoenig-data directory are mapped to Git. -->
<project version="4">
  <component name="VcsDirectoryMappings">
    <mapping directory="$PROJECT_DIR$" vcs="Git" />
    <mapping directory="$PROJECT_DIR$/tankerkoenig-data" vcs="Git" />
  </component>
</project>

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

10
besser_tanken/config.py Normal file
View File

@@ -0,0 +1,10 @@
import os
# Directory that contains this configuration module.
conf_dir = os.path.dirname(os.path.abspath(__file__))

# Intermediate (not yet normalised) paths shared by the constants below.
_parent_dir = os.path.join(conf_dir, os.pardir)
_data_root = os.path.join(_parent_dir, "tankerkoenig-data")

# Absolute locations of the station CSVs, the price CSVs and the feather
# output directory, all resolved relative to the parent of conf_dir.
stations_dir = os.path.abspath(os.path.join(_data_root, "stations"))
prices_dir = os.path.abspath(os.path.join(_data_root, "prices"))
feather_dir = os.path.abspath(os.path.join(_parent_dir, "feather_data"))

View File

@@ -0,0 +1,130 @@
import concurrent
import csv
import os
import re
from collections import defaultdict
from concurrent.futures import ThreadPoolExecutor, as_completed
from datetime import date, timedelta
from glob import glob
from threading import Thread, Lock
from loguru import logger
from besser_tanken.config import stations_dir, prices_dir
from besser_tanken.data_tools import fix_date_time_strings
# Matches data file names of the form "YYYY-MM-DD-<type>.csv"; the groups are
# (year, month, day, type).  The dot before "csv" is escaped so that only a
# literal ".csv" suffix is accepted.
files_re = re.compile(r"(\d\d\d\d)-(\d\d)-(\d\d)-(\S*)\.csv")
# In-memory caches keyed by the resolved file path, so that each CSV file is
# parsed at most once per process.
prices_cache = dict()
stations_cache = dict()
def load_date_hierarchical_file(base_dir, year=None, month=None,
                                day=None) -> str:
    """Resolve the path of a data file stored in a date hierarchy.

    The expected layout is
    ``<base_dir>/<year>/<month>/<year>-<month>-<day>-<type>.csv`` with
    zero-padded month and day.

    :param base_dir: root directory of the hierarchy.
    :param year: year to look up (int or numeric string); ``None`` selects
        the highest year directory found.
    :param month: month to look up; ``None`` selects the highest month
        directory found for the year.
    :param day: day to look up; ``None`` selects the highest day for which
        a matching CSV file exists.
    :return: path of the matching CSV file.
    :raises ValueError: if the requested year, month or day is not
        available, or if a level of the hierarchy is empty.
    """

    def _numeric_subdirs(parent):
        # Names of the direct sub-directories of `parent` that are purely
        # numeric, as ints.  basename/normpath copes with either path
        # separator and with the trailing separator glob adds for "*/".
        found = []
        for sub_dir in glob(f"{parent}/*/"):
            name = os.path.basename(os.path.normpath(sub_dir))
            if name.isdigit():
                found.append(int(name))
        return found

    years = _numeric_subdirs(base_dir)
    if year is None:
        if not years:
            # An empty hierarchy is reported as ValueError, like every
            # other "nothing found" case, instead of an IndexError.
            raise ValueError(f"No year directories found in {base_dir}!")
        year = max(years)
    year = int(year)
    if year not in years:
        raise ValueError(f"No file for year {year}!")
    year_dir = f"{base_dir}/{year}"

    months = _numeric_subdirs(year_dir)
    if month is None:
        if not months:
            raise ValueError(f"No month directories found in {year_dir}!")
        month = max(months)
    # Normalise to int so that the ":02d" formatting below also works when
    # the caller passed a numeric string.
    month = int(month)
    if month not in months:
        raise ValueError(f"No file found for month {month}!")
    month_dir = f"{year_dir}/{month:02d}"

    days = []
    file_type = None
    for csv_path in glob(f"{month_dir}/*.csv"):
        match = files_re.search(csv_path)
        if match:
            days.append(int(match.group(3)))
            # The type suffix of the last matching file is used for the
            # result; files in one directory are assumed to share it.
            file_type = match.group(4)
    if day is None:
        if not days:
            raise ValueError(f"No data files found in {month_dir}!")
        day = max(days)
    day = int(day)
    if day not in days or file_type is None:
        raise ValueError(f"No file found for day {day}!")
    return os.path.join(month_dir,
                        f"{year}-{month:02d}-{day:02d}-{file_type}.csv")
def load_prices(year=None, month=None, day=None) -> list:
    """Return the price rows of one day as a list of dicts.

    The file is located via load_date_hierarchical_file below prices_dir.
    Parsed rows are passed through fix_date_time_strings and stored in
    prices_cache under the file path, so later calls for the same file
    return the cached list.
    """
    csv_path = load_date_hierarchical_file(prices_dir, year, month, day)
    if csv_path not in prices_cache:
        logger.info(f"loading prices file: {csv_path}")
        with open(csv_path, newline='') as handle:
            rows = [row for row in csv.DictReader(handle)]
        logger.info(f"got {len(rows)} prices")
        rows = fix_date_time_strings(rows)
        prices_cache[csv_path] = rows
        return rows
    logger.info(f"returning prices from cache for file: {csv_path}")
    return prices_cache[csv_path]
def load_prices_from(year=None, month=None, day=None, threading=True) -> list:
    """Load the price rows of every day from a start date up to yesterday.

    Days for which load_prices raises ValueError (no file available) are
    skipped.  The rows are returned in chronological day order in both
    modes.

    :param year: start year (required).
    :param month: start month (required).
    :param day: start day (required).
    :param threading: if true, the per-day files are loaded by a pool of
        16 worker threads; otherwise they are loaded one after another.
    :return: concatenated list of the price rows of all available days.
    :raises TypeError: if year, month or day is missing.
    """
    if year is None or month is None or day is None:
        # date() would raise a TypeError anyway; say what is missing.
        raise TypeError(
            "load_prices_from() requires year, month and day to be given")

    today = date.today()
    current = date(year, month, day)
    days = []
    while current < today:
        days.append(current)
        current = current + timedelta(days=1)

    def _load_day(d: date):
        try:
            return load_prices(d.year, d.month, d.day)
        except ValueError:
            return None  # no file for this day, skip it

    if threading:
        with ThreadPoolExecutor(max_workers=16) as executor:
            # map() yields results in submission order, which keeps the
            # combined list chronological and deterministic.
            per_day = list(executor.map(_load_day, days))
    else:
        per_day = [_load_day(d) for d in days]

    res = []
    for rows in per_day:
        if rows is not None:
            res.extend(rows)
    return res
def load_stations(year=None, month=None, day=None) -> list:
    """Return the station rows of one day as a list of dicts.

    The file is located via load_date_hierarchical_file below stations_dir.
    Parsed rows are stored in stations_cache under the file path, so later
    calls for the same file return the cached list.
    """
    csv_path = load_date_hierarchical_file(stations_dir, year, month, day)
    if csv_path not in stations_cache:
        with open(csv_path, newline='') as handle:
            rows = [row for row in csv.DictReader(handle)]
        logger.info(f"got {len(rows)} stations")
        stations_cache[csv_path] = rows
        return rows
    logger.info(f"returning stations from cache for file: {csv_path}")
    return stations_cache[csv_path]
# stations_dir

View File

@@ -0,0 +1,43 @@
from collections import defaultdict
import feather
import pandas
from besser_tanken.config import stations_dir
def group_prices(prices):
    """Bucket price records by the value of their 'station_uuid' key.

    :param prices: iterable of dict-like records with a 'station_uuid' key.
    :return: defaultdict(list) mapping each station uuid to its records,
        in input order.
    """
    by_station = defaultdict(list)
    for record in prices:
        station_id = record['station_uuid']
        by_station[station_id].append(record)
    return by_station
def fix_date_time_strings(price_dict_list):
    """Normalise the UTC offset of each record's 'date' string to ``+HHMM``.

    A colon inside the offset is removed ("+02:00" -> "+0200") and a short
    offset is right-padded with zeros ("+02" -> "+0200").  Records whose
    'date' contains no '+' are left untouched.  The dicts are modified in
    place.

    :param price_dict_list: list of dicts with a string 'date' entry.
    :return: the same list object that was passed in.
    """
    for p in price_dict_list:
        try:
            date_time_string, tz_string = p['date'].rsplit('+', maxsplit=1)
        except ValueError:
            # No '+' in the string: the unpacking fails with ValueError,
            # there is no offset to fix.
            continue
        # str.replace returns a new string, so the result must be kept.
        tz_string = tz_string.replace(':', '')
        tz_string = tz_string.ljust(4, '0')
        p['date'] = date_time_string + "+" + tz_string
    return price_dict_list
def convert_csv_to_feather(csv_file: str, feather_file: str = None):
    """Read a CSV file with pandas and write it out as a feather file.

    :param csv_file: path of the CSV file to read.
    :param feather_file: path of the feather file to write; defaults to
        the CSV path with its extension replaced by ``.feather``.
    """
    # Local import: os is not among this module's file-level imports.
    import os

    df = pandas.read_csv(csv_file)
    if feather_file is None:
        # splitext only strips an extension of the final path component,
        # so a dot in a directory name is never mistaken for one.
        feather_file = os.path.splitext(csv_file)[0] + '.feather'
    feather.write_dataframe(df, feather_file)
    # To read the file back: df = feather.read_dataframe(feather_file)
if __name__ == '__main__':
    # Manual run: convert a single stations CSV to feather.
    sample_csv = stations_dir + "/2021/01/2021-01-01-stations.csv"
    convert_csv_to_feather(sample_csv)

45
besser_tanken/main.py Normal file
View File

@@ -0,0 +1,45 @@
import csv
import os
import re
from collections import defaultdict
from datetime import date, timedelta, datetime
from glob import glob
from pprint import pprint
from typing import Union
from loguru import logger
from besser_tanken.data_loader import load_stations, load_prices_from, \
load_prices
from besser_tanken.data_tools import group_prices
def _test_get_buehl_stations():
    """Yield every station row from load_stations() whose 'post_code' is '77815'."""
    wanted_post_codes = ['77815']
    yield from (station for station in load_stations()
                if station['post_code'] in wanted_post_codes)
# Ad-hoc debugging script: picks the first station yielded by
# _test_get_buehl_stations(), prints price rows and their parsed dates, then
# loads all prices since 2021-04-01 and prints those of the chosen station.
# NOTE(review): indentation is not visible in this view, so the extent of the
# for-loop body (and whether exit() below is inside it) cannot be determined
# from here - confirm against the real file.
if __name__ == '__main__':
buehl_station = next(_test_get_buehl_stations())
print(buehl_station)
for p in load_prices():
# for p in load_prices_from(2021, 4, 1):
print(p['date'])
# Parse the date string as returned by load_prices().
print(datetime.strptime(p['date'],
'%Y-%m-%d %H:%M:%S%z'))
# exit()
print(p)
print(p['date'])
# if ":" == p['date'][-3]:
# p['date'] = p['date'][:-3] + p['date'][-2:]
# print(p['date'])
# NOTE(review): "00" is appended to a string that load_prices() has already
# passed through fix_date_time_strings - presumably leftover experimentation;
# verify it is still wanted.
datetime_object = datetime.strptime(p['date'] + "00",
'%Y-%m-%d %H:%M:%S%z')
print(datetime_object)
# NOTE(review): exit() terminates the interpreter; depending on its real
# indentation the statements below may never run - confirm.
exit()
prices = load_prices_from(2021, 4, 1)
# print(prices)
print(len(prices))
# Group the rows per station and show those of the station selected above.
prices = group_prices(prices)
pprint(prices[buehl_station['uuid']])

6
besser_tanken/tests.py Normal file
View File

@@ -0,0 +1,6 @@
import pygeodb
# Print pygeodb.distance for two pairs of codes, passed as strings
# (presumably German postal codes - confirm).
# https://pypi.org/project/pyGeoDb/
for code_a, code_b in (("42897", "50933"), ("77815", "78247")):
    print(pygeodb.distance(code_a, code_b))

642
feather_test.ipynb Normal file

File diff suppressed because one or more lines are too long

2214
poetry.lock generated Normal file

File diff suppressed because it is too large Load Diff

21
pyproject.toml Normal file
View File

@@ -0,0 +1,21 @@
# Package metadata.
[tool.poetry]
name = "besser_tanken"
version = "0.1.0"
description = ""
authors = ["Tobias Kurze <it@t-kurze.de>"]
# Runtime dependencies; "^" constraints allow compatible newer releases.
[tool.poetry.dependencies]
python = "^3.9"
loguru = "^0.5.3"
seaborn = "^0.11.1"
earthpy = "^0.9.2"
jupyter = "^1.0.0"
jupyter_nbextensions_configurator = "^0.4.1"
feather-format = "^0.4.1"
# pygeodb is installed directly from a git repository rather than from a
# package index.
pygeodb = {git = "https://github.com/tkurze/pyGeoDb.git"}
# No development-only dependencies are declared.
[tool.poetry.dev-dependencies]
# Build backend used to build the package.
[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"

2244
sprit_preis_analyse.ipynb Normal file

File diff suppressed because one or more lines are too long

1
tankerkoenig-data Submodule

Submodule tankerkoenig-data added at afed4bbe40