added geopy
This commit is contained in:
4
.gitignore
vendored
Normal file
4
.gitignore
vendored
Normal file
@@ -0,0 +1,4 @@
|
||||
__pycache__/
|
||||
__pycache__/*
|
||||
besser_tanken/__pycache__
|
||||
merged.feather
|
||||
3
.gitmodules
vendored
Normal file
3
.gitmodules
vendored
Normal file
@@ -0,0 +1,3 @@
|
||||
[submodule "tankerkoenig-data"]
|
||||
path = tankerkoenig-data
|
||||
url = https://tankerkoenig@dev.azure.com/tankerkoenig/tankerkoenig-data/_git/tankerkoenig-data
|
||||
3
.idea/.gitignore
generated
vendored
Normal file
3
.idea/.gitignore
generated
vendored
Normal file
@@ -0,0 +1,3 @@
|
||||
# Default ignored files
|
||||
/shelf/
|
||||
/workspace.xml
|
||||
8
.idea/besser_tanken.iml
generated
Normal file
8
.idea/besser_tanken.iml
generated
Normal file
@@ -0,0 +1,8 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<module type="PYTHON_MODULE" version="4">
|
||||
<component name="NewModuleRootManager">
|
||||
<content url="file://$MODULE_DIR$" />
|
||||
<orderEntry type="inheritedJdk" />
|
||||
<orderEntry type="sourceFolder" forTests="false" />
|
||||
</component>
|
||||
</module>
|
||||
16
.idea/csv-plugin.xml
generated
Normal file
16
.idea/csv-plugin.xml
generated
Normal file
@@ -0,0 +1,16 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="CsvFileAttributes">
|
||||
<option name="attributeMap">
|
||||
<map>
|
||||
<entry key="/tankerkoenig-data/stations/2021/02/2021-02-07-stations.csv">
|
||||
<value>
|
||||
<Attribute>
|
||||
<option name="separator" value="," />
|
||||
</Attribute>
|
||||
</value>
|
||||
</entry>
|
||||
</map>
|
||||
</option>
|
||||
</component>
|
||||
</project>
|
||||
62
.idea/inspectionProfiles/Project_Default.xml
generated
Normal file
62
.idea/inspectionProfiles/Project_Default.xml
generated
Normal file
@@ -0,0 +1,62 @@
|
||||
<component name="InspectionProjectProfileManager">
|
||||
<profile version="1.0">
|
||||
<option name="myName" value="Project Default" />
|
||||
<inspection_tool class="PyPackageRequirementsInspection" enabled="true" level="WARNING" enabled_by_default="true">
|
||||
<option name="ignoredPackages">
|
||||
<value>
|
||||
<list size="42">
|
||||
<item index="0" class="java.lang.String" itemvalue="cffi" />
|
||||
<item index="1" class="java.lang.String" itemvalue="numpy" />
|
||||
<item index="2" class="java.lang.String" itemvalue="requests" />
|
||||
<item index="3" class="java.lang.String" itemvalue="markupsafe" />
|
||||
<item index="4" class="java.lang.String" itemvalue="pyrsistent" />
|
||||
<item index="5" class="java.lang.String" itemvalue="certifi" />
|
||||
<item index="6" class="java.lang.String" itemvalue="lxml" />
|
||||
<item index="7" class="java.lang.String" itemvalue="urllib3" />
|
||||
<item index="8" class="java.lang.String" itemvalue="itsdangerous" />
|
||||
<item index="9" class="java.lang.String" itemvalue="jsonschema" />
|
||||
<item index="10" class="java.lang.String" itemvalue="pynacl" />
|
||||
<item index="11" class="java.lang.String" itemvalue="flask-restx" />
|
||||
<item index="12" class="java.lang.String" itemvalue="werkzeug" />
|
||||
<item index="13" class="java.lang.String" itemvalue="six" />
|
||||
<item index="14" class="java.lang.String" itemvalue="aniso8601" />
|
||||
<item index="15" class="java.lang.String" itemvalue="flask-jwt-extended" />
|
||||
<item index="16" class="java.lang.String" itemvalue="cryptography" />
|
||||
<item index="17" class="java.lang.String" itemvalue="click" />
|
||||
<item index="18" class="java.lang.String" itemvalue="attrs" />
|
||||
<item index="19" class="java.lang.String" itemvalue="bcrypt" />
|
||||
<item index="20" class="java.lang.String" itemvalue="jinja2" />
|
||||
<item index="21" class="java.lang.String" itemvalue="pandas" />
|
||||
<item index="22" class="java.lang.String" itemvalue="paramiko" />
|
||||
<item index="23" class="java.lang.String" itemvalue="user-agents" />
|
||||
<item index="24" class="java.lang.String" itemvalue="flask" />
|
||||
<item index="25" class="java.lang.String" itemvalue="pyjwt" />
|
||||
<item index="26" class="java.lang.String" itemvalue="idna" />
|
||||
<item index="27" class="java.lang.String" itemvalue="blinker" />
|
||||
<item index="28" class="java.lang.String" itemvalue="flask-cors" />
|
||||
<item index="29" class="java.lang.String" itemvalue="flask-httpauth" />
|
||||
<item index="30" class="java.lang.String" itemvalue="flask-sitemap" />
|
||||
<item index="31" class="java.lang.String" itemvalue="mariadb" />
|
||||
<item index="32" class="java.lang.String" itemvalue="tqdm" />
|
||||
<item index="33" class="java.lang.String" itemvalue="protobuf" />
|
||||
<item index="34" class="java.lang.String" itemvalue="tabulate" />
|
||||
<item index="35" class="java.lang.String" itemvalue="pytimeparse" />
|
||||
<item index="36" class="java.lang.String" itemvalue="sqlite3-to-mysql" />
|
||||
<item index="37" class="java.lang.String" itemvalue="packaging" />
|
||||
<item index="38" class="java.lang.String" itemvalue="mysql-connector-python" />
|
||||
<item index="39" class="java.lang.String" itemvalue="sqlalchemy" />
|
||||
<item index="40" class="java.lang.String" itemvalue="simplejson" />
|
||||
<item index="41" class="java.lang.String" itemvalue="unittest" />
|
||||
</list>
|
||||
</value>
|
||||
</option>
|
||||
</inspection_tool>
|
||||
<inspection_tool class="PyPep8Inspection" enabled="true" level="WEAK WARNING" enabled_by_default="true">
|
||||
<option name="ignoredErrors">
|
||||
<list>
|
||||
<option value="E402" />
|
||||
</list>
|
||||
</option>
|
||||
</inspection_tool>
|
||||
</profile>
|
||||
</component>
|
||||
6
.idea/inspectionProfiles/profiles_settings.xml
generated
Normal file
6
.idea/inspectionProfiles/profiles_settings.xml
generated
Normal file
@@ -0,0 +1,6 @@
|
||||
<component name="InspectionProjectProfileManager">
|
||||
<settings>
|
||||
<option name="USE_PROJECT_PROFILE" value="false" />
|
||||
<version value="1.0" />
|
||||
</settings>
|
||||
</component>
|
||||
4
.idea/misc.xml
generated
Normal file
4
.idea/misc.xml
generated
Normal file
@@ -0,0 +1,4 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="ProjectRootManager" version="2" project-jdk-name="Poetry (besser_tanken)" project-jdk-type="Python SDK" />
|
||||
</project>
|
||||
8
.idea/modules.xml
generated
Normal file
8
.idea/modules.xml
generated
Normal file
@@ -0,0 +1,8 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="ProjectModuleManager">
|
||||
<modules>
|
||||
<module fileurl="file://$PROJECT_DIR$/.idea/besser_tanken.iml" filepath="$PROJECT_DIR$/.idea/besser_tanken.iml" />
|
||||
</modules>
|
||||
</component>
|
||||
</project>
|
||||
7
.idea/vcs.xml
generated
Normal file
7
.idea/vcs.xml
generated
Normal file
@@ -0,0 +1,7 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="VcsDirectoryMappings">
|
||||
<mapping directory="$PROJECT_DIR$" vcs="Git" />
|
||||
<mapping directory="$PROJECT_DIR$/tankerkoenig-data" vcs="Git" />
|
||||
</component>
|
||||
</project>
|
||||
1948
.ipynb_checkpoints/feather_test-checkpoint.ipynb
Normal file
1948
.ipynb_checkpoints/feather_test-checkpoint.ipynb
Normal file
File diff suppressed because one or more lines are too long
2244
.ipynb_checkpoints/sprit_preis_analyse-checkpoint.ipynb
Normal file
2244
.ipynb_checkpoints/sprit_preis_analyse-checkpoint.ipynb
Normal file
File diff suppressed because one or more lines are too long
0
besser_tanken/__init__.py
Normal file
0
besser_tanken/__init__.py
Normal file
10
besser_tanken/config.py
Normal file
10
besser_tanken/config.py
Normal file
@@ -0,0 +1,10 @@
|
||||
import os
|
||||
|
||||
# Directory that contains this configuration module.
conf_dir = os.path.dirname(os.path.abspath(__file__))

# Parent of the package directory; all data directories are resolved from it.
_project_root = os.path.join(conf_dir, os.pardir)

# Checked-out tankerkoenig data (git submodule), split into stations/prices.
stations_dir = os.path.abspath(
    os.path.join(_project_root, "tankerkoenig-data", "stations"))
prices_dir = os.path.abspath(
    os.path.join(_project_root, "tankerkoenig-data", "prices"))

# Location used for feather files.
feather_dir = os.path.abspath(os.path.join(_project_root, "feather_data"))
|
||||
130
besser_tanken/data_loader.py
Normal file
130
besser_tanken/data_loader.py
Normal file
@@ -0,0 +1,130 @@
|
||||
import concurrent
|
||||
import csv
|
||||
import os
|
||||
import re
|
||||
from collections import defaultdict
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
from datetime import date, timedelta
|
||||
from glob import glob
|
||||
from threading import Thread, Lock
|
||||
|
||||
from loguru import logger
|
||||
from besser_tanken.config import stations_dir, prices_dir
|
||||
from besser_tanken.data_tools import fix_date_time_strings
|
||||
|
||||
# Matches daily data files named "<YYYY>-<MM>-<DD>-<type>.csv".
# Groups: 1 = year, 2 = month, 3 = day, 4 = file type (e.g. "prices").
# The dot before "csv" is escaped and the pattern is anchored at the end so
# that only a real ".csv" suffix matches.
files_re = re.compile(r"(\d\d\d\d)-(\d\d)-(\d\d)-(\S*)\.csv$")

# In-memory caches of parsed CSV rows, keyed by the resolved file path.
prices_cache = dict()
stations_cache = dict()
|
||||
|
||||
|
||||
def load_date_hierarchical_file(base_dir, year=None, month=None,
                                day=None) -> str:
    """Resolve the path of one daily CSV in a ``<base>/<year>/<month>/`` tree.

    Each of ``year``, ``month`` and ``day`` may be omitted; the highest value
    found on disk at that level is used instead. Values may be given as
    ints or as numeric strings.

    :param base_dir: root directory containing one sub-directory per year
    :param year: year to look up, or ``None`` for the latest available year
    :param month: month to look up, or ``None`` for the latest available month
    :param day: day to look up, or ``None`` for the latest available day
    :return: path of the matching ``<year>-<month>-<day>-<type>.csv`` file
    :raises ValueError: if the requested (or any) year, month or day is not
        present on disk
    """

    def _numeric_subdirs(parent):
        # Names of the direct sub-directories of `parent` that are integers;
        # anything else (e.g. hidden or tool directories) is ignored.
        numbers = []
        for entry in glob(f"{parent}/*/"):
            # normpath drops the trailing separator, whichever one the
            # platform's glob produced.
            name = os.path.basename(os.path.normpath(entry))
            try:
                numbers.append(int(name))
            except ValueError:
                continue
        return numbers

    years = _numeric_subdirs(base_dir)
    if year is None:
        if not years:
            raise ValueError(f"No year directories found in {base_dir}!")
        year = max(years)
    year = int(year)
    if year not in years:
        raise ValueError(f"No file for year {year}!")
    year_dir = f"{base_dir}/{year}"

    months = _numeric_subdirs(year_dir)
    if month is None:
        if not months:
            raise ValueError(f"No month directories found in {year_dir}!")
        month = max(months)
    month = int(month)
    if month not in months:
        raise ValueError(f"No file found for month {month}!")
    month_dir = f"{year_dir}/{month:02d}"

    # Remember the file type per day so the returned name uses the type of
    # the requested day rather than of whichever file was globbed last.
    type_by_day = {}
    for csv_path in glob(f"{month_dir}/*.csv"):
        match = files_re.search(os.path.basename(csv_path))
        if match:
            type_by_day[int(match.group(3))] = match.group(4)

    if day is None:
        if not type_by_day:
            raise ValueError(f"No files found in {month_dir}!")
        day = max(type_by_day)
    day = int(day)
    if day not in type_by_day:
        raise ValueError(f"No file found for day {day}!")

    return os.path.join(
        month_dir, f"{year}-{month:02d}-{day:02d}-{type_by_day[day]}.csv")
|
||||
|
||||
|
||||
def load_prices(year=None, month=None, day=None) -> list:
    """Load the price records of one day as a list of dicts.

    The file is located with ``load_date_hierarchical_file`` below
    ``prices_dir``; omitted date parts select the latest available data.
    Parsed rows are passed through ``fix_date_time_strings`` and cached in
    ``prices_cache`` under the file path, so repeated calls return the same
    list object.

    :return: list of CSV rows (one dict per row)
    :raises ValueError: propagated from the file lookup when no file exists
    """
    file = load_date_hierarchical_file(prices_dir, year, month, day)
    if file in prices_cache:
        logger.info(f"returning prices from cache for file: {file}")
        return prices_cache[file]

    logger.info(f"loading prices file: {file}")
    # Explicit encoding instead of the locale default.
    # NOTE(review): assumes the tankerkoenig CSV export is UTF-8 - confirm.
    with open(file, newline='', encoding='utf-8') as csv_file:
        res = list(csv.DictReader(csv_file))
    logger.info(f"got {len(res)} prices")

    res = fix_date_time_strings(res)
    prices_cache[file] = res
    return res
|
||||
|
||||
|
||||
def load_prices_from(year=None, month=None, day=None, threading=True) -> list:
    """Load all price records from the given start date up to yesterday.

    Days for which ``load_prices`` raises ``ValueError`` (no file available)
    are skipped in both modes. Records are returned in chronological order
    of their day, regardless of which worker finished first.

    :param year: start year (required)
    :param month: start month (required)
    :param day: start day (required)
    :param threading: load the daily files with a thread pool if true,
        otherwise one after another
    :return: concatenated list of the price rows of all loaded days
    :raises TypeError: if year, month or day is missing
    """
    if year is None or month is None or day is None:
        raise TypeError(
            "load_prices_from() requires year, month and day to be set")

    first_day = date(year, month, day)
    # Today itself is excluded, matching the `start_date < today` bound.
    day_count = max((date.today() - first_day).days, 0)
    days = [first_day + timedelta(days=offset) for offset in range(day_count)]

    def _load_day_or_none(current: date):
        # A missing file for one day must not abort the whole range.
        try:
            return load_prices(current.year, current.month, current.day)
        except ValueError:
            return None

    if threading:
        with ThreadPoolExecutor(max_workers=16) as executor:
            # Executor.map yields results in submission order.
            daily_rows = list(executor.map(_load_day_or_none, days))
    else:
        daily_rows = [_load_day_or_none(current) for current in days]

    res = []
    for rows in daily_rows:
        if rows is not None:
            res.extend(rows)
    return res
|
||||
|
||||
|
||||
def load_stations(year=None, month=None, day=None) -> list:
    """Load the station records of one day as a list of dicts.

    The file is located with ``load_date_hierarchical_file`` below
    ``stations_dir``; omitted date parts select the latest available data.
    Parsed rows are cached in ``stations_cache`` under the file path, so
    repeated calls return the same list object.

    :return: list of CSV rows (one dict per row)
    :raises ValueError: propagated from the file lookup when no file exists
    """
    file = load_date_hierarchical_file(stations_dir, year, month, day)
    if file in stations_cache:
        logger.info(f"returning stations from cache for file: {file}")
        return stations_cache[file]

    logger.info(f"loading stations file: {file}")
    # Explicit encoding instead of the locale default.
    # NOTE(review): assumes the tankerkoenig CSV export is UTF-8 - confirm.
    with open(file, newline='', encoding='utf-8') as csv_file:
        res = list(csv.DictReader(csv_file))
    logger.info(f"got {len(res)} stations")

    stations_cache[file] = res
    return res
|
||||
|
||||
# stations_dir
|
||||
43
besser_tanken/data_tools.py
Normal file
43
besser_tanken/data_tools.py
Normal file
@@ -0,0 +1,43 @@
|
||||
from collections import defaultdict
|
||||
|
||||
import feather
|
||||
import pandas
|
||||
|
||||
from besser_tanken.config import stations_dir
|
||||
|
||||
|
||||
def group_prices(prices):
    """Bucket price records by their ``station_uuid`` value.

    :param prices: iterable of price dicts, each with a ``station_uuid`` key
    :return: ``defaultdict(list)`` mapping station uuid to its records, in
        input order
    """
    by_station = defaultdict(list)
    for record in prices:
        station_id = record['station_uuid']
        by_station[station_id].append(record)
    return by_station
|
||||
|
||||
|
||||
def fix_date_time_strings(price_dict_list):
    """Normalise the UTC offset of every record's ``date`` string in place.

    The part after the last ``+`` is treated as the offset: colons are
    removed and the result is right-padded with zeros to four digits, so
    ``+02`` and ``+02:00`` both become ``+0200``. Records whose ``date`` is
    missing, is not a string, or contains no ``+`` are left untouched.

    :param price_dict_list: list of dicts, modified in place
    :return: the same list object that was passed in
    """
    for p in price_dict_list:
        raw = p.get('date')
        if not isinstance(raw, str):
            continue

        date_time_string, plus, tz_string = raw.rpartition('+')
        if not plus:
            # No positive offset present; nothing to normalise.
            continue

        # str.replace returns a new string, so the result must be kept.
        tz_string = tz_string.replace(':', '').ljust(4, '0')
        p['date'] = date_time_string + "+" + tz_string

    return price_dict_list
|
||||
|
||||
|
||||
def convert_csv_to_feather(csv_file: str, feather_file: str = None):
    """Read a CSV file with pandas and write it out as a feather file.

    :param csv_file: path of the CSV file to read
    :param feather_file: output path; defaults to ``csv_file`` with its
        extension replaced by ``.feather``
    :return: path of the feather file that was written
    """
    import os  # local import: this module does not import os at file level

    df = pandas.read_csv(csv_file)
    if feather_file is None:
        # splitext only strips an extension of the file name itself, so dots
        # in directory names do not truncate the path.
        feather_file = os.path.splitext(csv_file)[0] + '.feather'
    feather.write_dataframe(df, feather_file)
    return feather_file
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Manual run: convert one fixed stations snapshot to feather format.
    sample_csv = f"{stations_dir}/2021/01/2021-01-01-stations.csv"
    convert_csv_to_feather(sample_csv)
|
||||
45
besser_tanken/main.py
Normal file
45
besser_tanken/main.py
Normal file
@@ -0,0 +1,45 @@
|
||||
import csv
|
||||
import os
|
||||
import re
|
||||
from collections import defaultdict
|
||||
from datetime import date, timedelta, datetime
|
||||
from glob import glob
|
||||
from pprint import pprint
|
||||
from typing import Union
|
||||
from loguru import logger
|
||||
|
||||
from besser_tanken.data_loader import load_stations, load_prices_from, \
|
||||
load_prices
|
||||
from besser_tanken.data_tools import group_prices
|
||||
|
||||
|
||||
def _test_get_buehl_stations():
    """Yield every record from ``load_stations()`` with post code 77815."""
    wanted_post_codes = ['77815']
    yield from (
        station for station in load_stations()
        if station['post_code'] in wanted_post_codes
    )
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Manual smoke test: print the first matching station, then check that
    # the date string of a price record can be parsed.
    buehl_station = next(_test_get_buehl_stations())
    print(buehl_station)

    timestamp_format = '%Y-%m-%d %H:%M:%S%z'
    for price_row in load_prices():
        print(price_row['date'])
        print(datetime.strptime(price_row['date'], timestamp_format))

        print(price_row)
        print(price_row['date'])
        datetime_object = datetime.strptime(price_row['date'] + "00",
                                            timestamp_format)
        print(datetime_object)
        # Stops after the first record, so everything below only runs when
        # load_prices() returned no rows.
        exit()

    prices = load_prices_from(2021, 4, 1)
    print(len(prices))
    prices = group_prices(prices)
    pprint(prices[buehl_station['uuid']])
|
||||
6
besser_tanken/tests.py
Normal file
6
besser_tanken/tests.py
Normal file
@@ -0,0 +1,6 @@
|
||||
import pygeodb
|
||||
# Print the pygeodb distance for two pairs of post codes.
for origin, destination in (("42897", "50933"), ("77815", "78247")):
    print(pygeodb.distance(origin, destination))  # post codes are strings
|
||||
|
||||
# https://pypi.org/project/pyGeoDb/
|
||||
642
feather_test.ipynb
Normal file
642
feather_test.ipynb
Normal file
File diff suppressed because one or more lines are too long
2214
poetry.lock
generated
Normal file
2214
poetry.lock
generated
Normal file
File diff suppressed because it is too large
Load Diff
21
pyproject.toml
Normal file
21
pyproject.toml
Normal file
@@ -0,0 +1,21 @@
|
||||
[tool.poetry]
|
||||
name = "besser_tanken"
|
||||
version = "0.1.0"
|
||||
description = ""
|
||||
authors = ["Tobias Kurze <it@t-kurze.de>"]
|
||||
|
||||
[tool.poetry.dependencies]
|
||||
python = "^3.9"
|
||||
loguru = "^0.5.3"
|
||||
seaborn = "^0.11.1"
|
||||
earthpy = "^0.9.2"
|
||||
jupyter = "^1.0.0"
|
||||
jupyter_nbextensions_configurator = "^0.4.1"
|
||||
feather-format = "^0.4.1"
|
||||
pygeodb = {git = "https://github.com/tkurze/pyGeoDb.git"}
|
||||
|
||||
[tool.poetry.dev-dependencies]
|
||||
|
||||
[build-system]
|
||||
requires = ["poetry-core>=1.0.0"]
|
||||
build-backend = "poetry.core.masonry.api"
|
||||
2244
sprit_preis_analyse.ipynb
Normal file
2244
sprit_preis_analyse.ipynb
Normal file
File diff suppressed because one or more lines are too long
1
tankerkoenig-data
Submodule
1
tankerkoenig-data
Submodule
Submodule tankerkoenig-data added at afed4bbe40
Reference in New Issue
Block a user