added geopy

2021-08-16 20:00:17 +02:00
commit 633409476b
23 changed files with 9669 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,4 @@
 __pycache__/
 __pycache__/*
 besser_tanken/__pycache__
 merged.feather
--- a/.gitmodules
+++ b/.gitmodules
@@ -0,0 +1,3 @@
 [submodule "tankerkoenig-data"]
 	path = tankerkoenig-data
 	url = https://tankerkoenig@dev.azure.com/tankerkoenig/tankerkoenig-data/_git/tankerkoenig-data
--- a/.idea/.gitignore
+++ b/.idea/.gitignore
@@ -0,0 +1,3 @@
 # Default ignored files
 /shelf/
 /workspace.xml
--- a/.idea/besser_tanken.iml
+++ b/.idea/besser_tanken.iml
@@ -0,0 +1,8 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <module type="PYTHON_MODULE" version="4">
  <component name="NewModuleRootManager">
    <content url="file://$MODULE_DIR$" />
    <orderEntry type="inheritedJdk" />
    <orderEntry type="sourceFolder" forTests="false" />
  </component>
 </module>
--- a/.idea/csv-plugin.xml
+++ b/.idea/csv-plugin.xml
@@ -0,0 +1,16 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <project version="4">
  <component name="CsvFileAttributes">
    <option name="attributeMap">
      <map>
        <entry key="/tankerkoenig-data/stations/2021/02/2021-02-07-stations.csv">
          <value>
            <Attribute>
              <option name="separator" value="," />
            </Attribute>
          </value>
        </entry>
      </map>
    </option>
  </component>
 </project>
--- a/.idea/inspectionProfiles/Project_Default.xml
+++ b/.idea/inspectionProfiles/Project_Default.xml
@@ -0,0 +1,62 @@
 <component name="InspectionProjectProfileManager">
  <profile version="1.0">
    <option name="myName" value="Project Default" />
    <inspection_tool class="PyPackageRequirementsInspection" enabled="true" level="WARNING" enabled_by_default="true">
      <option name="ignoredPackages">
        <value>
          <list size="42">
            <item index="0" class="java.lang.String" itemvalue="cffi" />
            <item index="1" class="java.lang.String" itemvalue="numpy" />
            <item index="2" class="java.lang.String" itemvalue="requests" />
            <item index="3" class="java.lang.String" itemvalue="markupsafe" />
            <item index="4" class="java.lang.String" itemvalue="pyrsistent" />
            <item index="5" class="java.lang.String" itemvalue="certifi" />
            <item index="6" class="java.lang.String" itemvalue="lxml" />
            <item index="7" class="java.lang.String" itemvalue="urllib3" />
            <item index="8" class="java.lang.String" itemvalue="itsdangerous" />
            <item index="9" class="java.lang.String" itemvalue="jsonschema" />
            <item index="10" class="java.lang.String" itemvalue="pynacl" />
            <item index="11" class="java.lang.String" itemvalue="flask-restx" />
            <item index="12" class="java.lang.String" itemvalue="werkzeug" />
            <item index="13" class="java.lang.String" itemvalue="six" />
            <item index="14" class="java.lang.String" itemvalue="aniso8601" />
            <item index="15" class="java.lang.String" itemvalue="flask-jwt-extended" />
            <item index="16" class="java.lang.String" itemvalue="cryptography" />
            <item index="17" class="java.lang.String" itemvalue="click" />
            <item index="18" class="java.lang.String" itemvalue="attrs" />
            <item index="19" class="java.lang.String" itemvalue="bcrypt" />
            <item index="20" class="java.lang.String" itemvalue="jinja2" />
            <item index="21" class="java.lang.String" itemvalue="pandas" />
            <item index="22" class="java.lang.String" itemvalue="paramiko" />
            <item index="23" class="java.lang.String" itemvalue="user-agents" />
            <item index="24" class="java.lang.String" itemvalue="flask" />
            <item index="25" class="java.lang.String" itemvalue="pyjwt" />
            <item index="26" class="java.lang.String" itemvalue="idna" />
            <item index="27" class="java.lang.String" itemvalue="blinker" />
            <item index="28" class="java.lang.String" itemvalue="flask-cors" />
            <item index="29" class="java.lang.String" itemvalue="flask-httpauth" />
            <item index="30" class="java.lang.String" itemvalue="flask-sitemap" />
            <item index="31" class="java.lang.String" itemvalue="mariadb" />
            <item index="32" class="java.lang.String" itemvalue="tqdm" />
            <item index="33" class="java.lang.String" itemvalue="protobuf" />
            <item index="34" class="java.lang.String" itemvalue="tabulate" />
            <item index="35" class="java.lang.String" itemvalue="pytimeparse" />
            <item index="36" class="java.lang.String" itemvalue="sqlite3-to-mysql" />
            <item index="37" class="java.lang.String" itemvalue="packaging" />
            <item index="38" class="java.lang.String" itemvalue="mysql-connector-python" />
            <item index="39" class="java.lang.String" itemvalue="sqlalchemy" />
            <item index="40" class="java.lang.String" itemvalue="simplejson" />
            <item index="41" class="java.lang.String" itemvalue="unittest" />
          </list>
        </value>
      </option>
    </inspection_tool>
    <inspection_tool class="PyPep8Inspection" enabled="true" level="WEAK WARNING" enabled_by_default="true">
      <option name="ignoredErrors">
        <list>
          <option value="E402" />
        </list>
      </option>
    </inspection_tool>
  </profile>
 </component>
--- a/.idea/inspectionProfiles/profiles_settings.xml
+++ b/.idea/inspectionProfiles/profiles_settings.xml
@@ -0,0 +1,6 @@
 <component name="InspectionProjectProfileManager">
  <settings>
    <option name="USE_PROJECT_PROFILE" value="false" />
    <version value="1.0" />
  </settings>
 </component>
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -0,0 +1,4 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <project version="4">
  <component name="ProjectRootManager" version="2" project-jdk-name="Poetry (besser_tanken)" project-jdk-type="Python SDK" />
 </project>
--- a/.idea/modules.xml
+++ b/.idea/modules.xml
@@ -0,0 +1,8 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <project version="4">
  <component name="ProjectModuleManager">
    <modules>
      <module fileurl="file://$PROJECT_DIR$/.idea/besser_tanken.iml" filepath="$PROJECT_DIR$/.idea/besser_tanken.iml" />
    </modules>
  </component>
 </project>
--- a/.idea/vcs.xml
+++ b/.idea/vcs.xml
@@ -0,0 +1,7 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <project version="4">
  <component name="VcsDirectoryMappings">
    <mapping directory="$PROJECT_DIR$" vcs="Git" />
    <mapping directory="$PROJECT_DIR$/tankerkoenig-data" vcs="Git" />
  </component>
 </project>
--- a/.ipynb_checkpoints/feather_test-checkpoint.ipynb
+++ b/.ipynb_checkpoints/feather_test-checkpoint.ipynb
--- a/.ipynb_checkpoints/sprit_preis_analyse-checkpoint.ipynb
+++ b/.ipynb_checkpoints/sprit_preis_analyse-checkpoint.ipynb
--- a/besser_tanken/init.py
+++ b/besser_tanken/init.py
--- a/besser_tanken/config.py
+++ b/besser_tanken/config.py
@@ -0,0 +1,10 @@
 import os
 # Directory that contains this configuration module.
 conf_dir = os.path.dirname(os.path.abspath(__file__))
 def _resolve_beside_package(*parts):
    """Return the absolute path of ``parts`` below the parent of conf_dir."""
    return os.path.abspath(os.path.join(conf_dir, os.pardir, *parts))
 # Data directories located next to the package directory.
 stations_dir = _resolve_beside_package("tankerkoenig-data", "stations")
 prices_dir = _resolve_beside_package("tankerkoenig-data", "prices")
 feather_dir = _resolve_beside_package("feather_data")
--- a/besser_tanken/data_loader.py
+++ b/besser_tanken/data_loader.py
@@ -0,0 +1,130 @@
 import concurrent
 import csv
 import os
 import re
 from collections import defaultdict
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from datetime import date, timedelta
 from glob import glob
 from threading import Thread, Lock
 from loguru import logger
 from besser_tanken.config import stations_dir, prices_dir
 from besser_tanken.data_tools import fix_date_time_strings
 # Matches data file names of the form "YYYY-MM-DD-<type>.csv"; the groups
 # are (year, month, day, type). The dot before "csv" is escaped so that
 # only a literal ".csv" suffix matches.
 files_re = re.compile(r"(\d\d\d\d)-(\d\d)-(\d\d)-(\S*)\.csv")
 # In-memory caches keyed by CSV file path; filled by load_prices() and
 # load_stations() respectively.
 prices_cache = {}
 stations_cache = {}
 def load_date_hierarchical_file(base_dir, year=None, month=None,
                                 day=None) -> str:
    """Resolve the path of a dated CSV file below ``base_dir``.

    The expected layout is ``<base_dir>/<year>/<month>/<file>.csv`` where
    month directories are zero-padded to two digits and file names match
    ``files_re``. Every component left as ``None`` defaults to the newest
    one that is available on disk.

    :param base_dir: root directory of the year/month hierarchy
    :param year: year as int or numeric string, ``None`` for the latest
    :param month: month as int or numeric string, ``None`` for the latest
    :param day: day as int or numeric string, ``None`` for the latest
    :return: path of the CSV file for the selected date
    :raises ValueError: if a requested (or any) year, month or day cannot
        be found, or if a given component is not numeric
    """
    def _numeric_dir_names(parent):
        # A pattern ending in a separator makes glob return directories
        # only; normpath strips that separator again so basename works on
        # every platform. Non-numeric directory names are ignored.
        names = (os.path.basename(os.path.normpath(p))
                 for p in glob(os.path.join(parent, "*", "")))
        return sorted(int(n) for n in names if n.isdecimal())

    years = _numeric_dir_names(base_dir)
    if year is None:
        if not years:
            raise ValueError(f"No year directories found in {base_dir}!")
        year = years[-1]
    year = int(year)
    if year not in years:
        raise ValueError(f"No file for year {year}!")
    year_dir = os.path.join(base_dir, str(year))

    months = _numeric_dir_names(year_dir)
    if month is None:
        if not months:
            raise ValueError(f"No month directories found for year {year}!")
        month = months[-1]
    month = int(month)
    if month not in months:
        raise ValueError(f"No file found for month {month}!")
    month_dir = os.path.join(year_dir, f"{month:02d}")

    # Remember the file name per day so the returned path always belongs
    # to the selected day; sorting keeps the choice deterministic when a
    # day has more than one matching file.
    files_by_day = {}
    for csv_path in sorted(glob(os.path.join(month_dir, "*.csv"))):
        file_name = os.path.basename(csv_path)
        match = files_re.search(file_name)
        if match:
            files_by_day.setdefault(int(match.group(3)), file_name)
    if day is None:
        if not files_by_day:
            raise ValueError(f"No files found in {month_dir}!")
        day = max(files_by_day)
    day = int(day)
    if day not in files_by_day:
        raise ValueError(f"No file found for day {day}!")
    return os.path.join(month_dir, files_by_day[day])
 def load_prices(year=None, month=None, day=None) -> list:
    """Return the price rows for one day, cached per CSV file.

    The file is located with ``load_date_hierarchical_file`` below
    ``prices_dir``; components left as ``None`` are resolved by that
    function. Rows are read with ``csv.DictReader`` and passed through
    ``fix_date_time_strings`` before being stored in ``prices_cache``.

    :return: list of row dicts for the selected file
    """
    csv_path = load_date_hierarchical_file(prices_dir, year, month, day)
    try:
        cached_rows = prices_cache[csv_path]
    except KeyError:
        pass
    else:
        logger.info(f"returning prices from cache for file: {csv_path}")
        return cached_rows
    logger.info(f"loading prices file: {csv_path}")
    with open(csv_path, newline='') as handle:
        rows = list(csv.DictReader(handle))
    logger.info(f"got {len(rows)} prices")
    rows = fix_date_time_strings(rows)
    prices_cache[csv_path] = rows
    return rows
 def load_prices_from(year=None, month=None, day=None, threading=True) -> list:
    """Load all price rows from the given start date up to yesterday.

    Days for which ``load_prices`` raises ``ValueError`` (no file found)
    are skipped, in both the threaded and the sequential mode. The rows
    are returned in chronological file order regardless of the mode.

    :param year: start year (required; ``date()`` rejects ``None``)
    :param month: start month (required)
    :param day: start day (required)
    :param threading: load the daily files with a thread pool if true,
        otherwise one after another
    :return: concatenated list of row dicts of all days that were found
    """
    start_date = date(year, month, day)
    today = date.today()
    # Today itself is excluded, as in ``start_date < today``.
    days = [start_date + timedelta(days=offset)
            for offset in range((today - start_date).days)]

    def _load_or_none(d):
        try:
            return load_prices(d.year, d.month, d.day)
        except ValueError:
            return None  # no file for this day, skip it

    if threading:
        with ThreadPoolExecutor(max_workers=16) as executor:
            # map() yields results in submission order, which keeps the
            # combined list chronological and deterministic.
            per_day = list(executor.map(_load_or_none, days))
    else:
        per_day = [_load_or_none(d) for d in days]

    res = []
    for rows in per_day:
        if rows is not None:
            res.extend(rows)
    return res
 def load_stations(year=None, month=None, day=None) -> list:
    """Return the station rows for one day, cached per CSV file.

    The file is located with ``load_date_hierarchical_file`` below
    ``stations_dir``; components left as ``None`` are resolved by that
    function. Rows are read with ``csv.DictReader`` and stored in
    ``stations_cache``.

    :return: list of row dicts for the selected file
    """
    csv_path = load_date_hierarchical_file(stations_dir, year, month, day)
    try:
        cached_rows = stations_cache[csv_path]
    except KeyError:
        pass
    else:
        logger.info(f"returning stations from cache for file: {csv_path}")
        return cached_rows
    with open(csv_path, newline='') as handle:
        rows = list(csv.DictReader(handle))
    logger.info(f"got {len(rows)} stations")
    stations_cache[csv_path] = rows
    return rows
--- a/besser_tanken/data_tools.py
+++ b/besser_tanken/data_tools.py
@@ -0,0 +1,43 @@
 from collections import defaultdict
 import feather
 import pandas
 from besser_tanken.config import stations_dir
 def group_prices(prices):
    """Group price rows by their ``station_uuid`` value.

    :param prices: iterable of row dicts that contain ``station_uuid``
    :return: ``defaultdict(list)`` mapping each station uuid to its rows,
        in input order
    """
    by_station = defaultdict(list)
    for station_id, row in ((entry['station_uuid'], entry)
                            for entry in prices):
        by_station[station_id].append(row)
    return by_station
 def fix_date_time_strings(price_dict_list):
    """Normalise the UTC offset of every row's ``date`` string in place.

    The part after the last ``+`` is treated as the offset: colons are
    removed and the result is right-padded with zeros to four digits, so
    ``+02``, ``+02:00`` and ``+0200`` all become ``+0200``. Rows without
    a string ``date`` value or without a ``+`` are left untouched.

    :param price_dict_list: list of row dicts, modified in place
    :return: the same list object that was passed in
    """
    for p in price_dict_list:
        raw_date = p.get('date')
        if not isinstance(raw_date, str) or '+' not in raw_date:
            continue  # nothing to normalise
        date_time_string, tz_string = raw_date.rsplit('+', maxsplit=1)
        # str.replace returns a new string, so the result must be kept.
        tz_string = tz_string.replace(':', '').ljust(4, '0')
        p['date'] = date_time_string + "+" + tz_string
    return price_dict_list
 def convert_csv_to_feather(csv_file: str, feather_file: str = None):
    """Read a CSV file with pandas and write it out as a feather file.

    :param csv_file: path of the CSV file to read
    :param feather_file: target path; if ``None`` the extension of
        ``csv_file`` is replaced by ``.feather``
    :return: the path the feather file was written to
    """
    import os  # local import: this module has no file-level ``os`` import
    df = pandas.read_csv(csv_file)
    if feather_file is None:
        # splitext only looks at the last path component, so a dot in a
        # directory name cannot truncate the path.
        feather_file = os.path.splitext(csv_file)[0] + '.feather'
    feather.write_dataframe(df, feather_file)
    return feather_file
 if __name__ == '__main__':
    # Manual run: convert one fixed stations CSV into a feather file that
    # is written next to it.
    sample_csv = f"{stations_dir}/2021/01/2021-01-01-stations.csv"
    convert_csv_to_feather(sample_csv)
--- a/besser_tanken/main.py
+++ b/besser_tanken/main.py
@@ -0,0 +1,45 @@
 import csv
 import os
 import re
 from collections import defaultdict
 from datetime import date, timedelta, datetime
 from glob import glob
 from pprint import pprint
 from typing import Union
 from loguru import logger
 from besser_tanken.data_loader import load_stations, load_prices_from, \
    load_prices
 from besser_tanken.data_tools import group_prices
 def _test_get_buehl_stations():
    """Yield every station from ``load_stations()`` with post code 77815."""
    wanted_post_codes = ['77815']
    yield from (station for station in load_stations()
                if station['post_code'] in wanted_post_codes)
 if __name__ == '__main__':
    # Ad-hoc debugging entry point: print one matching station and check
    # that the date strings of the default prices file can be parsed.
    # next() raises StopIteration if no station matches.
    buehl_station = next(_test_get_buehl_stations())
    print(buehl_station)
    # Print every raw date string followed by its parsed datetime.
    for p in load_prices():
        # for p in load_prices_from(2021, 4, 1):
        print(p['date'])
        print(datetime.strptime(p['date'],
                                '%Y-%m-%d %H:%M:%S%z'))
    # exit()
    # NOTE(review): ``p`` below is the last row of the loop above; it is
    # undefined here if load_prices() returned no rows.
    print(p)
    print(p['date'])
    # if ":" == p['date'][-3]:
    #    p['date'] = p['date'][:-3] + p['date'][-2:]
    #    print(p['date'])
    # NOTE(review): "00" is appended to the date string before parsing;
    # presumably a leftover from when offsets were not padded - confirm.
    datetime_object = datetime.strptime(p['date'] + "00",
                                        '%Y-%m-%d %H:%M:%S%z')
    print(datetime_object)
    # NOTE(review): exit() ends the script here, so the statements below
    # are never executed.
    exit()
    prices = load_prices_from(2021, 4, 1)
    # print(prices)
    print(len(prices))
    prices = group_prices(prices)
    pprint(prices[buehl_station['uuid']])
--- a/besser_tanken/tests.py
+++ b/besser_tanken/tests.py
@@ -0,0 +1,6 @@
 import pygeodb
 # Print the distance for two pairs of post codes, passed as strings.
 # https://pypi.org/project/pyGeoDb/
 for origin, destination in (("42897", "50933"), ("77815", "78247")):
    print(pygeodb.distance(origin, destination))
--- a/feather_test.ipynb
+++ b/feather_test.ipynb
--- a/poetry.lock
+++ b/poetry.lock
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -0,0 +1,21 @@
 [tool.poetry]
 name = "besser_tanken"
 version = "0.1.0"
 description = ""
 authors = ["Tobias Kurze <it@t-kurze.de>"]
 [tool.poetry.dependencies]
 python = "^3.9"
 loguru = "^0.5.3"
 seaborn = "^0.11.1"
 earthpy = "^0.9.2"
 jupyter = "^1.0.0"
 jupyter_nbextensions_configurator = "^0.4.1"
 feather-format = "^0.4.1"
 pygeodb = {git = "https://github.com/tkurze/pyGeoDb.git"}
 [tool.poetry.dev-dependencies]
 [build-system]
 requires = ["poetry-core>=1.0.0"]
 build-backend = "poetry.core.masonry.api"
--- a/sprit_preis_analyse.ipynb
+++ b/sprit_preis_analyse.ipynb
--- a/1
+++ b/1