Files
neuroinvest/main.py

238 lines
7.4 KiB
Python

import os
import pandas
import yfinance as yf
from get_all_tickers.get_tickers import get_tickers
from tensorflow.keras.callbacks import History
from pandas import DataFrame
# Directory containing this script. Made absolute so the derived paths do not
# depend on the working directory the interpreter was started from.
PROJ_PATH = os.path.dirname(os.path.abspath(__file__))
# HDF5 file the downloaded ticker data is written to and read back from.
ticker_data_file = os.path.join(PROJ_PATH, "ticker_data_NYSE.hdf")
# File the trained model weights are saved to and loaded from.
model_weights_file = os.path.join(PROJ_PATH, "model_weights.bin")
def main():
    """Download daily price history for every listed ticker and print it.

    The ticker list comes from ``get_tickers(NASDAQ=False, AMEX=False)``
    (presumably NYSE only -- verify against the get_all_tickers docs).
    Nothing is written to disk; the downloaded frame and its type are only
    printed for inspection.
    """
    all_tickers = get_tickers(NASDAQ=False, AMEX=False)
    print(all_tickers)
    # A former "SPL AAPL" sample download was removed here: its result was
    # overwritten by the call below before it was ever read, so it only cost
    # an extra network round trip.
    data = yf.download(tickers=" ".join(all_tickers), interval="1d")
    print(data)
    print(type(data))
def test():
    """Exercise the download-and-store path with a two-symbol sample, then exit.

    Prints the ticker list, downloads daily data for "SPL AAPL" between
    2017-01-01 and 2017-04-30, prints the frame and its column keys, and
    writes the frame to ``ticker_data_file`` under the key ``'Date'``.

    Raises:
        SystemExit: always, once the data has been stored.
    """
    # Local import: `exit()` is an interactive helper injected by `site` and
    # is not guaranteed to exist; `sys.exit()` is the supported equivalent.
    import sys

    nyse_tickers = get_tickers(NASDAQ=False, AMEX=False)
    print(nyse_tickers)
    data = yf.download(tickers="SPL AAPL", start="2017-01-01", end="2017-04-30", interval="1d")
    print(data)
    print(data.keys())
    data.to_hdf(ticker_data_file, key='Date')
    sys.exit()
def download_test_data():
    """Download daily data for all listed tickers and cache it as HDF5.

    Does nothing except print a notice when ``ticker_data_file`` already
    exists. Otherwise the downloaded frame is stored in that file under the
    key ``"Date"``.
    """
    cache_present = os.path.exists(ticker_data_file)
    if cache_present:
        print("file already exists, won't download")
        return

    symbols = " ".join(get_tickers(NASDAQ=False, AMEX=False))
    history = yf.download(tickers=symbols, interval="1d")

    print("storing data ...")
    history.to_hdf(ticker_data_file, key="Date")
    print("storing data Done")
def transform_data() -> DataFrame:
    """Load the cached ticker data from ``ticker_data_file``.

    Despite the name, no transformation is applied: the frame is returned
    exactly as read.

    Returns:
        The object produced by ``pandas.read_hdf`` for the cache file.
    """
    print("reading data ...")
    cached_frame = pandas.read_hdf(ticker_data_file)
    print("reading done ...")
    return cached_frame
def get_day_x_delta_data(df: DataFrame, day_delta: int = 1, absolute=False) -> DataFrame:
    """Return the change of every column over ``day_delta`` rows.

    Args:
        df: Numeric frame whose rows are ordered in time.
        day_delta: Number of rows to look back.
        absolute: When true, return the raw difference; otherwise return the
            percent change relative to the value ``day_delta`` rows earlier.

    Returns:
        A new frame with the same index as ``df``. Column names get the
        suffix ``_{day_delta}d`` (absolute) or ``_{day_delta}d%`` (percent).
        The first ``day_delta`` rows are NaN because they have no earlier
        value to compare against.
    """
    delta = df.diff(day_delta)
    if absolute:
        return delta.add_suffix(f'_{day_delta}d')
    # Divide by the *earlier* value so the result is a true return:
    # (cur - prev) / prev. Dividing by the current value understates gains,
    # overstates losses, and breaks compounding via (1 + pct / 100).
    return (delta * 100 / df.shift(day_delta)).add_suffix(f'_{day_delta}d%')
def create_model(input_dim=None):
    """Build and compile the regression network.

    The network is a ``Sequential`` stack of five 350-unit ReLU ``Dense``
    layers followed by a single linear output unit. It is compiled with the
    Adam optimizer and mean absolute error as both loss and metric.

    Args:
        input_dim: Number of input features. When omitted, the width of the
            module-level ``x_train`` is used, which requires ``x_train`` to
            be defined before the call (the original behaviour).

    Returns:
        The compiled, untrained model.
    """
    if input_dim is None:
        input_dim = x_train.shape[1]

    model = Sequential()
    # Only the first layer declares the input width.
    model.add(Dense(350, input_dim=input_dim, activation="relu"))
    for _ in range(4):
        model.add(Dense(350, activation="relu"))
    model.add(Dense(1, activation="linear"))

    model.compile(optimizer='adam', loss="mean_absolute_error",
                  metrics=["mean_absolute_error"])
    return model
def train_model(model=None):
    """Fit a model on the module-level training split, then report and plot.

    Trains for 15 epochs with batch size 64 on ``x_train`` / ``y_train``,
    validating on ``x_val`` / ``y_val``. It then evaluates on ``x_test`` /
    ``y_test``, prints each metric rounded to two decimals, saves the
    weights to ``model_weights_file`` and shows the training and validation
    loss curves.

    Args:
        model: Model to train. A fresh one from ``create_model()`` is used
            when omitted.

    Returns:
        The trained model.
    """
    model = create_model() if model is None else model

    history = History()
    model.fit(
        x_train,
        y_train,
        validation_data=(x_val, y_val),
        epochs=15,
        batch_size=64,
        callbacks=[history],
    )

    # Score the held-out test split and print one line per reported metric.
    scores = model.evaluate(x_test.values, y_test.values)
    for metric_name, score in zip(model.metrics_names, scores):
        print("Metric ", metric_name, ":", str(round(score, 2)))

    model.save_weights(model_weights_file)

    # Training loss first, validation loss second, matching the legend order.
    for series in ('loss', 'val_loss'):
        plt.plot(history.history[series])
    plt.title("Model's Training & Validation loss across epochs")
    plt.ylabel('Loss')
    plt.xlabel('Epochs')
    plt.legend(['Train', 'Validation'], loc='upper right')
    plt.show()
    return model
# Script entry point: load the cached ticker data (downloading it first when
# the cache is missing), reduce it to one price column per ticker, derive
# 1-day and 7-day percent-change frames, split them into train/val/test sets,
# and train a model or load previously saved weights.
# NOTE(review): leading indentation is missing in this copy of the file, so
# the nesting of the statements below is not recoverable here -- confirm it
# against the original before editing.
if __name__ == '__main__':
# main()
# Load the cache; on a missing file, download it and read again.
try:
data = transform_data()
except FileNotFoundError:
download_test_data()
data = transform_data()
print("keys:")
print(data.keys())
print("index:")
print(data.index)
# Remove five level-0 column groups from the two-level column index.
# Presumably only an adjusted-close group remains afterwards -- the remaining
# level-0 label is not visible here; verify.
data = data.drop('Volume', axis=1, level=0)  # drop Volume data
data = data.drop('Open', axis=1, level=0)  # drop Open data
data = data.drop('Low', axis=1, level=0)  # drop Low data
data = data.drop('High', axis=1, level=0)  # drop High data
data = data.drop('Close', axis=1, level=0)  # drop Close data
# data.columns = [' '.join(col).strip() for col in data.columns.values]
# only keep level 1 (of 0 and 1) columns:
data.columns = data.columns.get_level_values(1)
print(data)
print(data.keys())
print(data.iloc[0])
# print(get_day_x_delta_data(data, 1, True))
# Symbol whose 7-day change is used as the prediction target below.
SYM = 'ZTS'
# Keep only rows where SYM has a value.
data.dropna(subset=[SYM], inplace=True)
# Percent-change frames over 1 and 7 rows (columns suffixed _1d% / _7d%).
d1 = get_day_x_delta_data(data, 1)
d7 = get_day_x_delta_data(data, 7)
# print(d1.corr())
print(d1.head())
# d1.dropna(subset=[f'{SYM}_1d%'], inplace=True)
# d7.dropna(subset=[f'{SYM}_7d%'], inplace=True)
# NOTE(review): this copies the target column f'{SYM}_7d%' into d1, and d1 is
# later passed unchanged as the feature matrix to train_test_split. That
# looks like target leakage -- confirm whether the column should be dropped
# from the features.
d1[f'{SYM}_7d%'] = d7[f'{SYM}_7d%']  # add 7 day data to one day relative performance data
d1.dropna(subset=[f'{SYM}_1d%', f'{SYM}_7d%'], inplace=True)
d7.dropna(subset=[f'{SYM}_7d%'], inplace=True)
# Remaining gaps (other symbols without data) are replaced with 0.
d1.fillna(0, inplace=True)
d7.fillna(0, inplace=True)
print(d1.head(100))
print(d1.shape)
import matplotlib.pyplot as plt
import seaborn as sns  # Seaborn is another powerful visualization library for Python
# sns.lineplot(data=data, x='Date', y=f'{SYM}')
# sns.lineplot(data=d1, x='Date', y=f'{SYM}_1d%')
# plt.show()
# Compound the 1-day percent changes of SYM into one cumulative factor.
performance = 1
for v in d1[f'{SYM}_1d%']:
performance *= (1 + (v / 100))
print(performance)
print("Distinct Datatypes:", data.dtypes.unique())
target = [f'{SYM}_7d%']
from sklearn.model_selection import train_test_split
# Features come from d1, labels from d7[target].
# Assumes d1 and d7 have the same number of rows after the dropna calls
# above -- TODO confirm.
# Create train and test dataset with an 80:20 split
x_train, x_test, y_train, y_test = train_test_split(d1, d7[target], test_size=0.2, random_state=2018)
# Further divide training dataset into train and validation dataset with an 90:10 split
x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size=0.1, random_state=2018)
print("Shape of x_train:", x_train.shape)
print("Shape of x_val:", x_val.shape)
print("Shape of x_test:", x_test.shape)
print("Shape of y_train:", y_train.shape)
print("Shape of y_val:", y_val.shape)
print("Shape of y_test:", y_test.shape)
print(y_test)
# Baseline: predict the training-set mean of the target for every test row.
# `mean_sales` holds the mean of y_train; the name and the "Average Sales"
# label do not describe this data.
# calculate the average score of the train dataset
mean_sales = y_train.mean()
print("Average Sales :", mean_sales)
# Calculate the Mean Absolute Error on the test dataset
print("MAE for Test Data:", abs(y_test - mean_sales).mean())
# `os` is already imported at the top of the file; this re-import is redundant.
import os
# Backend selection via environment variables, set before the keras imports.
# NOTE(review): the model classes used below come from tensorflow.keras, and
# the file's top-level imports already load tensorflow.keras before these
# variables are set. Whether the PlaidML backend has any effect on them is
# not evident from this file -- verify.
os.environ["KERAS_BACKEND"] = "plaidml.keras.backend"
os.environ["PLAIDML_EXPERIMENTAL"] = "1"
os.environ["PLAIDML_DEVICE_IDS"] = "opencl_amd_gfx1010.0"
import plaidml.keras
plaidml.keras.install_backend()
import keras
import tensorflow.keras
# Create Deep Neural Network Architecture
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
# Set new_model to True to force retraining even when saved weights exist.
new_model = False
if new_model or (not os.path.exists(model_weights_file)):
print("creating new model")
model = train_model()
else:
model = create_model()
print("loading existing weights model")
model.load_weights(model_weights_file)
model.summary()
# NOTE(review): if this exit() runs unconditionally, every statement after it
# is unreachable -- confirm the intended nesting.
exit()
# Percentage of missing values per column.
print(data.isnull().sum() / data.shape[0] * 100)
first_loc = data.index.get_loc(data.index[0])
print(type(data))
# print(data.diff())
# d_day is reassigned on every pass, so only the last day_diff (7) survives
# the loop; the '-1%' suffix is the same for every day_diff.
for day_diff in range(1, 8):  # one to 7 days
d_day = data.diff(day_diff) / data
d_day = d_day.add_suffix('-1%')
print(d_day)
print(data.diff(2) / data)
# pandas.concat without an axis argument stacks the frames row-wise.
print(pandas.concat([data, d_day]))
# print(data["-1"])