import os

import matplotlib.pyplot as plt
import pandas
import yfinance as yf
from get_all_tickers.get_tickers import get_tickers
from pandas import DataFrame
from tensorflow.keras.callbacks import History
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential

PROJ_PATH = os.path.dirname(__file__)
ticker_data_file = os.path.join(PROJ_PATH, "ticker_data_NYSE.hdf")
model_weights_file = os.path.join(PROJ_PATH, "model_weights.bin")


def main():
    all_tickers = get_tickers(NASDAQ=False, AMEX=False)  # NYSE tickers only
    print(all_tickers)
    data = yf.download(tickers="SPL AAPL", start="2017-01-01", end="2017-04-30", interval="60m")
    # data = yf.download(tickers="SPL AAPL", start="2017-01-01", end="2017-04-30", interval="1d")
    data = yf.download(tickers=" ".join(all_tickers), interval="1d")
    print(data)
    print(type(data))
    # data.to_csv(ticker_data_file)


def test():
    nyse_tickers = get_tickers(NASDAQ=False, AMEX=False)
    print(nyse_tickers)
    # data = yf.download(tickers="SPL AAPL", start="2017-01-01", end="2017-04-30", interval="60m")
    data = yf.download(tickers="SPL AAPL", start="2017-01-01", end="2017-04-30", interval="1d")
    print(data)
    print(data.keys())
    data.to_hdf(ticker_data_file, key='Date')
    exit()


def download_test_data():
    """Download daily data for all NYSE tickers and cache it as HDF5."""
    if os.path.exists(ticker_data_file):
        print("file already exists, won't download")
        return
    nyse_tickers = get_tickers(NASDAQ=False, AMEX=False)
    data = yf.download(tickers=" ".join(nyse_tickers), interval="1d")
    print("storing data ...")
    data.to_hdf(ticker_data_file, key="Date")
    print("storing data done")


def transform_data() -> DataFrame:
    """Load the cached ticker data from disk."""
    print("reading data ...")
    data = pandas.read_hdf(ticker_data_file)
    print("reading done")
    return data


def get_day_x_delta_data(df: DataFrame, day_delta: int = 1, absolute=False) -> DataFrame:
    """Return the day_delta-day change per column, either as an absolute
    difference ('_{n}d' columns) or as a percentage of the current value
    ('_{n}d%' columns)."""
    if absolute:
        return df.diff(day_delta).add_suffix('_{}d'.format(day_delta))
    return (df.diff(day_delta) * 100 / df).add_suffix('_{}d%'.format(day_delta))


def create_model(input_dim: int):
    """Build a dense regression network; input_dim is the width of the
    training data."""
    model = Sequential()
    model.add(Dense(350, input_dim=input_dim, activation="relu"))
    model.add(Dense(350, activation="relu"))
    model.add(Dense(350, activation="relu"))
    model.add(Dense(350, activation="relu"))
    model.add(Dense(350, activation="relu"))
    model.add(Dense(1, activation="linear"))
    # Configure the model
    model.compile(optimizer='adam', loss="mean_absolute_error", metrics=["mean_absolute_error"])
    return model


def train_model(model=None):
    # Train the model; uses the module-level x_train/x_val/x_test splits
    # built in the __main__ block below.
    if model is None:
        model = create_model(x_train.shape[1])
    history = History()
    model.fit(x_train, y_train, validation_data=(x_val, y_val), epochs=15, batch_size=64, callbacks=[history])
    # Use the model's evaluate method to predict and evaluate the test dataset
    result = model.evaluate(x_test.values, y_test.values)
    # Print the results
    for i in range(len(model.metrics_names)):
        print("Metric", model.metrics_names[i], ":", str(round(result[i], 2)))
    model.save_weights(model_weights_file)
    plt.plot(history.history['loss'])
    plt.plot(history.history['val_loss'])
    plt.title("Model's training & validation loss across epochs")
    plt.ylabel('Loss')
    plt.xlabel('Epochs')
    plt.legend(['Train', 'Validation'], loc='upper right')
    plt.show()
    return model

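
# A minimal, self-contained sanity check for get_day_x_delta_data. This is an
# added sketch, not part of the original pipeline: the tiny DataFrame and the
# helper name _sanity_check_day_delta are hypothetical, chosen only to
# illustrate the two output variants of the function.
def _sanity_check_day_delta():
    df = pandas.DataFrame({'AAA': [100.0, 110.0, 121.0],
                           'BBB': [50.0, 45.0, 54.0]})
    # Relative variant: (current - previous) * 100 / current, columns '*_1d%'
    print(get_day_x_delta_data(df, 1))
    # Absolute variant: raw difference, columns '*_1d'
    print(get_day_x_delta_data(df, 1, absolute=True))
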
if __name__ == '__main__':
    # main()
    try:
        data = transform_data()
    except FileNotFoundError:
        download_test_data()
        data = transform_data()
    print("keys:")
    print(data.keys())
    print("index:")
    print(data.index)

    # Drop everything except the adjusted close prices, then flatten the
    # (price-type, ticker) MultiIndex so only the ticker level remains.
    data = data.drop('Volume', axis=1, level=0)
    data = data.drop('Open', axis=1, level=0)
    data = data.drop('Low', axis=1, level=0)
    data = data.drop('High', axis=1, level=0)
    data = data.drop('Close', axis=1, level=0)
    # data.columns = [' '.join(col).strip() for col in data.columns.values]
    # only keep level 1 (of 0 and 1) columns:
    data.columns = data.columns.get_level_values(1)
    print(data)
    print(data.keys())
    print(data.iloc[0])
    # print(get_day_x_delta_data(data, 1, True))

    SYM = 'ZTS'
    data.dropna(subset=[SYM], inplace=True)
    d1 = get_day_x_delta_data(data, 1)
    d7 = get_day_x_delta_data(data, 7)
    # print(d1.corr())
    print(d1.head())
    # d1.dropna(subset=[f'{SYM}_1d%'], inplace=True)
    # d7.dropna(subset=[f'{SYM}_7d%'], inplace=True)
    d1[f'{SYM}_7d%'] = d7[f'{SYM}_7d%']  # add the 7-day change to the 1-day relative performance data
    d1.dropna(subset=[f'{SYM}_1d%', f'{SYM}_7d%'], inplace=True)
    d7.dropna(subset=[f'{SYM}_7d%'], inplace=True)
    d1.fillna(0, inplace=True)
    d7.fillna(0, inplace=True)
    print(d1.head(100))
    print(d1.shape)

    import seaborn as sns  # Seaborn is another powerful visualization library for Python
    # sns.lineplot(data=data, x='Date', y=f'{SYM}')
    # sns.lineplot(data=d1, x='Date', y=f'{SYM}_1d%')
    # plt.show()

    # Compound the daily relative changes to get SYM's overall performance.
    performance = 1
    for v in d1[f'{SYM}_1d%']:
        performance *= (1 + (v / 100))
    print(performance)

    print("Distinct datatypes:", data.dtypes.unique())
    target = [f'{SYM}_7d%']

    from sklearn.model_selection import train_test_split
    # The 7-day column was only needed above to align rows; drop it from the
    # features so the target is not leaked into the model's inputs.
    features = d1.drop(columns=[f'{SYM}_7d%'])
    # Create train and test datasets with an 80:20 split
    x_train, x_test, y_train, y_test = train_test_split(features, d7[target], test_size=0.2, random_state=2018)
    # Further divide the training dataset into train and validation sets with a 90:10 split
    x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size=0.1, random_state=2018)
    print("Shape of x_train:", x_train.shape)
    print("Shape of x_val:", x_val.shape)
    print("Shape of x_test:", x_test.shape)
    print("Shape of y_train:", y_train.shape)
    print("Shape of y_val:", y_val.shape)
    print("Shape of y_test:", y_test.shape)
    print(y_test)

    # Baseline: predict the mean of the training targets and compute the MAE
    # on the test set, so the network has a number to beat.
    mean_target = y_train.mean()
    print("Average target:", mean_target)
    print("MAE for test data:", abs(y_test - mean_target).mean())

    # PlaidML backend setup (disabled): these settings only take effect when
    # they run before any Keras import, and they conflict with the
    # tensorflow.keras imports used above, so they are kept for reference only.
    # os.environ["KERAS_BACKEND"] = "plaidml.keras.backend"
    # os.environ["PLAIDML_EXPERIMENTAL"] = "1"
    # os.environ["PLAIDML_DEVICE_IDS"] = "opencl_amd_gfx1010.0"
    # import plaidml.keras
    # plaidml.keras.install_backend()

    # Create the deep neural network, either fresh or from saved weights.
    new_model = False
    if new_model or (not os.path.exists(model_weights_file)):
        print("creating new model")
        model = train_model()
    else:
        model = create_model(x_train.shape[1])
        print("loading existing model weights")
        model.load_weights(model_weights_file)
    model.summary()
    exit()

    # Unreachable scratch code below (kept for reference): per-column NaN
    # ratios and 1- to 7-day relative changes.
    print(data.isnull().sum() / data.shape[0] * 100)
    first_loc = data.index.get_loc(data.index[0])
    print(type(data))
    # print(data.diff())
    for day_diff in range(1, 8):  # one to seven days
        d_day = data.diff(day_diff) / data
        d_day = d_day.add_suffix(f'-{day_diff}%')
        print(d_day)
    print(data.diff(2) / data)
    print(pandas.concat([data, d_day]))
    # print(data["-1"])
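
# Hedged sketch (added; not part of the original flow, and never reached when
# the script runs because of the exit() above): how the trained network could
# be queried for a prediction. The helper name predict_latest is hypothetical.
def predict_latest(trained_model, feature_frame, sym='ZTS'):
    # Feed the most recent row of 1-day deltas through the network and report
    # the predicted 7-day relative change for `sym`.
    latest = feature_frame.tail(1)
    prediction = trained_model.predict(latest.values)
    print(f"predicted {sym} 7-day change: {prediction[0][0]:.2f}%")
    return prediction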