"""Download ticker price data with yfinance, cache it as HDF, and print deltas.

Run as a script, this loads the cached frame from ``ticker_data_file``
(downloading it first when the file is missing) and prints the absolute and
percentage one-row deltas of every column.
"""
import os
import sys

import pandas
import yfinance as yf
from get_all_tickers.get_tickers import get_tickers
from pandas import DataFrame

# The cache file lives next to this source file.
PROJ_PATH = os.path.dirname(__file__)
ticker_data_file = os.path.join(PROJ_PATH, "ticker_data_NYSE.hdf")


def main():
    """Download daily data for all listed tickers and print the result.

    The ticker list comes from ``get_tickers(NASDAQ=False, AMEX=False)``.
    Nothing is written to disk.
    """
    all_tickers = get_tickers(NASDAQ=False, AMEX=False)
    print(all_tickers)
    # An earlier hourly sample download used to run here; its result was
    # overwritten before being read, so it only cost a network round-trip.
    data = yf.download(tickers=" ".join(all_tickers), interval="1d")
    print(data)
    print(type(data))


def test():
    """Download a small two-symbol daily sample, store it, and exit.

    Note that this OVERWRITES ``ticker_data_file`` with the sample and then
    terminates the interpreter via ``SystemExit``.
    """
    nyse_tickers = get_tickers(NASDAQ=False, AMEX=False)
    print(nyse_tickers)
    data = yf.download(
        tickers="SPL AAPL",
        start="2017-01-01",
        end="2017-04-30",
        interval="1d",
    )
    print(data)
    print(data.keys())
    data.to_hdf(ticker_data_file, key='Date')
    # sys.exit() instead of the site-provided exit(): the latter is an
    # interactive-shell helper and is not guaranteed to exist in programs.
    sys.exit()


def download_test_data():
    """Download daily data for all listed tickers into ``ticker_data_file``.

    Does nothing (apart from printing a notice) when the file already exists,
    so an existing cache is never overwritten by this function.
    """
    if os.path.exists(ticker_data_file):
        print("file already exists, won't download")
        return
    nyse_tickers = get_tickers(NASDAQ=False, AMEX=False)
    data = yf.download(tickers=" ".join(nyse_tickers), interval="1d")
    print("storing data ...")
    data.to_hdf(ticker_data_file, key="Date")
    print("storing data Done")


def transform_data() -> DataFrame:
    """Read and return the cached frame from ``ticker_data_file``.

    Despite the name, no transformation is applied; the stored object is
    returned as read. The script entry point relies on ``FileNotFoundError``
    being raised when the cache file is missing.
    """
    print("reading data ...")
    data = pandas.read_hdf(ticker_data_file)
    print("reading done ...")
    return data


def get_day_x_delta_data(df: DataFrame, day_delta: int = 1, absolute: bool = False) -> DataFrame:
    """Return the change of every column over ``day_delta`` rows.

    Args:
        df: Frame of numeric values, one row per observation.
        day_delta: Number of rows to look back (``DataFrame.diff`` periods).
            For daily data one row presumably corresponds to one trading
            day -- verify against the data source.
        absolute: When true, return the raw difference; otherwise return the
            difference as a percentage.

    Returns:
        A new frame whose labels carry the suffix ``_<n>d`` (absolute) or
        ``_<n>d%`` (percentage). The first ``day_delta`` rows are NaN.

    Note:
        The percentage is computed relative to the CURRENT row's value
        (``diff * 100 / df``), not the earlier one, so it differs from
        ``DataFrame.pct_change``.
    """
    delta = df.diff(day_delta)
    if absolute:
        return delta.add_suffix(f"_{day_delta}d")
    return (delta * 100 / df).add_suffix(f"_{day_delta}d%")


def _explore_multi_day_ratios(data: DataFrame) -> None:
    """Print exploratory NaN statistics and 1..7-row difference ratios.

    This code used to sit after an unconditional ``exit()`` in the script
    section and therefore never ran. It is kept here, uncalled, so the
    exploration can be invoked deliberately. NOTE(review): the original
    indentation was lost, so the loop extent below is a reconstruction.
    """
    # Percentage of missing values per column.
    print(data.isnull().sum() / data.shape[0] * 100)
    print(type(data))
    for day_diff in range(1, 8):  # one to 7 days
        d_day = data.diff(day_diff) / data
        d_day = d_day.add_suffix('-1%')
        print(d_day)
    print(data.diff(2) / data)
    print(pandas.concat([data, d_day]))


if __name__ == '__main__':
    # main() is not invoked; the script works from the cached HDF file.
    try:
        data = transform_data()
    except FileNotFoundError:
        # No cache yet: download once, then read the freshly written file.
        download_test_data()
        data = transform_data()
    print("keys:")
    print(data.keys())
    print("index:")
    print(data.index)
    print(data)
    print(get_day_x_delta_data(data, 1, True))
    print(get_day_x_delta_data(data, 1))